Spaces:

Agents-MCP-Hackathon
/

Txt_summarizer

Running

File size: 8,576 Bytes

f985823

# app.py - For Hugging Face Spaces (without Modal)
import gradio as gr
from transformers import pipeline
import torch
from functools import lru_cache
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TextAnalyzer:
    """Run sentiment, language-detection and summarization models over text.

    The pipelines are loaded once when the object is constructed. Successful
    results of :meth:`analyze` are memoized per instance (up to
    ``CACHE_SIZE`` distinct texts). An analysis that ends in an error result
    is never memoized, so a transient model failure does not stick to an
    input.
    """

    # Number of distinct texts whose results are memoized per instance.
    CACHE_SIZE = 64
    # Character budget for the two classifiers; tokenizer truncation is
    # requested as well because a character count does not bound tokens.
    CLASSIFIER_CHAR_LIMIT = 512
    # Texts with fewer words than this are not summarized.
    SUMMARY_MIN_WORDS = 50
    # Bounds (in tokens) requested from the summarizer.
    SUMMARY_MIN_LENGTH = 20
    SUMMARY_MAX_LENGTH = 100

    def __init__(self):
        """Pick the device, load every model and set up the result cache."""
        self.device = 0 if torch.cuda.is_available() else -1
        logger.info(f"Using device: {'GPU' if self.device == 0 else 'CPU'}")

        # Load models
        logger.info("Loading models...")
        self.load_models()
        logger.info("✅ All models loaded successfully!")

        self._get_memo()

    def load_models(self):
        """Load all required models.

        Raises:
            Exception: whatever the sentiment or summarization pipeline
                raised while loading; both are mandatory. A failure of the
                optional language detector is logged and recorded in
                ``has_language_detection`` instead of being raised.
        """
        try:
            # Use smaller, faster models for Hugging Face Spaces
            self.sentiment_analyzer = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=self.device
            )

            # Use a smaller summarization model
            self.summarizer = pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",
                device=self.device
            )
        except Exception as e:
            logger.error(f"Error loading models: {e}")
            raise

        # Language detection is optional: the app keeps working without it.
        self.language_detector = None
        try:
            self.language_detector = pipeline(
                "text-classification",
                model="papluca/xlm-roberta-base-language-detection",
                device=self.device
            )
            self.has_language_detection = True
        except Exception:
            self.has_language_detection = False
            logger.warning("Language detection model not loaded", exc_info=True)

    @staticmethod
    def _text_stats(text: str):
        """Return word, character and sentence counts for *text*."""
        # Treat '!' and '?' as sentence terminators too, not only '.'.
        unified = text.replace("!", ".").replace("?", ".")
        return {
            "word_count": len(text.split()),
            "char_count": len(text),
            "sentence_count": sum(1 for part in unified.split(".") if part.strip()),
        }

    def _error_result(self, text: str, error: Exception):
        """Log *error* and build the error-shaped result for *text*."""
        logger.error(f"Analysis error: {error}")
        return {
            "error": f"Analysis failed: {str(error)}",
            "stats": self._text_stats(text),
        }

    def _get_memo(self):
        """Return this instance's memoized analysis callable, creating it once."""
        memo = getattr(self, "_memo", None)
        if memo is None:
            # Wrapping the bound method keeps the cache per instance instead
            # of sharing one class-level cache keyed on ``self``.
            memo = lru_cache(maxsize=self.CACHE_SIZE)(self._run_analysis)
            self._memo = memo
        return memo

    def _run_analysis(self, text: str):
        """Run the models on *text* and return the result dict.

        A sentiment-model failure propagates to the caller. Language
        detection and summarization failures are absorbed into the result
        ("Unknown" language / an explanatory summary string).
        """
        stats = self._text_stats(text)
        word_count = stats["word_count"]
        text_limited = text[:self.CLASSIFIER_CHAR_LIMIT]

        # Sentiment analysis
        sentiment_result = self.sentiment_analyzer(text_limited, truncation=True)[0]

        # Language detection (if available)
        if not self.has_language_detection:
            language = {"language": "Detection disabled", "confidence": 0}
        else:
            language = {"language": "Unknown", "confidence": 0}
            try:
                detected = self.language_detector(text_limited, truncation=True)[0]
                language = {
                    "language": detected["label"],
                    "confidence": round(detected["score"], 3),
                }
            except Exception:
                logger.warning("Language detection failed", exc_info=True)

        # Summarization (only for longer texts)
        if word_count >= self.SUMMARY_MIN_WORDS:
            max_length = min(self.SUMMARY_MAX_LENGTH, word_count // 3)
            # Never request a minimum above the maximum (short inputs).
            min_length = min(self.SUMMARY_MIN_LENGTH, max_length)
            try:
                summary_result = self.summarizer(
                    text,
                    max_length=max_length,
                    min_length=min_length,
                    do_sample=False,
                    # Inputs longer than the model context are cut, not fatal.
                    truncation=True,
                )
                summary = summary_result[0]["summary_text"]
            except Exception as e:
                summary = f"Unable to generate summary: {str(e)}"
        else:
            summary = (
                "Text too short for summarization "
                f"(minimum {self.SUMMARY_MIN_WORDS} words)"
            )

        return {
            "sentiment": {
                "label": sentiment_result["label"],
                "confidence": round(sentiment_result["score"], 3),
            },
            "language": language,
            "summary": summary,
            "stats": stats,
        }

    def cached_analyze(self, text_hash: str, text: str):
        """Return the memoized analysis of *text*, computing it on a miss.

        Args:
            text_hash: Accepted for backward compatibility only; the cache
                is keyed on *text* itself.
            text: The text to analyze.

        Returns:
            The result dict, or an error dict (``"error"`` and ``"stats"``
            keys) when the analysis raised. Error dicts are not memoized.
        """
        try:
            return self._get_memo()(text)
        except Exception as e:
            return self._error_result(text, e)

    def _analyze_text(self, text: str):
        """Core analysis logic (uncached); returns an error dict on failure."""
        try:
            return self._run_analysis(text)
        except Exception as e:
            return self._error_result(text, e)

    def analyze(self, text: str):
        """Public analyze method with caching.

        Returns:
            ``None`` for empty or whitespace-only input, otherwise the
            result of :meth:`cached_analyze` on the stripped text.
        """
        if not text:
            return None
        cleaned = text.strip()
        if not cleaned:
            return None
        return self.cached_analyze(str(hash(cleaned)), cleaned)

# Build the shared analyzer once at import time; the flag records whether
# model loading succeeded so the UI can report a failure instead of crashing.
logger.info("Initializing Text Analyzer...")
analyzer_loaded = False
try:
    analyzer = TextAnalyzer()
except Exception as e:
    logger.error(f"Failed to load analyzer: {e}")
else:
    analyzer_loaded = True

def gradio_interface(text):
    """Analyze *text* and return the five strings shown in the UI.

    The tuple is ordered as (sentiment, language, summary, statistics,
    status). When the models are unavailable, the input is blank, or the
    analysis fails, the first slot carries the explanation and the
    remaining four carry a short placeholder.
    """
    # Models never loaded: nothing can be analyzed.
    if not analyzer_loaded:
        return ("❌ Models failed to load. Please try again later.",) + ("❌ Error",) * 4

    # Blank or whitespace-only input.
    if not (text and text.strip()):
        return ("Please enter some text to analyze.",) + ("No text provided",) * 4

    results = analyzer.analyze(text)

    # No result at all, or an error-shaped result.
    if not results:
        return ("Analysis failed",) + ("Error",) * 4
    if "error" in results:
        return (results.get("error", "Unknown error occurred"),) + ("Error",) * 4

    # Sentiment line.
    sentiment = results['sentiment']
    sentiment_text = f"**{sentiment['label']}** (confidence: {sentiment['confidence']})"

    # Language line; the confidence is shown only when it is positive.
    language = results['language']
    language_text = f"**{language['language']}**"
    if language['confidence'] > 0:
        language_text = f"{language_text} (confidence: {language['confidence']})"

    summary_text = results['summary']

    # Statistics line; the sentence count may be absent from the stats dict.
    stats = results['stats']
    stats_text = " | ".join([
        f"Words: {stats['word_count']}",
        f"Characters: {stats['char_count']}",
        f"Sentences: {stats.get('sentence_count', 'N/A')}",
    ])

    return sentiment_text, language_text, summary_text, stats_text, "✅ Analysis complete!"

# Create Gradio interface
def create_app():
    """Build the Gradio Blocks UI and return it without launching it."""
    # Sample inputs offered below the form; each entry is one row of inputs.
    sample_texts = [
        ["I absolutely love this new restaurant! The food was incredible and the service was outstanding."],
        ["Climate change represents one of the most significant challenges of our time. Rising global temperatures are causing widespread environmental disruption."],
        ["This movie was disappointing. The plot was confusing and the acting was poor."]
    ]

    with gr.Blocks(title="Smart Text Analyzer", theme=gr.themes.Soft()) as demo:

        gr.Markdown("""
        # 🧠 Smart Text Analyzer
        **Analyze text for sentiment, language, and generate summaries**
        
        *Powered by Hugging Face Transformers*
        """)

        # Input area: the text box and the button that triggers the analysis.
        with gr.Row():
            with gr.Column():
                input_box = gr.Textbox(
                    label="📝 Enter your text",
                    placeholder="Type or paste your text here for analysis...",
                    lines=6
                )
                run_button = gr.Button("🔍 Analyze Text", variant="primary")

        # Result area: two columns of short outputs.
        with gr.Row():
            with gr.Column():
                sentiment_view = gr.Markdown(label="😊 Sentiment")
                language_view = gr.Markdown(label="🌍 Language")
            with gr.Column():
                stats_view = gr.Markdown(label="📊 Statistics")
                status_view = gr.Textbox(label="Status", interactive=False)

        # The summary gets its own full-width box under the two columns.
        summary_view = gr.Textbox(label="📝 Summary", lines=3, interactive=False)

        gr.Examples(examples=sample_texts, inputs=input_box)

        # Order must match the tuple returned by gradio_interface.
        result_views = [
            sentiment_view,
            language_view,
            summary_view,
            stats_view,
            status_view,
        ]
        run_button.click(fn=gradio_interface, inputs=input_box, outputs=result_views)

    return demo

if __name__ == "__main__":
    # Script entry point: build the UI, then start serving it.
    app = create_app()
    app.launch()