Spaces:

Agents-MCP-Hackathon
/

Txt_summarizer

Running

App Files Files Community

Txt_summarizer / app.py

gourav246

add app.py / main file

f985823 verified 20 days ago

raw

history blame contribute delete

8.58 kB

	# app.py - For Hugging Face Spaces (without Modal)
	import gradio as gr
	from transformers import pipeline
	import torch
	from functools import lru_cache
	import logging

	# Setup logging: set the root log level to INFO at import time and create
	# the module-level logger that the rest of this file writes to.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	class TextAnalyzer:
	    """Run sentiment, language-detection and summarization pipelines on text.

	    The sentiment and summarization models are mandatory: failing to load
	    either one aborts construction.  The language-detection model is
	    optional; when it cannot be loaded, ``has_language_detection`` is set to
	    ``False`` and analysis results report "Detection disabled".
	    """

	    # Character cap applied to the text handed to the classification models.
	    MAX_CLASSIFIER_CHARS = 512
	    # Minimum number of words a text needs before a summary is attempted.
	    MIN_WORDS_FOR_SUMMARY = 50
	    # Bounds passed to the summarizer as min_length / max_length.
	    SUMMARY_MIN_LENGTH = 20
	    SUMMARY_MAX_LENGTH = 100

	    def __init__(self):
	        """Select the inference device (GPU 0 if available, else CPU) and load models."""
	        self.device = 0 if torch.cuda.is_available() else -1
	        logger.info(f"Using device: {'GPU' if self.device == 0 else 'CPU'}")

	        # Load models
	        logger.info("Loading models...")
	        self.load_models()
	        logger.info("✅ All models loaded successfully!")

	    def load_models(self):
	        """Load all required models.

	        Raises:
	            Exception: whatever ``pipeline`` raised while loading the
	                sentiment or summarization model (logged, then re-raised).
	        """
	        try:
	            # Use smaller, faster models for Hugging Face Spaces
	            self.sentiment_analyzer = pipeline(
	                "sentiment-analysis",
	                model="distilbert-base-uncased-finetuned-sst-2-english",
	                device=self.device,
	            )

	            # Use a smaller summarization model
	            self.summarizer = pipeline(
	                "summarization",
	                model="sshleifer/distilbart-cnn-12-6",
	                device=self.device,
	            )
	        except Exception as e:
	            logger.error(f"Error loading models: {e}")
	            raise

	        # Language detection is best-effort: a failure here only disables the
	        # feature.  Catch Exception (not a bare except) so Ctrl-C / SystemExit
	        # still propagate, and keep the reason in the log.
	        try:
	            self.language_detector = pipeline(
	                "text-classification",
	                model="papluca/xlm-roberta-base-language-detection",
	                device=self.device,
	            )
	            self.has_language_detection = True
	        except Exception as e:
	            self.language_detector = None
	            self.has_language_detection = False
	            logger.warning(f"Language detection model not loaded: {e}")

	    # NOTE(review): lru_cache on a method keys on ``self`` and keeps the
	    # instance alive for the cache's lifetime; acceptable for the single
	    # module-level analyzer.  Error results returned by _analyze_text are
	    # cached as well.
	    @lru_cache(maxsize=64)
	    def cached_analyze(self, text_hash: str, text: str):
	        """Cache results for identical inputs.

	        ``text_hash`` is kept for interface compatibility; ``text`` itself is
	        already part of the cache key.
	        """
	        return self._analyze_text(text)

	    def _detect_language(self, text_limited: str):
	        """Return the ``{"language", "confidence"}`` entry for *text_limited*."""
	        if not self.has_language_detection:
	            return {"language": "Detection disabled", "confidence": 0}

	        try:
	            detected = self.language_detector(text_limited)[0]
	        except Exception as e:
	            # Best-effort: a detector failure must not fail the whole analysis.
	            logger.warning(f"Language detection failed: {e}")
	            return {"language": "Unknown", "confidence": 0}

	        return {
	            "language": detected["label"],
	            "confidence": round(detected["score"], 3),
	        }

	    def _summarize(self, text: str, word_count: int):
	        """Return a summary of *text*, or an explanatory message if none is produced."""
	        if word_count < self.MIN_WORDS_FOR_SUMMARY:
	            return "Text too short for summarization (minimum 50 words)"

	        # Aim for roughly a third of the input, but never let max_length drop
	        # below min_length (it did for inputs of 51-59 words).
	        max_length = max(
	            self.SUMMARY_MIN_LENGTH,
	            min(self.SUMMARY_MAX_LENGTH, word_count // 3),
	        )
	        try:
	            summary_result = self.summarizer(
	                text,
	                max_length=max_length,
	                min_length=self.SUMMARY_MIN_LENGTH,
	                do_sample=False,
	                # Inputs longer than the model's context are cut, not rejected.
	                truncation=True,
	            )
	            return summary_result[0]["summary_text"]
	        except Exception as e:
	            return f"Unable to generate summary: {str(e)}"

	    @staticmethod
	    def _count_sentences(text: str) -> int:
	        """Count non-empty segments of *text* ending in '.', '!' or '?'.

	        This is a heuristic: abbreviations and decimal numbers still split.
	        """
	        normalized = text.replace("!", ".").replace("?", ".")
	        return sum(1 for segment in normalized.split(".") if segment.strip())

	    def _analyze_text(self, text: str):
	        """Core analysis logic.

	        Returns:
	            On success, a dict with ``sentiment``, ``language``, ``summary``
	            and ``stats`` entries.  On failure, a dict with an ``error``
	            message and the basic ``stats`` (word and character counts).
	        """
	        # Basic statistics
	        word_count = len(text.split())
	        char_count = len(text)

	        # Limit text length for the classification models
	        text_limited = text[:self.MAX_CLASSIFIER_CHARS]

	        try:
	            sentiment_result = self.sentiment_analyzer(text_limited)[0]
	            return {
	                "sentiment": {
	                    "label": sentiment_result["label"],
	                    "confidence": round(sentiment_result["score"], 3),
	                },
	                "language": self._detect_language(text_limited),
	                "summary": self._summarize(text, word_count),
	                "stats": {
	                    "word_count": word_count,
	                    "char_count": char_count,
	                    "sentence_count": self._count_sentences(text),
	                },
	            }
	        except Exception as e:
	            logger.error(f"Analysis error: {e}")
	            return {
	                "error": f"Analysis failed: {str(e)}",
	                "stats": {"word_count": word_count, "char_count": char_count},
	            }

	    def analyze(self, text: str):
	        """Public analyze method with caching.

	        Returns ``None`` for empty or whitespace-only input; otherwise the
	        (possibly cached) result dict for the stripped text.
	        """
	        if not text or not text.strip():
	            return None

	        cleaned = text.strip()
	        return self.cached_analyze(str(hash(cleaned)), cleaned)

	# Initialize analyzer
	logger.info("Initializing Text Analyzer...")
	# Bind the name up front so a failed load leaves `analyzer` defined (as None)
	# instead of unbound; gradio_interface checks `analyzer_loaded` before use.
	analyzer = None
	try:
	    analyzer = TextAnalyzer()
	    analyzer_loaded = True
	except Exception as e:
	    # Keep the app importable so the UI can report the failure; logging via
	    # logger.exception records the traceback alongside the message.
	    logger.exception(f"Failed to load analyzer: {e}")
	    analyzer_loaded = False

	def gradio_interface(text):
	    """Gradio interface function.

	    Runs the module-level ``analyzer`` on *text* and formats the result.

	    Returns:
	        A 5-tuple of strings in the order
	        ``(sentiment, language, summary, stats, status)``.  When the models
	        failed to load, the input is empty, or analysis fails, the first
	        element carries the message and the rest are placeholder strings.
	    """
	    if not analyzer_loaded:
	        return (
	            "❌ Models failed to load. Please try again later.",
	            "❌ Error",
	            "❌ Error",
	            "❌ Error",
	            "❌ Error",
	        )

	    if not text or not text.strip():
	        return (
	            "Please enter some text to analyze.",
	            "No text provided",
	            "No text provided",
	            "No text provided",
	            "No text provided",
	        )

	    # Analyze text
	    results = analyzer.analyze(text)

	    if not results:
	        return "Analysis failed", "Error", "Error", "Error", "Error"
	    if "error" in results:
	        error_msg = results.get("error", "Unknown error occurred")
	        return error_msg, "Error", "Error", "Error", "Error"

	    # Format results
	    sentiment = results["sentiment"]
	    sentiment_text = f"{sentiment['label']} (confidence: {sentiment['confidence']})"

	    language = results["language"]
	    language_text = f"{language['language']}"
	    # A confidence of 0 marks "Unknown" / "Detection disabled"; omit it then.
	    if language["confidence"] > 0:
	        language_text += f" (confidence: {language['confidence']})"

	    summary_text = results["summary"]

	    stats = results["stats"]
	    # Plain "|" separators: the previous "\|" was an invalid escape sequence
	    # and put a literal backslash into the string.
	    stats_text = (
	        f"Words: {stats['word_count']} | "
	        f"Characters: {stats['char_count']} | "
	        f"Sentences: {stats.get('sentence_count', 'N/A')}"
	    )

	    return sentiment_text, language_text, summary_text, stats_text, "✅ Analysis complete!"

	# Create Gradio interface
	# NOTE(review): nesting indentation is missing in this copy of the file; the
	# extent of each `with gr.Row()` / `with gr.Column()` context must be
	# confirmed against the original before this function can run.
	def create_app():
	"""Create the Gradio application.

	Builds a ``gr.Blocks`` UI titled "Smart Text Analyzer" (Soft theme) with a
	text input, an analyze button, sentiment / language / statistics / status /
	summary outputs and three example inputs, and wires the button click to
	``gradio_interface``.

	Returns:
	The ``gr.Blocks`` object bound to ``demo``.
	"""
	with gr.Blocks(
	title="Smart Text Analyzer",
	theme=gr.themes.Soft()
	) as demo:

	# Page header
	gr.Markdown("""
	# 🧠 Smart Text Analyzer
	Analyze text for sentiment, language, and generate summaries

	Powered by Hugging Face Transformers
	""")

	# Input area: free-text box plus the button that triggers analysis
	with gr.Row():
	with gr.Column():
	text_input = gr.Textbox(
	label="📝 Enter your text",
	placeholder="Type or paste your text here for analysis...",
	lines=6
	)
	analyze_btn = gr.Button("🔍 Analyze Text", variant="primary")

	# Result area: sentiment and language, then statistics and status
	with gr.Row():
	with gr.Column():
	sentiment_output = gr.Markdown(label="😊 Sentiment")
	language_output = gr.Markdown(label="🌍 Language")
	with gr.Column():
	stats_output = gr.Markdown(label="📊 Statistics")
	status_output = gr.Textbox(label="Status", interactive=False)

	# Read-only box that receives the generated summary
	summary_output = gr.Textbox(
	label="📝 Summary",
	lines=3,
	interactive=False
	)

	# Examples
	gr.Examples(
	examples=[
	["I absolutely love this new restaurant! The food was incredible and the service was outstanding."],
	["Climate change represents one of the most significant challenges of our time. Rising global temperatures are causing widespread environmental disruption."],
	["This movie was disappointing. The plot was confusing and the acting was poor."]
	],
	inputs=text_input
	)

	# The outputs list order must match the 5-tuple returned by gradio_interface:
	# (sentiment, language, summary, stats, status).
	analyze_btn.click(
	fn=gradio_interface,
	inputs=text_input,
	outputs=[sentiment_output, language_output, summary_output, stats_output, status_output]
	)

	return demo

	if __name__ == "__main__":
	    # Script entry point: build the Blocks UI, then start the Gradio server.
	    app = create_app()
	    app.launch()