|
|
|
import logging
import re
from functools import lru_cache

import gradio as gr
import torch
from transformers import pipeline
|
|
|
|
|
# Emit INFO-level records so model-loading progress is visible at startup.
logging.basicConfig(level=logging.INFO)
# Module-scoped logger used by the analyzer class and the UI callbacks.
logger = logging.getLogger(__name__)
|
|
|
class TextAnalyzer:
    """Sentiment, language and summary analysis backed by Hugging Face pipelines.

    Creating an instance loads every model immediately. The sentiment and
    summarization models are required; the language detector is optional and
    its availability is recorded in ``has_language_detection``.
    """

    # Only this many leading characters are handed to the two classifiers.
    CLASSIFIER_CHAR_LIMIT = 512
    # Inputs need at least this many whitespace-separated words to be summarized.
    SUMMARY_MIN_WORDS = 50
    # Length bounds passed to the summarizer as min_length / max_length.
    SUMMARY_MIN_LENGTH = 20
    SUMMARY_MAX_LENGTH = 100
    # A run of '.', '!' or '?' ends a sentence for the statistics.
    _SENTENCE_END = re.compile(r"[.!?]+")

    def __init__(self):
        """Select the compute device and load all models."""
        # Pipelines take a CUDA device index, or -1 for CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        logger.info("Using device: %s", "GPU" if self.device == 0 else "CPU")

        logger.info("Loading models...")
        self.load_models()
        # NOTE(review): the leading glyph was mojibake and the literal was split
        # across two lines in the source; restored as a check mark - confirm.
        logger.info("✅ All models loaded successfully!")

    def load_models(self):
        """Load the sentiment, summarization and language-detection pipelines.

        Raises:
            Exception: whatever ``pipeline`` raises for the sentiment or
                summarization model; the error is logged and re-raised.
                A failure of the language detector is not raised: it only
                sets ``has_language_detection`` to ``False``.
        """
        try:
            self.sentiment_analyzer = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=self.device,
            )
            self.summarizer = pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",
                device=self.device,
            )
        except Exception as e:
            logger.error("Error loading models: %s", e)
            raise

        # Language detection is best-effort: the analyzer stays usable without it.
        try:
            self.language_detector = pipeline(
                "text-classification",
                model="papluca/xlm-roberta-base-language-detection",
                device=self.device,
            )
            self.has_language_detection = True
        except Exception as exc:
            self.has_language_detection = False
            logger.warning("Language detection model not loaded: %s", exc)

    @lru_cache(maxsize=64)
    def cached_analyze(self, text_hash: str, text: str):
        """Return the memoized analysis of ``text``.

        ``text_hash`` is not used by the computation; it only takes part in
        the cache key together with ``self`` and ``text``.

        NOTE: ``_analyze_text`` reports failures as a returned dict rather
        than by raising, so failed analyses are memoized as well.
        """
        return self._analyze_text(text)

    @classmethod
    def _count_sentences(cls, text: str) -> int:
        """Count the non-blank segments of ``text`` separated by ., ! or ?."""
        return sum(1 for part in cls._SENTENCE_END.split(text) if part.strip())

    def _detect_language(self, snippet: str) -> dict:
        """Build the ``language`` entry of the result for ``snippet``."""
        if not self.has_language_detection:
            return {"language": "Detection disabled", "confidence": 0}
        try:
            detected = self.language_detector(snippet, truncation=True)[0]
        except Exception as exc:
            # Best-effort step: a detector failure must not fail the analysis.
            logger.warning("Language detection failed: %s", exc)
            detected = None
        if not detected:
            return {"language": "Unknown", "confidence": 0}
        return {
            "language": detected["label"],
            "confidence": round(detected["score"], 3),
        }

    def _summarize(self, text: str, word_count: int) -> str:
        """Return a summary of ``text`` or a message explaining its absence."""
        if word_count < self.SUMMARY_MIN_WORDS:
            return (
                "Text too short for summarization "
                f"(minimum {self.SUMMARY_MIN_WORDS} words)"
            )
        # A third of the word count, capped at the maximum but never below
        # min_length, which the summarizer could not satisfy.
        max_length = min(
            self.SUMMARY_MAX_LENGTH,
            max(self.SUMMARY_MIN_LENGTH, word_count // 3),
        )
        try:
            generated = self.summarizer(
                text,
                max_length=max_length,
                min_length=self.SUMMARY_MIN_LENGTH,
                do_sample=False,
                # Inputs longer than the model accepts are cut, not rejected.
                truncation=True,
            )
            return generated[0]["summary_text"]
        except Exception as e:
            return f"Unable to generate summary: {str(e)}"

    def _analyze_text(self, text: str):
        """Run every analysis on ``text`` and combine the results.

        Returns:
            On success, a dict with ``sentiment``, ``language``, ``summary``
            and ``stats`` entries. If the sentiment step (or result assembly)
            raises, a dict with ``error`` and ``stats`` instead.
        """
        word_count = len(text.split())
        stats = {
            "word_count": word_count,
            "char_count": len(text),
            "sentence_count": self._count_sentences(text),
        }
        snippet = text[: self.CLASSIFIER_CHAR_LIMIT]

        try:
            sentiment = self.sentiment_analyzer(snippet, truncation=True)[0]
            return {
                "sentiment": {
                    "label": sentiment["label"],
                    "confidence": round(sentiment["score"], 3),
                },
                "language": self._detect_language(snippet),
                "summary": self._summarize(text, word_count),
                "stats": stats,
            }
        except Exception as e:
            logger.error("Analysis error: %s", e)
            return {"error": f"Analysis failed: {str(e)}", "stats": stats}

    def analyze(self, text: str):
        """Analyze ``text`` with caching; return ``None`` for blank input."""
        if not text:
            return None
        cleaned = text.strip()
        if not cleaned:
            return None
        return self.cached_analyze(str(hash(cleaned)), cleaned)
|
|
|
|
|
logger.info("Initializing Text Analyzer...")
# Bind both names before trying, so `analyzer` exists (as None) even when
# construction fails and callers only have to check `analyzer_loaded`.
analyzer = None
analyzer_loaded = False
try:
    analyzer = TextAnalyzer()
    analyzer_loaded = True
except Exception as e:
    # Keep the module importable so the UI can report the failure;
    # logger.exception also records the traceback.
    logger.exception("Failed to load analyzer: %s", e)
|
|
|
def gradio_interface(text):
    """Analyze ``text`` and format the result for the Gradio outputs.

    Args:
        text: Raw contents of the input textbox; may be empty or ``None``.

    Returns:
        A 5-tuple of strings in the order sentiment, language, summary,
        statistics, status. When the models are unavailable, the input is
        blank, or the analysis fails, the first slot carries the message and
        the other four a short placeholder.
    """
    # NOTE(review): the status glyphs were mojibake in the source (the final
    # status literal was even split across two lines); restored here as
    # cross / check marks - confirm against the intended emoji.
    if not analyzer_loaded:
        failed = "❌ Error"
        return (
            "❌ Models failed to load. Please try again later.",
            failed,
            failed,
            failed,
            failed,
        )

    if not text or not text.strip():
        missing = "No text provided"
        return (
            "Please enter some text to analyze.",
            missing,
            missing,
            missing,
            missing,
        )

    results = analyzer.analyze(text)

    if not results:
        return "Analysis failed", "Error", "Error", "Error", "Error"
    if "error" in results:
        return results["error"], "Error", "Error", "Error", "Error"

    sentiment = results["sentiment"]
    sentiment_text = f"**{sentiment['label']}** (confidence: {sentiment['confidence']})"

    language = results["language"]
    language_text = f"**{language['language']}**"
    # A confidence of 0 marks "Unknown" / "Detection disabled"; omit it then.
    if language["confidence"] > 0:
        language_text += f" (confidence: {language['confidence']})"

    stats = results["stats"]
    stats_text = (
        f"Words: {stats['word_count']} | "
        f"Characters: {stats['char_count']} | "
        f"Sentences: {stats.get('sentence_count', 'N/A')}"
    )

    return (
        sentiment_text,
        language_text,
        results["summary"],
        stats_text,
        "✅ Analysis complete!",
    )
|
|
|
|
|
def create_app():
    """Assemble the Gradio Blocks UI and return it without launching.

    The layout has a header, an input textbox with an analyze button, four
    result widgets plus a summary box, clickable example inputs, and the
    button wiring to ``gradio_interface``.
    """
    # NOTE(review): indentation was lost in the source, so the nesting of the
    # layout containers below is reconstructed - confirm against the original.
    # NOTE(review): the "π" prefixes in the labels look like mis-decoded emoji;
    # they are reproduced as found - confirm and restore the intended glyphs.
    sample_texts = [
        ["I absolutely love this new restaurant! The food was incredible and the service was outstanding."],
        ["Climate change represents one of the most significant challenges of our time. Rising global temperatures are causing widespread environmental disruption."],
        ["This movie was disappointing. The plot was confusing and the acting was poor."],
    ]

    with gr.Blocks(title="Smart Text Analyzer", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # π§  Smart Text Analyzer
        **Analyze text for sentiment, language, and generate summaries**

        *Powered by Hugging Face Transformers*
        """)

        # Input area: free-text box and the trigger button.
        with gr.Row():
            with gr.Column():
                input_box = gr.Textbox(
                    label="π Enter your text",
                    placeholder="Type or paste your text here for analysis...",
                    lines=6,
                )
                run_button = gr.Button("π Analyze Text", variant="primary")

        # Result area: two columns of short outputs.
        with gr.Row():
            with gr.Column():
                sentiment_view = gr.Markdown(label="π Sentiment")
                language_view = gr.Markdown(label="π Language")
            with gr.Column():
                stats_view = gr.Markdown(label="π Statistics")
                status_view = gr.Textbox(label="Status", interactive=False)

        summary_view = gr.Textbox(
            label="π Summary",
            lines=3,
            interactive=False,
        )

        gr.Examples(examples=sample_texts, inputs=input_box)

        # Order must match the tuple returned by gradio_interface:
        # sentiment, language, summary, statistics, status.
        result_widgets = [
            sentiment_view,
            language_view,
            summary_view,
            stats_view,
            status_view,
        ]
        run_button.click(
            fn=gradio_interface,
            inputs=input_box,
            outputs=result_widgets,
        )

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    app = create_app()
    app.launch()