entropy25's picture
Update app.py
5eb9344 verified
raw
history blame
46.7 kB
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
from collections import Counter, defaultdict
import re
import json
import csv
import io
import tempfile
from datetime import datetime
import logging
from functools import lru_cache
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple
import pandas as pd
# Configure logging first so the optional-import fallbacks below can report
# which features are unavailable.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Optional dependency: word clouds.
try:
    from wordcloud import WordCloud
    WORDCLOUD_AVAILABLE = True
except ImportError:
    WORDCLOUD_AVAILABLE = False
    logger.warning("WordCloud not available")

# Optional dependency: NLTK stop words. Besides a missing package, the corpus
# download or lookup can fail at runtime, so any ordinary exception triggers
# the built-in fallback list. `Exception` (not a bare `except`) keeps
# KeyboardInterrupt/SystemExit propagating.
try:
    import nltk
    from nltk.corpus import stopwords
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
    STOP_WORDS = set(stopwords.words('english'))
    NLTK_AVAILABLE = True
except Exception:
    NLTK_AVAILABLE = False
    STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
    logger.warning("NLTK not available, using basic stopwords")

# Optional dependency: statistical language detection.
try:
    import langdetect
    LANGDETECT_AVAILABLE = True
except ImportError:
    LANGDETECT_AVAILABLE = False
    logger.warning("langdetect not available, using fallback language detection")

# Optional dependencies: SHAP and LIME explainers.
try:
    import shap
    SHAP_AVAILABLE = True
except ImportError:
    SHAP_AVAILABLE = False
    logger.warning("SHAP not available, using basic analysis")

try:
    from lime.lime_text import LimeTextExplainer
    LIME_AVAILABLE = True
except ImportError:
    LIME_AVAILABLE = False
    logger.warning("LIME not available, using basic analysis")
# Application-wide configuration
@dataclass
class Config:
    """Central settings for limits, model names, languages and colour themes.

    The annotated attributes are dataclass fields and can be overridden per
    instance through the constructor. The un-annotated dictionaries are plain
    class attributes shared by every instance.
    """

    # Limits (dataclass fields)
    MAX_HISTORY_SIZE: int = 500
    BATCH_SIZE_LIMIT: int = 30
    MAX_TEXT_LENGTH: int = 512
    CACHE_SIZE: int = 64

    # Language code -> display name shown in the UI
    SUPPORTED_LANGUAGES = dict(
        auto='Auto Detect',
        en='English',
        zh='Chinese',
        es='Spanish',
        fr='French',
        de='German',
        sv='Swedish',
    )

    # Hugging Face model identifiers; 'fallback' is the backup model
    MODELS = dict(
        en="cardiffnlp/twitter-roberta-base-sentiment-latest",
        multilingual="cardiffnlp/twitter-xlm-roberta-base-sentiment",
        zh="uer/roberta-base-finetuned-dianping-chinese",
        fallback="distilbert-base-uncased-finetuned-sst-2-english",
    )

    # Theme name -> colours for positive / negative / neutral
    THEMES = dict(
        default=dict(pos='#4CAF50', neg='#F44336', neu='#FF9800'),
        ocean=dict(pos='#0077BE', neg='#FF6B35', neu='#00BCD4'),
        dark=dict(pos='#66BB6A', neg='#EF5350', neu='#FFA726'),
        rainbow=dict(pos='#9C27B0', neg='#E91E63', neu='#FF5722'),
    )


config = Config()
class ModelManager:
    """Load and serve the sentiment model/tokenizer pair used by the app.

    Construction loads the multilingual model and, if that fails, the
    fallback model. Whichever loads is stored under the ``'default'`` key of
    ``self.models`` / ``self.tokenizers``.

    Raises:
        RuntimeError: from the constructor when neither model can be loaded.
    """

    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.device = self._get_device()
        self.model_loaded = False
        self._load_models()

    def _get_device(self):
        """Return the torch device to run on: CUDA, then MPS, otherwise CPU."""
        try:
            if torch.cuda.is_available():
                return torch.device("cuda")
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                return torch.device("mps")
        except Exception as e:
            # A failing accelerator probe must not prevent CPU inference.
            logger.warning(f"Device probing failed, using CPU: {e}")
        return torch.device("cpu")

    def _load_pair(self, model_name):
        """Load tokenizer and model for ``model_name`` and install them as 'default'."""
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        model.to(self.device)
        # Publish only after both parts loaded, so a failure half-way never
        # leaves a tokenizer paired with the wrong (or no) model.
        self.tokenizers['default'] = tokenizer
        self.models['default'] = model

    def _load_models(self):
        """Load the multilingual model, falling back to the backup model.

        Raises:
            RuntimeError: if both loads fail; chained to the fallback error.
        """
        # Try the multilingual model first.
        try:
            model_name = config.MODELS['multilingual']
            logger.info(f"Loading model: {model_name}")
            self._load_pair(model_name)
            logger.info(f"Successfully loaded default model: {model_name}")
            self.model_loaded = True
            return
        except Exception as e:
            logger.error(f"Failed to load multilingual model: {e}")

        # Then the backup model.
        try:
            fallback_model = config.MODELS['fallback']
            logger.info(f"Trying fallback model: {fallback_model}")
            self._load_pair(fallback_model)
            logger.info(f"Successfully loaded fallback model: {fallback_model}")
            self.model_loaded = True
        except Exception as e2:
            logger.error(f"Failed to load fallback model: {e2}")
            self.model_loaded = False
            raise RuntimeError("Failed to load any sentiment analysis model") from e2

    def get_model(self, language='en'):
        """Return ``(model, tokenizer)`` for ``language``.

        Simplification: every language currently uses the default pair, so
        ``language`` is accepted but not consulted.

        Raises:
            RuntimeError: if no model has been loaded.
        """
        if not self.model_loaded:
            raise RuntimeError("No models are loaded")
        return self.models['default'], self.tokenizers['default']

    @staticmethod
    def detect_language(text: str) -> str:
        """Return a language code for ``text``, defaulting to ``'en'``.

        Without langdetect, text containing a character in U+4E00-U+9FFF is
        reported as ``'zh'`` and everything else as ``'en'``. With langdetect,
        ``zh-cn``/``zh-tw`` are folded into ``'zh'`` and any code missing from
        ``config.SUPPORTED_LANGUAGES`` becomes ``'en'``.
        """
        if not LANGDETECT_AVAILABLE:
            # Simple script-based detection.
            if re.search(r'[\u4e00-\u9fff]', text):
                return 'zh'
            return 'en'
        try:
            detected = langdetect.detect(text)
            language_mapping = {
                'zh-cn': 'zh',
                'zh-tw': 'zh'
            }
            detected = language_mapping.get(detected, detected)
            return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
        except Exception:
            # Detection is best-effort; fall back to English on any failure.
            return 'en'
# Global model manager, created on first use
model_manager = None


def initialize_models():
    """Create the global ``ModelManager`` on first call (lazy initialisation).

    Returns:
        True when a manager already exists or was just created; False when
        construction raised. The error is logged and ``model_manager`` stays
        ``None``, so a later call tries again.
    """
    global model_manager
    if model_manager is not None:
        return True
    try:
        model_manager = ModelManager()
    except Exception as e:
        logger.error(f"Model initialization failed: {e}")
        return False
    return True
class HistoryManager:
    """In-memory, size-capped log of analysis results.

    Entries are dicts. The methods here read the keys ``'sentiment'``,
    ``'confidence'``, ``'language'`` and ``'full_text'``.
    """

    def __init__(self):
        self._history = []

    def add_entry(self, entry: Dict):
        """Append ``entry``, dropping the oldest entries beyond ``config.MAX_HISTORY_SIZE``."""
        self._history.append(entry)
        overflow = len(self._history) - config.MAX_HISTORY_SIZE
        if overflow > 0:
            del self._history[:overflow]

    def add_batch_entries(self, entries: List[Dict]):
        """Append every entry in ``entries`` in order, applying the size cap."""
        for entry in entries:
            self.add_entry(entry)

    def get_history(self) -> List[Dict]:
        """Return a shallow copy of the full history, oldest first."""
        return self._history.copy()

    def get_recent_history(self, n: int = 10) -> List[Dict]:
        """Return the last ``n`` entries, oldest first; ``[]`` when ``n <= 0``."""
        # Guard needed: a slice of [-0:] is [0:], i.e. the whole list.
        if n <= 0:
            return []
        return self._history[-n:]

    def filter_history(self, sentiment: Optional[str] = None, language: Optional[str] = None,
                       min_confidence: Optional[float] = None) -> List[Dict]:
        """Return the entries matching every filter that is supplied.

        Args:
            sentiment: exact ``'sentiment'`` value to keep; falsy means no filter.
            language: exact language code to keep (entries without a
                ``'language'`` key count as ``'en'``); falsy means no filter.
            min_confidence: inclusive lower bound on ``'confidence'``;
                ``None`` means no filter.

        Returns:
            A new list, so callers cannot mutate the stored history through it.
        """
        filtered = list(self._history)
        if sentiment:
            filtered = [h for h in filtered if h['sentiment'] == sentiment]
        if language:
            filtered = [h for h in filtered if h.get('language', 'en') == language]
        if min_confidence is not None:
            filtered = [h for h in filtered if h['confidence'] >= min_confidence]
        return filtered

    def clear(self) -> int:
        """Remove all entries and return how many were removed."""
        count = len(self._history)
        self._history.clear()
        return count

    def get_stats(self) -> Dict:
        """Return aggregate statistics over the history, or ``{}`` when it is empty."""
        if not self._history:
            return {}
        sentiments = [item['sentiment'] for item in self._history]
        confidences = [item['confidence'] for item in self._history]
        languages = [item.get('language', 'en') for item in self._history]
        text_lengths = [len(item.get('full_text', '')) for item in self._history]
        return {
            'total_analyses': len(self._history),
            'positive_count': sentiments.count('Positive'),
            'negative_count': sentiments.count('Negative'),
            'neutral_count': sentiments.count('Neutral'),
            'avg_confidence': np.mean(confidences),
            'max_confidence': np.max(confidences),
            'min_confidence': np.min(confidences),
            'languages_detected': len(set(languages)),
            'most_common_language': Counter(languages).most_common(1)[0][0],
            'avg_text_length': np.mean(text_lengths)
        }


history_manager = HistoryManager()
class TextProcessor:
    """Stateless text helpers: cleaning, keyword extraction, batch parsing."""

    @staticmethod
    @lru_cache(maxsize=config.CACHE_SIZE)
    def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
        """Lower-case and strip ``text``, then drop stop words and short tokens.

        Digits are removed when ``remove_numbers`` is set, and characters that
        are neither word characters nor whitespace when ``remove_punctuation``
        is set. Tokens of two characters or fewer are discarded. Results are
        memoised per argument combination.
        """
        normalized = text.lower().strip()
        if remove_numbers:
            normalized = re.sub(r'\d+', '', normalized)
        if remove_punctuation:
            normalized = re.sub(r'[^\w\s]', '', normalized)
        kept = []
        for token in normalized.split():
            if len(token) > 2 and token not in STOP_WORDS:
                kept.append(token)
        return ' '.join(kept)

    @staticmethod
    def extract_keywords(text: str, top_k: int = 5) -> List[str]:
        """Return up to ``top_k`` most frequent terms of ``text``.

        If the text contains any character in U+4E00-U+9FFF, the terms are the
        individual characters from that range; otherwise they are the words
        left after ``clean_text``.
        """
        if re.search(r'[\u4e00-\u9fff]', text):
            han_chars = ''.join(re.findall(r'[\u4e00-\u9fff]+', text))
            ranked = Counter(han_chars).most_common(top_k)
        else:
            tokens = TextProcessor.clean_text(text).split()
            ranked = Counter(tokens).most_common(top_k)
        return [term for term, _count in ranked]

    @staticmethod
    def parse_batch_input(text: str) -> List[str]:
        """Split textarea content into stripped, non-empty lines."""
        stripped_lines = (raw.strip() for raw in text.strip().split('\n'))
        return [line for line in stripped_lines if line]
class SentimentAnalyzer:
    """Run the loaded classifier on single texts or batches."""

    @staticmethod
    def _scores_to_result(probs) -> Dict:
        """Map a softmax probability vector to the sentiment fields of a result.

        A length-3 vector is read as (negative, neutral, positive); any other
        length is read as (negative, positive) using its first two entries.
        """
        best = int(np.argmax(probs))
        if len(probs) == 3:
            labels = ['Negative', 'Neutral', 'Positive']
            return {
                'sentiment': labels[best],
                'confidence': float(probs[best]),
                'neg_prob': float(probs[0]),
                'neu_prob': float(probs[1]),
                'pos_prob': float(probs[2]),
                'has_neutral': True
            }
        return {
            'sentiment': "Positive" if best == 1 else "Negative",
            'confidence': float(probs[best]),
            'neg_prob': float(probs[0]),
            'pos_prob': float(probs[1]),
            'neu_prob': 0.0,
            'has_neutral': False
        }

    @staticmethod
    def analyze_text(text: str, language: str = 'auto', preprocessing_options: Optional[Dict] = None) -> Dict:
        """Classify one text and return its sentiment, probabilities and metadata.

        Args:
            text: the text to analyse; must contain a non-whitespace character.
            language: a language code, or ``'auto'`` to detect it from ``text``.
            preprocessing_options: optional dict with the boolean keys
                ``'clean_text'``, ``'remove_punctuation'``, ``'remove_numbers'``.
                Cleaning is skipped for text containing characters in
                U+4E00-U+9FFF.

        Returns:
            Dict with ``sentiment``, ``confidence``, ``neg_prob``, ``neu_prob``,
            ``pos_prob``, ``has_neutral``, ``language``, ``keywords``,
            ``word_count`` and ``char_count``.

        Raises:
            ValueError: if ``text`` is empty or whitespace only.
            RuntimeError: if models cannot be initialised or inference fails;
                the underlying exception is chained as ``__cause__``.
        """
        if not text.strip():
            raise ValueError("Empty text provided")

        # Make sure a model is loaded.
        if not initialize_models():
            raise RuntimeError("Failed to initialize sentiment analysis models")

        # Detect language if auto
        if language == 'auto':
            detected_lang = model_manager.detect_language(text)
        else:
            detected_lang = language

        # Get appropriate model
        try:
            model, tokenizer = model_manager.get_model(detected_lang)
        except Exception as e:
            logger.error(f"Failed to get model: {e}")
            raise RuntimeError(f"Model loading failed: {e}") from e

        # Preprocessing
        options = preprocessing_options or {}
        processed_text = text
        if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
            cleaned = TextProcessor.clean_text(
                text,
                options.get('remove_punctuation', True),
                options.get('remove_numbers', False)
            )
            # Cleaning can remove every token (e.g. only stop words or short
            # words); score the original text rather than an empty string.
            processed_text = cleaned or text

        try:
            # Tokenize and analyze
            inputs = tokenizer(
                processed_text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=config.MAX_TEXT_LENGTH
            ).to(model_manager.device)
            with torch.no_grad():
                outputs = model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]

            result = SentimentAnalyzer._scores_to_result(probs)
            # Metadata is computed from the original, unprocessed text.
            result.update({
                'language': detected_lang,
                'keywords': TextProcessor.extract_keywords(text),
                'word_count': len(text.split()),
                'char_count': len(text)
            })
            return result
        except Exception as e:
            logger.error(f"Analysis failed: {e}")
            raise RuntimeError(f"Sentiment analysis failed: {e}") from e

    @staticmethod
    def analyze_batch(texts: List[str], language: str = 'auto',
                      preprocessing_options: Optional[Dict] = None) -> List[Dict]:
        """Analyse each text independently and return one dict per input, in order.

        A text whose analysis raises yields an error record (``sentiment`` set
        to ``'Error'``, ``confidence`` 0.0, the message under ``'error'`` and a
        50-character preview under ``'text'``) instead of aborting the batch.
        Every record carries its position under ``'batch_index'``.
        """
        results = []
        for i, text in enumerate(texts):
            try:
                result = SentimentAnalyzer.analyze_text(text, language, preprocessing_options)
                result['batch_index'] = i
                results.append(result)
            except Exception as e:
                results.append({
                    'sentiment': 'Error',
                    'confidence': 0.0,
                    'error': str(e),
                    'batch_index': i,
                    'text': text[:50] + '...' if len(text) > 50 else text
                })
        return results
class AdvancedVisualizer:
    """Plotly figure builders for single-text and batch sentiment results.

    Each public builder catches its own exceptions, logs them, and returns a
    placeholder figure carrying the error text instead of raising.
    """

    @staticmethod
    def _message_figure(message: str, title: Optional[str] = None,
                        font_size: Optional[int] = None) -> go.Figure:
        """Return an empty 400px-high figure showing ``message`` at its centre."""
        annotation = dict(text=message, x=0.5, y=0.5,
                          xref="paper", yref="paper", showarrow=False)
        if font_size is not None:
            annotation['font'] = dict(size=font_size)
        layout = dict(height=400)
        if title is not None:
            layout['title'] = title
        fig = go.Figure()
        fig.add_annotation(**annotation)
        fig.update_layout(**layout)
        return fig

    @staticmethod
    def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
        """Build a gauge for one analysis result.

        With ``result['has_neutral']`` set, the gauge shows the positive
        probability (0-100) over negative/neutral/positive bands. Otherwise it
        shows the confidence of the predicted label. An unknown ``theme``
        falls back to ``'default'``.
        """
        colors = config.THEMES.get(theme, config.THEMES['default'])
        try:
            sentiment = result['sentiment']
            if result.get('has_neutral', False):
                # Three-way gauge. The bar takes the colour of the predicted
                # label, including neutral, so a neutral verdict is not
                # painted with the negative colour.
                bar_color = {
                    'Positive': colors['pos'],
                    'Neutral': colors['neu'],
                    'Negative': colors['neg'],
                }.get(sentiment, colors['neg'])
                fig = go.Figure(go.Indicator(
                    mode="gauge+number+delta",
                    value=result['pos_prob'] * 100,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': f"Sentiment: {sentiment}"},
                    delta={'reference': 50},
                    gauge={
                        'axis': {'range': [None, 100]},
                        'bar': {'color': bar_color},
                        'steps': [
                            {'range': [0, 33], 'color': colors['neg']},
                            {'range': [33, 67], 'color': colors['neu']},
                            {'range': [67, 100], 'color': colors['pos']}
                        ],
                        'threshold': {
                            'line': {'color': "red", 'width': 4},
                            'thickness': 0.75,
                            'value': 90
                        }
                    }
                ))
            else:
                # Two-way gauge
                fig = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=result['confidence'] * 100,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': f"Confidence: {sentiment}"},
                    gauge={
                        'axis': {'range': [None, 100]},
                        'bar': {'color': colors['pos'] if sentiment == 'Positive' else colors['neg']},
                        'steps': [
                            {'range': [0, 50], 'color': "lightgray"},
                            {'range': [50, 100], 'color': "gray"}
                        ]
                    }
                ))
            fig.update_layout(height=400, font={'size': 16})
            return fig
        except Exception as e:
            logger.error(f"Failed to create gauge: {e}")
            return AdvancedVisualizer._message_figure(
                f"Visualization Error: {str(e)}", font_size=14)

    @staticmethod
    def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
        """Build a bar chart of the class probabilities in ``result``.

        The neutral bar is included only when ``result['has_neutral']`` is set.
        """
        colors = config.THEMES.get(theme, config.THEMES['default'])
        try:
            if result.get('has_neutral', False):
                labels = ['Negative', 'Neutral', 'Positive']
                values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
                bar_colors = [colors['neg'], colors['neu'], colors['pos']]
            else:
                labels = ['Negative', 'Positive']
                values = [result['neg_prob'], result['pos_prob']]
                bar_colors = [colors['neg'], colors['pos']]
            fig = go.Figure(data=[
                go.Bar(x=labels, y=values, marker_color=bar_colors,
                       text=[f'{v:.3f}' for v in values])
            ])
            fig.update_traces(texttemplate='%{text}', textposition='outside')
            fig.update_layout(
                title="Sentiment Probabilities",
                yaxis_title="Probability",
                height=400,
                showlegend=False
            )
            return fig
        except Exception as e:
            logger.error(f"Failed to create bars: {e}")
            return AdvancedVisualizer._message_figure(f"Visualization Error: {str(e)}")

    @staticmethod
    def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
        """Build a donut chart of sentiment counts, ignoring ``'Error'`` records.

        The title reports ``len(results)``, which includes failed records.
        """
        colors = config.THEMES.get(theme, config.THEMES['default'])
        try:
            # Count sentiments
            sentiments = [r['sentiment'] for r in results
                          if 'sentiment' in r and r['sentiment'] != 'Error']
            if not sentiments:
                return AdvancedVisualizer._message_figure(
                    "No valid results to display", title="Batch Summary")
            sentiment_counts = Counter(sentiments)
            # Theme keys are the first three letters of the lower-cased label
            # ('pos', 'neg', 'neu'); anything else is drawn grey.
            slice_colors = [colors.get(label.lower()[:3], '#999999')
                            for label in sentiment_counts]
            fig = go.Figure(data=[go.Pie(
                labels=list(sentiment_counts.keys()),
                values=list(sentiment_counts.values()),
                marker_colors=slice_colors,
                textinfo='label+percent',
                hole=0.3
            )])
            fig.update_layout(
                title=f"Batch Analysis Summary ({len(results)} texts)",
                height=400
            )
            return fig
        except Exception as e:
            logger.error(f"Failed to create batch summary: {e}")
            return AdvancedVisualizer._message_figure(f"Error: {str(e)}")

    @staticmethod
    def create_confidence_distribution(results: List[Dict]) -> go.Figure:
        """Build a 20-bin histogram of confidences, ignoring ``'Error'`` records."""
        try:
            confidences = [r['confidence'] for r in results
                           if 'confidence' in r and r['sentiment'] != 'Error']
            if not confidences:
                return AdvancedVisualizer._message_figure(
                    "No confidence data to display", title="Confidence Distribution")
            fig = go.Figure(data=[go.Histogram(
                x=confidences,
                nbinsx=20,
                marker_color='skyblue',
                opacity=0.7
            )])
            fig.update_layout(
                title="Confidence Distribution",
                xaxis_title="Confidence Score",
                yaxis_title="Frequency",
                height=400
            )
            return fig
        except Exception as e:
            logger.error(f"Failed to create confidence distribution: {e}")
            return AdvancedVisualizer._message_figure(f"Error: {str(e)}")
# Main application functions with better error handling
def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
                        remove_punct: bool, remove_nums: bool):
    """Analyse one text for the "Single Analysis" tab.

    Args:
        text: the text to analyse.
        language: display name from the language dropdown (e.g. ``'English'``);
            unknown names are treated as auto-detect.
        theme: colour theme name passed to the visualisers.
        clean_text, remove_punct, remove_nums: preprocessing switches.

    Returns:
        ``(markdown_summary, gauge_figure, bars_figure)``. On empty input or
        any failure the figures are ``None`` and the first element is a
        user-facing message. Successful analyses are appended to the history.
    """
    try:
        if not text or not text.strip():
            return "❌ Please enter text to analyze", None, None

        # Make sure a model is loaded.
        if not initialize_models():
            return "❌ Failed to load sentiment analysis models. Please check your internet connection and try again.", None, None

        # Map display names back to language codes. Derived from the config
        # so that adding a language needs only one change.
        language_map = {label: code for code, label in config.SUPPORTED_LANGUAGES.items()}
        language_code = language_map.get(language, 'auto')

        preprocessing_options = {
            'clean_text': clean_text,
            'remove_punctuation': remove_punct,
            'remove_numbers': remove_nums
        }

        # Analyse the text.
        result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)

        # Add to history
        history_manager.add_entry({
            'text': text[:100] + '...' if len(text) > 100 else text,
            'full_text': text,
            'sentiment': result['sentiment'],
            'confidence': result['confidence'],
            'pos_prob': result['pos_prob'],
            'neg_prob': result['neg_prob'],
            'neu_prob': result.get('neu_prob', 0),
            'language': result['language'],
            'timestamp': datetime.now().isoformat(),
            'analysis_type': 'single'
        })

        # Create visualizations
        gauge_fig = AdvancedVisualizer.create_sentiment_gauge(result, theme)
        bars_fig = AdvancedVisualizer.create_probability_bars(result, theme)

        # Create info text (Markdown; the leading and trailing empty items
        # produce the surrounding newlines).
        keywords = ', '.join(result['keywords']) if result['keywords'] else 'None'
        info_text = "\n".join([
            "",
            "✅ **Analysis Results:**",
            f"- **Sentiment:** {result['sentiment']} (Confidence: {result['confidence']:.3f})",
            f"- **Language:** {result['language'].upper()}",
            f"- **Keywords:** {keywords}",
            f"- **Text Stats:** {result['word_count']} words, {result['char_count']} characters",
            "📊 **Probability Scores:**",
            f"- Positive: {result['pos_prob']:.3f}",
            f"- Negative: {result['neg_prob']:.3f}",
            f"- Neutral: {result.get('neu_prob', 0):.3f}",
            "",
        ])
        return info_text, gauge_fig, bars_fig
    except Exception as e:
        logger.error(f"Single text analysis failed: {e}")
        error_msg = f"❌ **Analysis Failed:** {str(e)}\n\nPlease check your input and try again."
        return error_msg, None, None
def analyze_batch_texts(batch_text: str, language: str, theme: str,
                        clean_text: bool, remove_punct: bool, remove_nums: bool):
    """Analyse several texts (one per line) for the "Batch Analysis" tab.

    Args:
        batch_text: textarea content; each non-empty line is one text.
        language: display name from the language dropdown; unknown names are
            treated as auto-detect.
        theme: colour theme name passed to the summary chart.
        clean_text, remove_punct, remove_nums: preprocessing switches.

    Returns:
        ``(markdown_summary, results_dataframe, summary_figure,
        confidence_figure)``. On invalid input or failure the last three are
        ``None`` and the first is a user-facing message. Successfully analysed
        texts are appended to the history.
    """
    try:
        if not batch_text or not batch_text.strip():
            return "❌ Please enter texts to analyze (one per line)", None, None, None

        # Make sure a model is loaded.
        if not initialize_models():
            return "❌ Failed to load sentiment analysis models", None, None, None

        # Parse batch input
        texts = TextProcessor.parse_batch_input(batch_text)
        if len(texts) > config.BATCH_SIZE_LIMIT:
            return f"❌ Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
        if not texts:
            return "❌ No valid texts found", None, None, None

        # Map display names back to language codes. Derived from the config
        # so that adding a language needs only one change.
        language_map = {label: code for code, label in config.SUPPORTED_LANGUAGES.items()}
        language_code = language_map.get(language, 'auto')

        preprocessing_options = {
            'clean_text': clean_text,
            'remove_punctuation': remove_punct,
            'remove_numbers': remove_nums
        }

        # Analyze all texts
        results = SentimentAnalyzer.analyze_batch(texts, language_code, preprocessing_options)

        # Add successful results to history
        batch_entries = [
            {
                'text': text[:100] + '...' if len(text) > 100 else text,
                'full_text': text,
                'sentiment': result['sentiment'],
                'confidence': result['confidence'],
                'pos_prob': result['pos_prob'],
                'neg_prob': result['neg_prob'],
                'neu_prob': result.get('neu_prob', 0),
                'language': result['language'],
                'timestamp': datetime.now().isoformat(),
                'analysis_type': 'batch',
                'batch_index': i
            }
            for i, (text, result) in enumerate(zip(texts, results))
            if 'error' not in result
        ]
        history_manager.add_batch_entries(batch_entries)

        # Create visualizations
        summary_fig = AdvancedVisualizer.create_batch_summary(results, theme)
        confidence_fig = AdvancedVisualizer.create_confidence_distribution(results)

        # Create results table. Failed rows carry an 'Error' column and
        # successful rows a 'Keywords' column.
        df_data = []
        for i, (text, result) in enumerate(zip(texts, results)):
            preview = text[:50] + '...' if len(text) > 50 else text
            if 'error' in result:
                df_data.append({
                    'Index': i+1,
                    'Text': preview,
                    'Sentiment': 'Error',
                    'Confidence': '0.000',
                    'Language': 'Unknown',
                    'Error': result.get('error', 'Unknown error')
                })
            else:
                df_data.append({
                    'Index': i+1,
                    'Text': preview,
                    'Sentiment': result['sentiment'],
                    'Confidence': f"{result['confidence']:.3f}",
                    'Language': result['language'].upper(),
                    'Keywords': ', '.join(result.get('keywords', [])[:3])
                })
        df = pd.DataFrame(df_data)

        # Summary info
        successful_results = [r for r in results if 'error' not in r]
        error_count = len(results) - len(successful_results)
        if successful_results:
            sentiment_counts = Counter(r['sentiment'] for r in successful_results)
            avg_confidence = np.mean([r['confidence'] for r in successful_results])
            summary_text = "\n".join([
                "",
                "✅ **Batch Analysis Summary:**",
                f"- **Total Texts:** {len(texts)}",
                f"- **Successful:** {len(successful_results)}",
                f"- **Errors:** {error_count}",
                f"- **Average Confidence:** {avg_confidence:.3f}",
                "- **Sentiment Distribution:**",
                f"- Positive: {sentiment_counts.get('Positive', 0)}",
                f"- Negative: {sentiment_counts.get('Negative', 0)}",
                f"- Neutral: {sentiment_counts.get('Neutral', 0)}",
                "",
            ])
        else:
            summary_text = f"❌ All {len(texts)} texts failed to analyze."
        return summary_text, df, summary_fig, confidence_fig
    except Exception as e:
        logger.error(f"Batch analysis failed: {e}")
        return f"❌ Error: {str(e)}", None, None, None
def get_history_stats():
    """Return the history statistics as a Markdown report.

    Returns a hint message when the history is empty, and an error message
    (after logging) if the statistics cannot be produced.
    """
    try:
        stats = history_manager.get_stats()
        if not stats:
            return "📊 No analysis history available yet. Analyze some texts to see statistics!"
        # The empty first and last items give the report its leading and
        # trailing newline.
        report_lines = [
            "",
            "📊 **Comprehensive History Statistics:**",
            "**📈 Analysis Counts:**",
            f"- Total Analyses: {stats['total_analyses']}",
            f"- 😊 Positive: {stats['positive_count']}",
            f"- 😞 Negative: {stats['negative_count']}",
            f"- 😐 Neutral: {stats['neutral_count']}",
            "**🎯 Confidence Metrics:**",
            f"- Average Confidence: {stats['avg_confidence']:.3f}",
            f"- Highest Confidence: {stats['max_confidence']:.3f}",
            f"- Lowest Confidence: {stats['min_confidence']:.3f}",
            "**🌍 Language Statistics:**",
            f"- Languages Detected: {stats['languages_detected']}",
            f"- Most Common Language: {stats['most_common_language'].upper()}",
            "**📝 Text Statistics:**",
            f"- Average Text Length: {stats['avg_text_length']:.1f} characters",
            "",
        ]
        return "\n".join(report_lines)
    except Exception as e:
        logger.error(f"Failed to get history stats: {e}")
        return f"❌ Error getting statistics: {str(e)}"
def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
    """Return a summary and table of history entries matching the filters.

    Args:
        sentiment_filter: ``'All'`` or an exact sentiment label.
        language_filter: ``'All'``, a display name from
            ``config.SUPPORTED_LANGUAGES`` (e.g. ``'English'``) or a language code.
        min_confidence: inclusive lower bound; values ``<= 0`` disable it.

    Returns:
        ``(markdown_summary, dataframe)`` showing at most the 20 most recent
        matches, or ``(message, None)`` when nothing matches or on failure.
    """
    try:
        # Convert filters
        sentiment = sentiment_filter if sentiment_filter != "All" else None
        if language_filter == "All":
            language = None
        else:
            # History stores language codes ('en'), while the dropdown supplies
            # display names ('English'): translate the name to its code.
            # Anything unrecognised is treated as a code.
            name_to_code = {name: code for code, name in config.SUPPORTED_LANGUAGES.items()}
            language = name_to_code.get(language_filter, language_filter.lower())

        filtered_history = history_manager.filter_history(
            sentiment=sentiment,
            language=language,
            min_confidence=min_confidence if min_confidence > 0 else None
        )
        if not filtered_history:
            return "🔍 No entries match the filter criteria", None

        # Create DataFrame for display
        df_data = []
        for entry in filtered_history[-20:]:  # Show last 20 entries
            df_data.append({
                # First 16 characters of the ISO timestamp: YYYY-MM-DDTHH:MM
                'Timestamp': entry['timestamp'][:16],
                'Text': entry['text'],
                'Sentiment': entry['sentiment'],
                'Confidence': f"{entry['confidence']:.3f}",
                'Language': entry['language'].upper(),
                'Type': entry.get('analysis_type', 'single')
            })
        df = pd.DataFrame(df_data)
        summary = "\n".join([
            "",
            "🔍 **Filtered Results:**",
            f"- Found {len(filtered_history)} entries matching criteria",
            f"- Showing most recent {min(20, len(filtered_history))} entries",
            "",
        ])
        return summary, df
    except Exception as e:
        logger.error(f"Failed to filter history: {e}")
        return f"❌ Error filtering history: {str(e)}", None
def plot_history_dashboard():
    """Build the 2x2 analytics dashboard from the stored history.

    Returns:
        ``(figure, status_message)``; the figure is ``None`` when fewer than
        two analyses exist or when building the figure fails.
    """
    try:
        history = history_manager.get_history()
        if len(history) < 2:
            return None, "📊 Need at least 2 analyses to create dashboard. Analyze more texts!"

        # Pull the plotted series out of the history entries.
        sentiments = [item['sentiment'] for item in history]
        confidences = [item['confidence'] for item in history]
        pos_probs = [item.get('pos_prob', 0) for item in history]
        languages = [item.get('language', 'en') for item in history]
        indices = list(range(len(history)))

        # Create subplots
        subplot_specs = [
            [{"secondary_y": False}, {"secondary_y": False}],
            [{"type": "pie"}, {"type": "bar"}],
        ]
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
                            'Language Distribution', 'Sentiment Summary'],
            specs=subplot_specs
        )

        # Sentiment timeline: markers coloured by label; anything that is
        # neither Positive nor Negative is drawn orange.
        marker_palette = {'Positive': '#4CAF50', 'Negative': '#F44336'}
        marker_colors = [marker_palette.get(label, '#FF9800') for label in sentiments]
        timeline = go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
                              marker=dict(color=marker_colors, size=6),
                              name='Positive Probability')
        fig.add_trace(timeline, row=1, col=1)

        # Confidence distribution
        fig.add_trace(go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
                      row=1, col=2)

        # Language distribution
        lang_counts = Counter(languages)
        language_pie = go.Pie(labels=list(lang_counts.keys()),
                              values=list(lang_counts.values()),
                              name="Languages")
        fig.add_trace(language_pie, row=2, col=1)

        # Sentiment summary
        sent_counts = Counter(sentiments)
        colors_dict = {'Positive': '#4CAF50', 'Negative': '#F44336', 'Neutral': '#FF9800'}
        bar_colors = [colors_dict.get(label, '#999999') for label in sent_counts.keys()]
        sentiment_bars = go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
                                marker_color=bar_colors)
        fig.add_trace(sentiment_bars, row=2, col=2)

        fig.update_layout(height=800, showlegend=False, title_text="Analysis Dashboard")
        return fig, f"📊 Dashboard showing {len(history)} analyses"
    except Exception as e:
        logger.error(f"Failed to create dashboard: {e}")
        return None, f"❌ Error creating dashboard: {str(e)}"
def export_history_csv():
    """Write the history to a temporary CSV file.

    Returns:
        ``(file_path, status_message)``; ``file_path`` is ``None`` when there
        is nothing to export or the export fails. The file is created with
        ``delete=False``, so it remains on disk after this function returns.
    """
    try:
        history = history_manager.get_history()
        if not history:
            return None, "📊 No history to export"
        df = pd.DataFrame(history)
        # Reserve a unique path and close the handle straight away: pandas
        # reopens the file by name, and leaving the handle open would leak it
        # (and block the reopen on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') as temp_file:
            csv_path = temp_file.name
        df.to_csv(csv_path, index=False)
        return csv_path, f"✅ Exported {len(history)} entries to CSV"
    except Exception as e:
        logger.error(f"CSV export failed: {e}")
        return None, f"❌ Export failed: {str(e)}"
def export_history_excel():
    """Write the history to a temporary ``.xlsx`` file.

    Returns:
        ``(file_path, status_message)``; ``file_path`` is ``None`` when there
        is nothing to export or the export fails. The file is created with
        ``delete=False``, so it remains on disk after this function returns.
    """
    try:
        history = history_manager.get_history()
        if not history:
            return None, "📊 No history to export"
        df = pd.DataFrame(history)
        # Reserve a unique path and close the handle straight away: pandas
        # reopens the file by name, and leaving the handle open would leak it
        # (and block the reopen on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
            excel_path = temp_file.name
        df.to_excel(excel_path, index=False)
        return excel_path, f"✅ Exported {len(history)} entries to Excel"
    except Exception as e:
        logger.error(f"Excel export failed: {e}")
        return None, f"❌ Export failed: {str(e)}"
def clear_all_history():
    """Empty the analysis history and return a status message with the count removed."""
    try:
        removed = history_manager.clear()
    except Exception as e:
        logger.error(f"Failed to clear history: {e}")
        return f"❌ Error clearing history: {str(e)}"
    return f"🗑️ Cleared {removed} entries from history"
def get_recent_analyses():
    """Return a Markdown list of the ten most recent analyses.

    Each line shows an emoji for the sentiment, the label, the confidence and
    the stored text preview. Returns a hint when the history is empty and an
    error message (after logging) on failure.
    """
    try:
        recent = history_manager.get_recent_history(10)
        if not recent:
            return "📊 No recent analyses available. Start analyzing some texts!"
        # Anything that is neither Positive nor Negative gets the neutral face.
        emoji_for = {'Positive': "😊", 'Negative': "😞"}
        rows = [
            f"{position}. {emoji_for.get(entry['sentiment'], '😐')} "
            f"**{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n"
            for position, entry in enumerate(recent, 1)
        ]
        return "🕒 **Recent Analyses (Last 10):**\n\n" + "".join(rows)
    except Exception as e:
        logger.error(f"Failed to get recent analyses: {e}")
        return f"❌ Error getting recent analyses: {str(e)}"
# Sample data
# One example per language option, in dropdown order: Auto Detect, English,
# Chinese, Spanish, French, German, Swedish. Each is wrapped in a
# single-element list, one row per example.
_SAMPLE_SENTENCES = (
    "The film had its moments, but overall it felt a bit too long and lacked emotional depth.",
    "I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled.",
    "这部电影节奏拖沓,剧情老套,完全没有让我产生任何共鸣,是一次失望的观影体验。",
    "Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable.",
    "Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film.",
    "Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt.",
    "Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls.",
)
SAMPLE_TEXTS = [[sentence] for sentence in _SAMPLE_SENTENCES]

# Five short English reviews, one per line, for the batch tab.
BATCH_SAMPLE = "\n".join((
    "I love this product! It works perfectly.",
    "The service was terrible and slow.",
    "Not sure if I like it or not.",
    "Amazing quality and fast delivery!",
    "Could be better, but it's okay.",
))
# Gradio Interface - simplified version focused on core functionality
# Builds the `demo` Blocks app: three tabs (single analysis, batch analysis,
# history & analytics) followed by the button-to-callback wiring.
# NOTE(review): indentation is missing from this copy of the file, so the
# nesting of the `with` blocks below is not recorded here - restore it from
# the original source before running.
with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
gr.Markdown("# 🎭 Multilingual Sentiment Analyzer")
gr.Markdown("Comprehensive sentiment analysis with batch processing and multilingual support")
# --- Tab 1: single-text analysis ---
with gr.Tab("📝 Single Analysis"):
with gr.Row():
with gr.Column(scale=2):
text_input = gr.Textbox(
label="Text to Analyze",
placeholder="Enter your text here... (supports multiple languages)",
lines=4
)
with gr.Row():
# Display names; analyze_single_text maps them back to language codes.
language_select = gr.Dropdown(
choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
value='Auto Detect',
label="Language"
)
theme_select = gr.Dropdown(
choices=list(config.THEMES.keys()),
value='default',
label="Theme"
)
with gr.Row():
# Preprocessing switches passed straight to analyze_single_text.
clean_text = gr.Checkbox(label="Clean Text", value=False)
remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
gr.Examples(
examples=SAMPLE_TEXTS,
inputs=text_input,
label="Sample Texts (Multiple Languages)"
)
with gr.Column(scale=1):
result_info = gr.Markdown("Enter text and click Analyze to see results")
with gr.Row():
gauge_plot = gr.Plot(label="Sentiment Gauge")
bars_plot = gr.Plot(label="Probability Distribution")
# --- Tab 2: batch analysis (one text per line) ---
with gr.Tab("📊 Batch Analysis"):
with gr.Row():
with gr.Column(scale=2):
batch_input = gr.Textbox(
label="Batch Text Input (One text per line)",
placeholder="Enter multiple texts, one per line...",
lines=8
)
with gr.Row():
batch_language = gr.Dropdown(
choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
value='Auto Detect',
label="Language"
)
batch_theme = gr.Dropdown(
choices=list(config.THEMES.keys()),
value='default',
label="Theme"
)
with gr.Row():
batch_clean = gr.Checkbox(label="Clean Text", value=False)
batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
gr.Examples(
examples=[[BATCH_SAMPLE]],
inputs=batch_input,
label="Sample Batch Input"
)
with gr.Column(scale=1):
batch_summary = gr.Markdown("Enter texts and click Analyze Batch to see results")
with gr.Row():
batch_results_table = gr.DataFrame(
label="Detailed Results",
interactive=False
)
with gr.Row():
batch_summary_plot = gr.Plot(label="Sentiment Summary")
batch_confidence_plot = gr.Plot(label="Confidence Distribution")
# --- Tab 3: history statistics, filtering, dashboard and export ---
with gr.Tab("📈 History & Analytics"):
with gr.Row():
with gr.Column():
gr.Markdown("### 📊 Statistics")
stats_btn = gr.Button("📈 Get Statistics")
recent_btn = gr.Button("🕒 Recent Analyses")
# Shared output for both the statistics and recent-analyses buttons.
stats_output = gr.Markdown("Click 'Get Statistics' to view analysis history")
with gr.Column():
gr.Markdown("### 🔍 Filter History")
with gr.Row():
sentiment_filter = gr.Dropdown(
choices=["All", "Positive", "Negative", "Neutral"],
value="All",
label="Filter by Sentiment"
)
language_filter = gr.Dropdown(
choices=["All", "English", "Chinese", "Spanish", "French", "German", "Swedish"],
value="All",
label="Filter by Language"
)
confidence_filter = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.0,
step=0.1,
label="Minimum Confidence"
)
filter_btn = gr.Button("🔍 Filter History")
with gr.Row():
dashboard_btn = gr.Button("📊 View Dashboard")
clear_btn = gr.Button("🗑️ Clear History", variant="stop")
with gr.Row():
export_csv_btn = gr.Button("📄 Export CSV")
export_excel_btn = gr.Button("📊 Export Excel")
dashboard_plot = gr.Plot(label="Analytics Dashboard")
with gr.Row():
filtered_results = gr.Markdown("Use filters to view specific entries")
filtered_table = gr.DataFrame(label="Filtered History", interactive=False)
csv_file = gr.File(label="Download CSV Report")
excel_file = gr.File(label="Download Excel Report")
# Status line shared by the dashboard, export and clear actions.
history_status = gr.Textbox(label="Status", interactive=False)
# Event handlers
# Single Analysis
analyze_btn.click(
analyze_single_text,
inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
outputs=[result_info, gauge_plot, bars_plot]
)
# Batch Analysis
batch_analyze_btn.click(
analyze_batch_texts,
inputs=[batch_input, batch_language, batch_theme, batch_clean, batch_remove_punct, batch_remove_nums],
outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
)
# History & Analytics
stats_btn.click(
get_history_stats,
outputs=stats_output
)
recent_btn.click(
get_recent_analyses,
outputs=stats_output
)
filter_btn.click(
filter_history_display,
inputs=[sentiment_filter, language_filter, confidence_filter],
outputs=[filtered_results, filtered_table]
)
dashboard_btn.click(
plot_history_dashboard,
outputs=[dashboard_plot, history_status]
)
export_csv_btn.click(
export_history_csv,
outputs=[csv_file, history_status]
)
export_excel_btn.click(
export_history_excel,
outputs=[excel_file, history_status]
)
clear_btn.click(
clear_all_history,
outputs=history_status
)
# Launch the application
if __name__ == "__main__":
    # Listen on every interface at port 7860, request a public share link,
    # and surface errors in the UI.
    launch_options = dict(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
    try:
        logger.info("Starting Multilingual Sentiment Analyzer...")
        demo.launch(**launch_options)
    except Exception as e:
        logger.error(f"Failed to launch application: {e}")
        print(f"❌ Application failed to start: {e}")
        print("Please check your dependencies and try again.")