Spaces:

entropy25
/

multilingual-sentiment-analyzer

Sleeping

App Files Files Community

multilingual-sentiment-analyzer / app.py

entropy25

Update app.py

5eb9344 verified about 2 months ago

raw

history blame

46.7 kB

	import torch
	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import plotly.graph_objects as go
	import plotly.express as px
	from plotly.subplots import make_subplots
	import numpy as np
	from collections import Counter, defaultdict
	import re
	import json
	import csv
	import io
	import tempfile
	from datetime import datetime
	import logging
	from functools import lru_cache
	from dataclasses import dataclass
	from typing import List, Dict, Optional, Tuple
	import pandas as pd

	# Configure logging first so the optional-import fallbacks below can report.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Optional dependencies: each one sets an *_AVAILABLE flag instead of failing.
	try:
	    from wordcloud import WordCloud
	    WORDCLOUD_AVAILABLE = True
	except ImportError:
	    WORDCLOUD_AVAILABLE = False
	    logger.warning("WordCloud not available")

	try:
	    import nltk
	    from nltk.corpus import stopwords
	    nltk.download('stopwords', quiet=True)
	    nltk.download('punkt', quiet=True)
	    STOP_WORDS = set(stopwords.words('english'))
	    NLTK_AVAILABLE = True
	except Exception:
	    # Broad on purpose: the import, the downloads, or the corpus lookup may
	    # each fail. `Exception` (rather than a bare `except:`) still lets
	    # KeyboardInterrupt and SystemExit propagate.
	    NLTK_AVAILABLE = False
	    STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
	    logger.warning("NLTK not available, using basic stopwords")

	try:
	    import langdetect
	    LANGDETECT_AVAILABLE = True
	except ImportError:
	    LANGDETECT_AVAILABLE = False
	    logger.warning("langdetect not available, using fallback language detection")

	# Try to import SHAP and LIME
	try:
	    import shap
	    SHAP_AVAILABLE = True
	except ImportError:
	    SHAP_AVAILABLE = False
	    logger.warning("SHAP not available, using basic analysis")

	try:
	    from lime.lime_text import LimeTextExplainer
	    LIME_AVAILABLE = True
	except ImportError:
	    LIME_AVAILABLE = False
	    logger.warning("LIME not available, using basic analysis")

	# Configuration
	@dataclass
	class Config:
	    """Application limits plus the language, model and theme lookup tables.

	    The annotated names are dataclass fields. The three tables are plain
	    (un-annotated) class attributes; they are read elsewhere in this file
	    as config.SUPPORTED_LANGUAGES, config.MODELS and config.THEMES.
	    """
	    MAX_HISTORY_SIZE: int = 500
	    BATCH_SIZE_LIMIT: int = 30
	    MAX_TEXT_LENGTH: int = 512
	    CACHE_SIZE: int = 64

	    # Language code -> display name
	    SUPPORTED_LANGUAGES = dict(
	        auto='Auto Detect',
	        en='English',
	        zh='Chinese',
	        es='Spanish',
	        fr='French',
	        de='German',
	        sv='Swedish',
	    )

	    # Model identifiers; 'fallback' is the backup model
	    MODELS = dict(
	        en="cardiffnlp/twitter-roberta-base-sentiment-latest",
	        multilingual="cardiffnlp/twitter-xlm-roberta-base-sentiment",
	        zh="uer/roberta-base-finetuned-dianping-chinese",
	        fallback="distilbert-base-uncased-finetuned-sst-2-english",
	    )

	    # Theme name -> colours for positive / negative / neutral
	    THEMES = dict(
	        default=dict(pos='#4CAF50', neg='#F44336', neu='#FF9800'),
	        ocean=dict(pos='#0077BE', neg='#FF6B35', neu='#00BCD4'),
	        dark=dict(pos='#66BB6A', neg='#EF5350', neu='#FFA726'),
	        rainbow=dict(pos='#9C27B0', neg='#E91E63', neu='#FF5722'),
	    )

	config = Config()

	class ModelManager:
	    """Load one sentiment model and tokenizer and hand them out on request.

	    The multilingual model from config.MODELS is tried first, then the
	    'fallback' model. Whichever loads is stored under the 'default' key
	    and is returned for every language.
	    """

	    def __init__(self):
	        self.models = {}
	        self.tokenizers = {}
	        self.device = self._get_device()
	        self.model_loaded = False
	        self._load_models()

	    def _get_device(self):
	        """Return the torch device to use: CUDA, then MPS, then CPU."""
	        try:
	            if torch.cuda.is_available():
	                return torch.device("cuda")
	            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
	                return torch.device("mps")
	            else:
	                return torch.device("cpu")
	        except Exception:
	            # Any probing failure falls back to CPU; KeyboardInterrupt and
	            # SystemExit are no longer swallowed.
	            return torch.device("cpu")

	    def _load_default(self, model_name):
	        """Load tokenizer and model for model_name into the 'default' slots."""
	        tokenizer = AutoTokenizer.from_pretrained(model_name)
	        model = AutoModelForSequenceClassification.from_pretrained(model_name)
	        model.to(self.device)
	        # Publish only after both parts loaded, so a failure never leaves a
	        # tokenizer from one model paired with the model of another.
	        self.tokenizers['default'] = tokenizer
	        self.models['default'] = model

	    def _load_models(self):
	        """Load the multilingual model, or the fallback model if that fails.

	        Raises:
	            RuntimeError: if neither model can be loaded.
	        """
	        try:
	            # Try the multilingual model first
	            model_name = config.MODELS['multilingual']
	            logger.info(f"Loading model: {model_name}")
	            self._load_default(model_name)
	            logger.info(f"Successfully loaded default model: {model_name}")
	            self.model_loaded = True
	            return
	        except Exception as e:
	            logger.error(f"Failed to load multilingual model: {e}")

	        # Try the backup model
	        try:
	            fallback_model = config.MODELS['fallback']
	            logger.info(f"Trying fallback model: {fallback_model}")
	            self._load_default(fallback_model)
	            logger.info(f"Successfully loaded fallback model: {fallback_model}")
	            self.model_loaded = True
	        except Exception as e2:
	            logger.error(f"Failed to load fallback model: {e2}")
	            self.model_loaded = False
	            raise RuntimeError("Failed to load any sentiment analysis model") from e2

	    def get_model(self, language='en'):
	        """Return the (model, tokenizer) pair.

	        The language argument is accepted but not used: every language
	        gets the default model.

	        Raises:
	            RuntimeError: if no model has been loaded.
	        """
	        if not self.model_loaded:
	            raise RuntimeError("No models are loaded")

	        return self.models['default'], self.tokenizers['default']

	    @staticmethod
	    def detect_language(text: str) -> str:
	        """Return a language code for text, defaulting to 'en'.

	        Without langdetect, text containing a character in
	        U+4E00..U+9FFF is reported as 'zh' and everything else as 'en'.
	        """
	        if not LANGDETECT_AVAILABLE:
	            # Simple language detection
	            if re.search(r'[\u4e00-\u9fff]', text):
	                return 'zh'
	            else:
	                return 'en'

	        try:
	            detected = langdetect.detect(text)
	            language_mapping = {
	                'zh-cn': 'zh',
	                'zh-tw': 'zh'
	            }
	            detected = language_mapping.get(detected, detected)
	            return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
	        except Exception:
	            # Detection is best-effort; any failure means "assume English".
	            return 'en'

	# Global model manager, created on first use
	model_manager = None

	def initialize_models():
	    """Create the global ModelManager if needed.

	    Returns:
	        True when a manager already exists or was just created, False
	        when constructing it raised (the error is logged).
	    """
	    global model_manager
	    if model_manager is not None:
	        return True
	    try:
	        model_manager = ModelManager()
	    except Exception as e:
	        logger.error(f"Model initialization failed: {e}")
	        return False
	    return True

	class HistoryManager:
	    """Keep a size-capped, in-memory list of past analysis results.

	    Each entry is a dict. The methods below read its 'sentiment',
	    'confidence', 'language' and 'full_text' keys.
	    """

	    def __init__(self):
	        self._history = []

	    def add_entry(self, entry: Dict):
	        """Append one entry, keeping only the newest config.MAX_HISTORY_SIZE."""
	        self._history.append(entry)
	        if len(self._history) > config.MAX_HISTORY_SIZE:
	            self._history = self._history[-config.MAX_HISTORY_SIZE:]

	    def add_batch_entries(self, entries: List[Dict]):
	        """Append every entry in order, applying the size cap after each."""
	        for entry in entries:
	            self.add_entry(entry)

	    def get_history(self) -> List[Dict]:
	        """Return a shallow copy of the full history."""
	        return self._history.copy()

	    def get_recent_history(self, n: int = 10) -> List[Dict]:
	        """Return a new list holding the last n entries (empty when n <= 0)."""
	        # A bare self._history[-0:] would return the whole list.
	        if n <= 0:
	            return []
	        return self._history[-n:]

	    def filter_history(self, sentiment: Optional[str] = None, language: Optional[str] = None,
	                       min_confidence: Optional[float] = None) -> List[Dict]:
	        """Return the entries that match every filter that was supplied.

	        Args:
	            sentiment: exact 'sentiment' value to keep; falsy means no filter.
	            language: exact language code to keep (entries without a
	                'language' key count as 'en'); falsy means no filter.
	            min_confidence: lowest 'confidence' to keep; None means no filter.

	        Returns:
	            A new list, so callers can never mutate the stored history.
	        """
	        filtered = list(self._history)

	        if sentiment:
	            filtered = [h for h in filtered if h['sentiment'] == sentiment]
	        if language:
	            filtered = [h for h in filtered if h.get('language', 'en') == language]
	        if min_confidence is not None:
	            filtered = [h for h in filtered if h['confidence'] >= min_confidence]

	        return filtered

	    def clear(self) -> int:
	        """Remove every entry and return how many were removed."""
	        count = len(self._history)
	        self._history.clear()
	        return count

	    def get_stats(self) -> Dict:
	        """Return aggregate statistics, or {} when the history is empty."""
	        if not self._history:
	            return {}

	        sentiments = [item['sentiment'] for item in self._history]
	        confidences = [item['confidence'] for item in self._history]
	        languages = [item.get('language', 'en') for item in self._history]

	        return {
	            'total_analyses': len(self._history),
	            'positive_count': sentiments.count('Positive'),
	            'negative_count': sentiments.count('Negative'),
	            'neutral_count': sentiments.count('Neutral'),
	            'avg_confidence': np.mean(confidences) if confidences else 0,
	            'max_confidence': np.max(confidences) if confidences else 0,
	            'min_confidence': np.min(confidences) if confidences else 0,
	            'languages_detected': len(set(languages)),
	            'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
	            'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history]) if self._history else 0
	        }

	history_manager = HistoryManager()

	class TextProcessor:
	    """Static helpers for cleaning text, picking keywords and splitting batches."""

	    @staticmethod
	    @lru_cache(maxsize=config.CACHE_SIZE)
	    def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
	        """Lower-case text and drop stop words and words of two characters or fewer.

	        Digits are removed first when remove_numbers is set; characters
	        that are neither word characters nor whitespace are removed when
	        remove_punctuation is set. Results are memoised per argument tuple.
	        """
	        normalized = text.lower().strip()

	        if remove_numbers:
	            normalized = re.sub(r'\d+', '', normalized)

	        if remove_punctuation:
	            normalized = re.sub(r'[^\w\s]', '', normalized)

	        return ' '.join(
	            token for token in normalized.split()
	            if len(token) > 2 and token not in STOP_WORDS
	        )

	    @staticmethod
	    def extract_keywords(text: str, top_k: int = 5) -> List[str]:
	        """Return up to top_k most frequent items in text.

	        For text containing a character in U+4E00..U+9FFF the items are
	        single characters from that range; otherwise they are the words
	        left after clean_text().
	        """
	        if re.search(r'[\u4e00-\u9fff]', text):
	            han_chars = ''.join(re.findall(r'[\u4e00-\u9fff]+', text))
	            ranked = Counter(han_chars).most_common(top_k)
	        else:
	            ranked = Counter(TextProcessor.clean_text(text).split()).most_common(top_k)
	        return [item for item, _ in ranked]

	    @staticmethod
	    def parse_batch_input(text: str) -> List[str]:
	        """Split textarea input on newlines into stripped, non-empty lines."""
	        stripped_lines = (raw.strip() for raw in text.strip().split('\n'))
	        return [line for line in stripped_lines if line]

	class SentimentAnalyzer:
	    """Run the loaded sentiment model on single texts or batches."""

	    @staticmethod
	    def _scores_from_probs(probs) -> Dict:
	        """Turn a softmax probability vector into the sentiment score fields.

	        A 3-element vector is read as (negative, neutral, positive); any
	        other length is read as (negative, positive).
	        NOTE(review): this label order is assumed, not read from the
	        model's own label mapping - confirm for every model in use.
	        """
	        if len(probs) == 3:  # negative, neutral, positive
	            sentiment_idx = int(np.argmax(probs))
	            return {
	                'sentiment': ('Negative', 'Neutral', 'Positive')[sentiment_idx],
	                'confidence': float(probs[sentiment_idx]),
	                'neg_prob': float(probs[0]),
	                'neu_prob': float(probs[1]),
	                'pos_prob': float(probs[2]),
	                'has_neutral': True
	            }

	        # negative, positive
	        pred = int(np.argmax(probs))
	        return {
	            'sentiment': "Positive" if pred == 1 else "Negative",
	            'confidence': float(probs[pred]),
	            'neg_prob': float(probs[0]),
	            'pos_prob': float(probs[1]),
	            'neu_prob': 0.0,
	            'has_neutral': False
	        }

	    @staticmethod
	    def analyze_text(text: str, language: str = 'auto', preprocessing_options: Optional[Dict] = None) -> Dict:
	        """Classify one text and return its scores plus metadata.

	        Args:
	            text: the text to classify.
	            language: a language code, or 'auto' to detect it from text.
	            preprocessing_options: optional dict with 'clean_text',
	                'remove_punctuation' and 'remove_numbers' flags.

	        Returns:
	            Dict with 'sentiment', 'confidence', 'neg_prob', 'neu_prob',
	            'pos_prob', 'has_neutral', 'language', 'keywords',
	            'word_count' and 'char_count'.

	        Raises:
	            ValueError: if text is None, empty or only whitespace.
	            RuntimeError: if the models cannot be initialised or the
	                analysis fails; the original error is chained as the cause.
	        """
	        if not text or not text.strip():
	            raise ValueError("Empty text provided")

	        # Make sure the model is loaded
	        if not initialize_models():
	            raise RuntimeError("Failed to initialize sentiment analysis models")

	        # Detect language if auto
	        detected_lang = model_manager.detect_language(text) if language == 'auto' else language

	        # Get appropriate model
	        try:
	            model, tokenizer = model_manager.get_model(detected_lang)
	        except Exception as e:
	            logger.error(f"Failed to get model: {e}")
	            raise RuntimeError(f"Model loading failed: {e}") from e

	        # Preprocessing: cleaning is skipped for text containing characters
	        # in U+4E00..U+9FFF.
	        options = preprocessing_options or {}
	        processed_text = text
	        if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
	            processed_text = TextProcessor.clean_text(
	                text,
	                options.get('remove_punctuation', True),
	                options.get('remove_numbers', False)
	            )

	        try:
	            # Tokenize and analyze
	            inputs = tokenizer(
	                processed_text,
	                return_tensors="pt",
	                padding=True,
	                truncation=True,
	                max_length=config.MAX_TEXT_LENGTH
	            ).to(model_manager.device)

	            with torch.no_grad():
	                outputs = model(**inputs)
	                probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]

	            result = SentimentAnalyzer._scores_from_probs(probs)

	            # Metadata is computed from the original text, not the cleaned one
	            result.update({
	                'language': detected_lang,
	                'keywords': TextProcessor.extract_keywords(text),
	                'word_count': len(text.split()),
	                'char_count': len(text)
	            })

	            return result

	        except Exception as e:
	            logger.error(f"Analysis failed: {e}")
	            raise RuntimeError(f"Sentiment analysis failed: {e}") from e

	    @staticmethod
	    def analyze_batch(texts: List[str], language: str = 'auto',
	                      preprocessing_options: Optional[Dict] = None) -> List[Dict]:
	        """Analyze each text in order; failures become 'Error' entries.

	        Returns:
	            One dict per input text, each carrying 'batch_index'. A failed
	            text yields 'sentiment': 'Error', 'confidence': 0.0, the error
	            message under 'error' and a preview of at most 50 characters
	            (plus '...') under 'text'.
	        """
	        results = []
	        for i, text in enumerate(texts):
	            try:
	                result = SentimentAnalyzer.analyze_text(text, language, preprocessing_options)
	                result['batch_index'] = i
	                results.append(result)
	            except Exception as e:
	                results.append({
	                    'sentiment': 'Error',
	                    'confidence': 0.0,
	                    'error': str(e),
	                    'batch_index': i,
	                    'text': text[:50] + '...' if len(text) > 50 else text
	                })
	        return results

	class AdvancedVisualizer:
	    """Build Plotly figures for single results and for batches.

	    Every public method returns a figure. On failure the error is logged
	    and a placeholder figure carrying the error text is returned.
	    """

	    @staticmethod
	    def _message_figure(message: str, title: Optional[str] = None,
	                        font_size: Optional[int] = None) -> go.Figure:
	        """Return an empty 400px figure showing message centred on the paper."""
	        annotation = dict(text=message, x=0.5, y=0.5,
	                          xref="paper", yref="paper", showarrow=False)
	        if font_size is not None:
	            annotation['font'] = dict(size=font_size)

	        layout = {'height': 400}
	        if title is not None:
	            layout['title'] = title

	        fig = go.Figure()
	        fig.add_annotation(**annotation)
	        fig.update_layout(**layout)
	        return fig

	    @staticmethod
	    def _sentiment_color(sentiment: str, colors: Dict) -> str:
	        """Return the theme colour for a sentiment label.

	        Unknown labels use the negative colour, which is what every
	        non-'Positive' label received before 'Neutral' got its own colour.
	        """
	        if sentiment == 'Positive':
	            return colors['pos']
	        if sentiment == 'Neutral':
	            return colors['neu']
	        return colors['neg']

	    @staticmethod
	    def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
	        """Create a gauge for one analysis result.

	        With result['has_neutral'] the gauge shows pos_prob * 100 over
	        three coloured bands; otherwise it shows confidence * 100.
	        An unknown theme name falls back to the 'default' theme.
	        """
	        colors = config.THEMES.get(theme, config.THEMES['default'])

	        try:
	            # Neutral results get the neutral colour rather than the negative one.
	            bar_color = AdvancedVisualizer._sentiment_color(result['sentiment'], colors)

	            if result.get('has_neutral', False):
	                # Three-way gauge
	                fig = go.Figure(go.Indicator(
	                    mode="gauge+number+delta",
	                    value=result['pos_prob'] * 100,
	                    domain={'x': [0, 1], 'y': [0, 1]},
	                    title={'text': f"Sentiment: {result['sentiment']}"},
	                    delta={'reference': 50},
	                    gauge={
	                        'axis': {'range': [None, 100]},
	                        'bar': {'color': bar_color},
	                        'steps': [
	                            {'range': [0, 33], 'color': colors['neg']},
	                            {'range': [33, 67], 'color': colors['neu']},
	                            {'range': [67, 100], 'color': colors['pos']}
	                        ],
	                        'threshold': {
	                            'line': {'color': "red", 'width': 4},
	                            'thickness': 0.75,
	                            'value': 90
	                        }
	                    }
	                ))
	            else:
	                # Two-way gauge
	                fig = go.Figure(go.Indicator(
	                    mode="gauge+number",
	                    value=result['confidence'] * 100,
	                    domain={'x': [0, 1], 'y': [0, 1]},
	                    title={'text': f"Confidence: {result['sentiment']}"},
	                    gauge={
	                        'axis': {'range': [None, 100]},
	                        'bar': {'color': bar_color},
	                        'steps': [
	                            {'range': [0, 50], 'color': "lightgray"},
	                            {'range': [50, 100], 'color': "gray"}
	                        ]
	                    }
	                ))

	            fig.update_layout(height=400, font={'size': 16})
	            return fig

	        except Exception as e:
	            logger.error(f"Failed to create gauge: {e}")
	            # Return an error chart instead of raising
	            return AdvancedVisualizer._message_figure(
	                f"Visualization Error: {str(e)}", font_size=14)

	    @staticmethod
	    def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
	        """Create a bar chart of the class probabilities in result.

	        The Neutral bar is included only when result['has_neutral'] is set.
	        """
	        colors = config.THEMES.get(theme, config.THEMES['default'])

	        try:
	            if result.get('has_neutral', False):
	                labels = ['Negative', 'Neutral', 'Positive']
	                values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
	                bar_colors = [colors['neg'], colors['neu'], colors['pos']]
	            else:
	                labels = ['Negative', 'Positive']
	                values = [result['neg_prob'], result['pos_prob']]
	                bar_colors = [colors['neg'], colors['pos']]

	            fig = go.Figure(data=[
	                go.Bar(x=labels, y=values, marker_color=bar_colors,
	                       text=[f'{v:.3f}' for v in values])
	            ])

	            fig.update_traces(texttemplate='%{text}', textposition='outside')
	            fig.update_layout(
	                title="Sentiment Probabilities",
	                yaxis_title="Probability",
	                height=400,
	                showlegend=False
	            )

	            return fig

	        except Exception as e:
	            logger.error(f"Failed to create bars: {e}")
	            return AdvancedVisualizer._message_figure(f"Visualization Error: {str(e)}")

	    @staticmethod
	    def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
	        """Create a donut chart of sentiment counts, ignoring 'Error' results."""
	        colors = config.THEMES.get(theme, config.THEMES['default'])

	        try:
	            # Count sentiments
	            sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
	            if not sentiments:
	                return AdvancedVisualizer._message_figure(
	                    "No valid results to display", title="Batch Summary")

	            sentiment_counts = Counter(sentiments)

	            # Create pie chart. The first three lower-cased letters of a
	            # label ('pos' / 'neg' / 'neu') are the theme's colour keys.
	            fig = go.Figure(data=[go.Pie(
	                labels=list(sentiment_counts.keys()),
	                values=list(sentiment_counts.values()),
	                marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
	                textinfo='label+percent',
	                hole=0.3
	            )])

	            fig.update_layout(
	                title=f"Batch Analysis Summary ({len(results)} texts)",
	                height=400
	            )

	            return fig

	        except Exception as e:
	            logger.error(f"Failed to create batch summary: {e}")
	            return AdvancedVisualizer._message_figure(f"Error: {str(e)}")

	    @staticmethod
	    def create_confidence_distribution(results: List[Dict]) -> go.Figure:
	        """Create a 20-bin histogram of confidences, ignoring 'Error' results."""
	        try:
	            # .get() so a result without a 'sentiment' key is not a KeyError
	            confidences = [r['confidence'] for r in results
	                           if 'confidence' in r and r.get('sentiment') != 'Error']

	            if not confidences:
	                return AdvancedVisualizer._message_figure(
	                    "No confidence data to display", title="Confidence Distribution")

	            fig = go.Figure(data=[go.Histogram(
	                x=confidences,
	                nbinsx=20,
	                marker_color='skyblue',
	                opacity=0.7
	            )])

	            fig.update_layout(
	                title="Confidence Distribution",
	                xaxis_title="Confidence Score",
	                yaxis_title="Frequency",
	                height=400
	            )

	            return fig

	        except Exception as e:
	            logger.error(f"Failed to create confidence distribution: {e}")
	            return AdvancedVisualizer._message_figure(f"Error: {str(e)}")

	# Main application functions with better error handling
	def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
	                        remove_punct: bool, remove_nums: bool):
	    """Analyze one text for the "Single Analysis" tab.

	    Args:
	        text: text to analyze.
	        language: display name from the language dropdown; unknown names
	            are treated as 'auto'.
	        theme: colour theme name passed to the visualizer.
	        clean_text, remove_punct, remove_nums: preprocessing flags.

	    Returns:
	        (info_text, gauge_figure, bars_figure). On bad input or failure
	        the text is an error message and both figures are None.
	    """
	    try:
	        # `not text` also covers None, which has no .strip()
	        if not text or not text.strip():
	            return "❌ Please enter text to analyze", None, None

	        # Initialization check
	        if not initialize_models():
	            return "❌ Failed to load sentiment analysis models. Please check your internet connection and try again.", None, None

	        # Map display names back to language codes. Inverting the config
	        # table keeps this in step with config.SUPPORTED_LANGUAGES.
	        language_map = {label: code for code, label in config.SUPPORTED_LANGUAGES.items()}
	        language_code = language_map.get(language, 'auto')

	        preprocessing_options = {
	            'clean_text': clean_text,
	            'remove_punctuation': remove_punct,
	            'remove_numbers': remove_nums
	        }

	        # Analyze the text
	        result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)

	        # Add to history
	        history_entry = {
	            'text': text[:100] + '...' if len(text) > 100 else text,
	            'full_text': text,
	            'sentiment': result['sentiment'],
	            'confidence': result['confidence'],
	            'pos_prob': result['pos_prob'],
	            'neg_prob': result['neg_prob'],
	            'neu_prob': result.get('neu_prob', 0),
	            'language': result['language'],
	            'timestamp': datetime.now().isoformat(),
	            'analysis_type': 'single'
	        }
	        history_manager.add_entry(history_entry)

	        # Create visualizations
	        gauge_fig = AdvancedVisualizer.create_sentiment_gauge(result, theme)
	        bars_fig = AdvancedVisualizer.create_probability_bars(result, theme)

	        # Create info text
	        info_text = f"""
	✅ Analysis Results:
	- Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})
	- Language: {result['language'].upper()}
	- Keywords: {', '.join(result['keywords']) if result['keywords'] else 'None'}
	- Text Stats: {result['word_count']} words, {result['char_count']} characters

	📊 Probability Scores:
	- Positive: {result['pos_prob']:.3f}
	- Negative: {result['neg_prob']:.3f}
	- Neutral: {result.get('neu_prob', 0):.3f}
	"""

	        return info_text, gauge_fig, bars_fig

	    except Exception as e:
	        logger.error(f"Single text analysis failed: {e}")
	        error_msg = f"❌ Analysis Failed: {str(e)}\n\nPlease check your input and try again."
	        return error_msg, None, None

	def analyze_batch_texts(batch_text: str, language: str, theme: str,
	                        clean_text: bool, remove_punct: bool, remove_nums: bool):
	    """Analyze several texts (one per line) for the "Batch Analysis" tab.

	    Args:
	        batch_text: textarea content, one text per line.
	        language: display name from the language dropdown; unknown names
	            are treated as 'auto'.
	        theme: colour theme name passed to the visualizer.
	        clean_text, remove_punct, remove_nums: preprocessing flags.

	    Returns:
	        (summary_text, results_dataframe, summary_figure,
	        confidence_figure). On bad input or failure the text is an error
	        message and the other three values are None.
	    """
	    try:
	        # `not batch_text` also covers None, which has no .strip()
	        if not batch_text or not batch_text.strip():
	            return "❌ Please enter texts to analyze (one per line)", None, None, None

	        # Validate the input before loading models, which is the slow step.
	        texts = TextProcessor.parse_batch_input(batch_text)

	        if not texts:
	            return "❌ No valid texts found", None, None, None

	        if len(texts) > config.BATCH_SIZE_LIMIT:
	            return f"❌ Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None

	        # Initialization check
	        if not initialize_models():
	            return "❌ Failed to load sentiment analysis models", None, None, None

	        # Map display names back to language codes. Inverting the config
	        # table keeps this in step with config.SUPPORTED_LANGUAGES.
	        language_map = {label: code for code, label in config.SUPPORTED_LANGUAGES.items()}
	        language_code = language_map.get(language, 'auto')

	        preprocessing_options = {
	            'clean_text': clean_text,
	            'remove_punctuation': remove_punct,
	            'remove_numbers': remove_nums
	        }

	        # Analyze all texts
	        results = SentimentAnalyzer.analyze_batch(texts, language_code, preprocessing_options)

	        # One pass builds both the history entries (successes only) and the
	        # table rows (every text).
	        batch_entries = []
	        df_data = []
	        for i, (text, result) in enumerate(zip(texts, results)):
	            preview = text[:50] + '...' if len(text) > 50 else text

	            if 'error' in result:
	                df_data.append({
	                    'Index': i+1,
	                    'Text': preview,
	                    'Sentiment': 'Error',
	                    'Confidence': '0.000',
	                    'Language': 'Unknown',
	                    'Error': result.get('error', 'Unknown error')
	                })
	                continue

	            batch_entries.append({
	                'text': text[:100] + '...' if len(text) > 100 else text,
	                'full_text': text,
	                'sentiment': result['sentiment'],
	                'confidence': result['confidence'],
	                'pos_prob': result['pos_prob'],
	                'neg_prob': result['neg_prob'],
	                'neu_prob': result.get('neu_prob', 0),
	                'language': result['language'],
	                'timestamp': datetime.now().isoformat(),
	                'analysis_type': 'batch',
	                'batch_index': i
	            })
	            df_data.append({
	                'Index': i+1,
	                'Text': preview,
	                'Sentiment': result['sentiment'],
	                'Confidence': f"{result['confidence']:.3f}",
	                'Language': result['language'].upper(),
	                'Keywords': ', '.join(result.get('keywords', [])[:3])
	            })

	        # Add to history
	        history_manager.add_batch_entries(batch_entries)

	        # Create visualizations
	        summary_fig = AdvancedVisualizer.create_batch_summary(results, theme)
	        confidence_fig = AdvancedVisualizer.create_confidence_distribution(results)

	        # Create results table
	        df = pd.DataFrame(df_data)

	        # Summary info
	        successful_results = [r for r in results if 'error' not in r]
	        error_count = len(results) - len(successful_results)

	        if successful_results:
	            sentiment_counts = Counter([r['sentiment'] for r in successful_results])
	            avg_confidence = np.mean([r['confidence'] for r in successful_results])

	            summary_text = f"""
	✅ Batch Analysis Summary:
	- Total Texts: {len(texts)}
	- Successful: {len(successful_results)}
	- Errors: {error_count}
	- Average Confidence: {avg_confidence:.3f}
	- Sentiment Distribution:
	- Positive: {sentiment_counts.get('Positive', 0)}
	- Negative: {sentiment_counts.get('Negative', 0)}
	- Neutral: {sentiment_counts.get('Neutral', 0)}
	"""
	        else:
	            summary_text = f"❌ All {len(texts)} texts failed to analyze."

	        return summary_text, df, summary_fig, confidence_fig

	    except Exception as e:
	        logger.error(f"Batch analysis failed: {e}")
	        return f"❌ Error: {str(e)}", None, None, None

	def get_history_stats():
	    """Return the history statistics as display text.

	    Returns a hint message when there is no history, or an error message
	    if computing or formatting the statistics raises.
	    """
	    try:
	        summary = history_manager.get_stats()
	        if summary:
	            report = f"""
	📊 Comprehensive History Statistics:

	📈 Analysis Counts:
	- Total Analyses: {summary['total_analyses']}
	- 😊 Positive: {summary['positive_count']}
	- 😞 Negative: {summary['negative_count']}
	- 😐 Neutral: {summary['neutral_count']}

	🎯 Confidence Metrics:
	- Average Confidence: {summary['avg_confidence']:.3f}
	- Highest Confidence: {summary['max_confidence']:.3f}
	- Lowest Confidence: {summary['min_confidence']:.3f}

	🌍 Language Statistics:
	- Languages Detected: {summary['languages_detected']}
	- Most Common Language: {summary['most_common_language'].upper()}

	📝 Text Statistics:
	- Average Text Length: {summary['avg_text_length']:.1f} characters
	"""
	        else:
	            report = "📊 No analysis history available yet. Analyze some texts to see statistics!"
	        return report
	    except Exception as err:
	        logger.error(f"Failed to get history stats: {err}")
	        return f"❌ Error getting statistics: {str(err)}"

	def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
	    """Return a summary and a table of history entries matching the filters.

	    Args:
	        sentiment_filter: a sentiment label, or "All" for no filter.
	        language_filter: a language display name such as "English" (a
	            bare code such as "en" also works), or "All" for no filter.
	        min_confidence: lowest confidence to keep; 0 means no filter.

	    Returns:
	        (summary_text, dataframe) for the 20 most recent matches, or
	        (message, None) when nothing matches or an error occurs.
	    """
	    try:
	        # Convert filters
	        sentiment = sentiment_filter if sentiment_filter != "All" else None

	        if language_filter == "All":
	            language = None
	        else:
	            # History entries store language codes ('en'), while the
	            # dropdown supplies display names ('English'). Translate via
	            # the config table; an unrecognised value is passed through
	            # lower-cased, as before.
	            name_to_code = {label.lower(): code
	                            for code, label in config.SUPPORTED_LANGUAGES.items()}
	            wanted = language_filter.lower()
	            language = name_to_code.get(wanted, wanted)

	        filtered_history = history_manager.filter_history(
	            sentiment=sentiment,
	            language=language,
	            min_confidence=min_confidence if min_confidence > 0 else None
	        )

	        if not filtered_history:
	            return "🔍 No entries match the filter criteria", None

	        # Create DataFrame for display
	        df_data = []
	        for entry in filtered_history[-20:]:  # Show last 20 entries
	            df_data.append({
	                'Timestamp': entry['timestamp'][:16],  # first 16 chars: date and HH:MM
	                'Text': entry['text'],
	                'Sentiment': entry['sentiment'],
	                'Confidence': f"{entry['confidence']:.3f}",
	                'Language': entry['language'].upper(),
	                'Type': entry.get('analysis_type', 'single')
	            })

	        df = pd.DataFrame(df_data)

	        summary = f"""
	🔍 Filtered Results:
	- Found {len(filtered_history)} entries matching criteria
	- Showing most recent {min(20, len(filtered_history))} entries
	"""

	        return summary, df

	    except Exception as e:
	        logger.error(f"Failed to filter history: {e}")
	        return f"❌ Error filtering history: {str(e)}", None

	def plot_history_dashboard():
	    """Build the 2x2 analytics dashboard from the stored history.

	    Returns:
	        (figure, status_text). The figure is None when fewer than two
	        analyses exist or when building it raises.
	    """
	    def _timeline_color(label):
	        # Marker colour per sentiment; anything else gets the orange tone.
	        if label == 'Positive':
	            return '#4CAF50'
	        if label == 'Negative':
	            return '#F44336'
	        return '#FF9800'

	    try:
	        records = history_manager.get_history()
	        if len(records) < 2:
	            return None, "📊 Need at least 2 analyses to create dashboard. Analyze more texts!"

	        # Create subplots
	        panel_titles = ['Sentiment Timeline', 'Confidence Distribution',
	                        'Language Distribution', 'Sentiment Summary']
	        panel_specs = [[{"secondary_y": False}, {"secondary_y": False}],
	                       [{"type": "pie"}, {"type": "bar"}]]
	        fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles, specs=panel_specs)

	        # Extract data
	        positions = list(range(len(records)))
	        positive_scores = [rec.get('pos_prob', 0) for rec in records]
	        confidence_values = [rec['confidence'] for rec in records]
	        labels = [rec['sentiment'] for rec in records]
	        language_codes = [rec.get('language', 'en') for rec in records]

	        # Sentiment timeline (top-left)
	        timeline = go.Scatter(
	            x=positions, y=positive_scores, mode='lines+markers',
	            marker=dict(color=[_timeline_color(label) for label in labels], size=6),
	            name='Positive Probability')
	        fig.add_trace(timeline, row=1, col=1)

	        # Confidence distribution (top-right)
	        fig.add_trace(
	            go.Histogram(x=confidence_values, nbinsx=10, name='Confidence'),
	            row=1, col=2)

	        # Language distribution (bottom-left)
	        per_language = Counter(language_codes)
	        fig.add_trace(
	            go.Pie(labels=list(per_language.keys()), values=list(per_language.values()),
	                   name="Languages"),
	            row=2, col=1)

	        # Sentiment summary (bottom-right)
	        per_sentiment = Counter(labels)
	        bar_palette = {'Positive': '#4CAF50', 'Negative': '#F44336', 'Neutral': '#FF9800'}
	        fig.add_trace(
	            go.Bar(x=list(per_sentiment.keys()), y=list(per_sentiment.values()),
	                   marker_color=[bar_palette.get(name, '#999999') for name in per_sentiment.keys()]),
	            row=2, col=2)

	        fig.update_layout(height=800, showlegend=False, title_text="Analysis Dashboard")
	        return fig, f"📊 Dashboard showing {len(records)} analyses"

	    except Exception as err:
	        logger.error(f"Failed to create dashboard: {err}")
	        return None, f"❌ Error creating dashboard: {str(err)}"

	def export_history_csv():
	    """Write the history to a temporary CSV file.

	    Returns:
	        (file_path, status_text); file_path is None when there is no
	        history or the export fails. The file is created with
	        delete=False, so it is not removed here.
	    """
	    try:
	        history = history_manager.get_history()
	        if not history:
	            return None, "📊 No history to export"

	        df = pd.DataFrame(history)
	        # Use the temp file only to reserve a unique path. Leaving the
	        # `with` block closes our handle before pandas opens the path by
	        # name, so no descriptor is leaked.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') as temp_file:
	            csv_path = temp_file.name
	        df.to_csv(csv_path, index=False)
	        return csv_path, f"✅ Exported {len(history)} entries to CSV"
	    except Exception as e:
	        logger.error(f"CSV export failed: {e}")
	        return None, f"❌ Export failed: {str(e)}"

	def export_history_excel():
	    """Write the history to a temporary .xlsx file.

	    Returns:
	        (file_path, status_text); file_path is None when there is no
	        history or the export fails. The file is created with
	        delete=False, so it is not removed here.
	    """
	    try:
	        history = history_manager.get_history()
	        if not history:
	            return None, "📊 No history to export"

	        df = pd.DataFrame(history)
	        # Use the temp file only to reserve a unique path. Leaving the
	        # `with` block closes our handle before pandas opens the path by
	        # name, so no descriptor is leaked.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
	            excel_path = temp_file.name
	        df.to_excel(excel_path, index=False)
	        return excel_path, f"✅ Exported {len(history)} entries to Excel"
	    except Exception as e:
	        logger.error(f"Excel export failed: {e}")
	        return None, f"❌ Export failed: {str(e)}"

	def clear_all_history():
	    """Empty the analysis history and return a status message."""
	    try:
	        removed = history_manager.clear()
	    except Exception as err:
	        logger.error(f"Failed to clear history: {err}")
	        return f"❌ Error clearing history: {str(err)}"
	    return f"🗑️ Cleared {removed} entries from history"

	def get_recent_analyses():
	    """Return a numbered text listing of the 10 most recent analyses.

	    Returns a hint message when the history is empty, or an error
	    message if building the listing raises.
	    """
	    try:
	        latest = history_manager.get_recent_history(10)
	        if not latest:
	            return "📊 No recent analyses available. Start analyzing some texts!"

	        rows = []
	        for position, item in enumerate(latest, 1):
	            if item['sentiment'] == 'Positive':
	                icon = "😊"
	            elif item['sentiment'] == 'Negative':
	                icon = "😞"
	            else:
	                icon = "😐"
	            rows.append(f"{position}. {icon} {item['sentiment']} ({item['confidence']:.3f}) - {item['text']}\n")

	        return "🕒 Recent Analyses (Last 10):\n\n" + "".join(rows)
	    except Exception as err:
	        logger.error(f"Failed to get recent analyses: {err}")
	        return f"❌ Error getting recent analyses: {str(err)}"

	# Sample data: one example per dropdown language, in dropdown order.
	_SAMPLE_SENTENCES = (
	    # Auto Detect
	    "The film had its moments, but overall it felt a bit too long and lacked emotional depth.",
	    # English
	    "I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled.",
	    # Chinese
	    "这部电影节奏拖沓，剧情老套，完全没有让我产生任何共鸣，是一次失望的观影体验。",
	    # Spanish
	    "Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable.",
	    # French
	    "Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film.",
	    # German
	    "Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt.",
	    # Swedish
	    "Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls.",
	)

	# Each sentence is wrapped in its own one-element list (one row per example).
	SAMPLE_TEXTS = [[sentence] for sentence in _SAMPLE_SENTENCES]

	# Five example texts, one per line, for the batch tab.
	BATCH_SAMPLE = """I love this product! It works perfectly.
	The service was terrible and slow.
	Not sure if I like it or not.
	Amazing quality and fast delivery!
	Could be better, but it's okay."""

	# Gradio Interface - simplified version, focused on core functionality.
	# Three tabs (single analysis, batch analysis, history/analytics) followed
	# by the click handlers that connect buttons to the functions above.
	# NOTE(review): the nesting of the `with` blocks below was reconstructed
	# from statement order because the original indentation was lost -
	# confirm the layout against the running app.
	with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
	    gr.Markdown("# 🎭 Multilingual Sentiment Analyzer")
	    gr.Markdown("Comprehensive sentiment analysis with batch processing and multilingual support")

	    with gr.Tab("📝 Single Analysis"):
	        with gr.Row():
	            # Left column: input text, options, action button and examples
	            with gr.Column(scale=2):
	                text_input = gr.Textbox(
	                    label="Text to Analyze",
	                    placeholder="Enter your text here... (supports multiple languages)",
	                    lines=4
	                )

	                with gr.Row():
	                    language_select = gr.Dropdown(
	                        choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
	                        value='Auto Detect',
	                        label="Language"
	                    )
	                    theme_select = gr.Dropdown(
	                        choices=list(config.THEMES.keys()),
	                        value='default',
	                        label="Theme"
	                    )

	                with gr.Row():
	                    clean_text = gr.Checkbox(label="Clean Text", value=False)
	                    remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
	                    remove_nums = gr.Checkbox(label="Remove Numbers", value=False)

	                analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")

	                gr.Examples(
	                    examples=SAMPLE_TEXTS,
	                    inputs=text_input,
	                    label="Sample Texts (Multiple Languages)"
	                )

	            # Right column: textual result
	            with gr.Column(scale=1):
	                result_info = gr.Markdown("Enter text and click Analyze to see results")

	        with gr.Row():
	            gauge_plot = gr.Plot(label="Sentiment Gauge")
	            bars_plot = gr.Plot(label="Probability Distribution")

	    with gr.Tab("📊 Batch Analysis"):
	        with gr.Row():
	            # Left column: batch input, options, action button and example
	            with gr.Column(scale=2):
	                batch_input = gr.Textbox(
	                    label="Batch Text Input (One text per line)",
	                    placeholder="Enter multiple texts, one per line...",
	                    lines=8
	                )

	                with gr.Row():
	                    batch_language = gr.Dropdown(
	                        choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
	                        value='Auto Detect',
	                        label="Language"
	                    )
	                    batch_theme = gr.Dropdown(
	                        choices=list(config.THEMES.keys()),
	                        value='default',
	                        label="Theme"
	                    )

	                with gr.Row():
	                    batch_clean = gr.Checkbox(label="Clean Text", value=False)
	                    batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
	                    batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False)

	                batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")

	                gr.Examples(
	                    examples=[[BATCH_SAMPLE]],
	                    inputs=batch_input,
	                    label="Sample Batch Input"
	                )

	            # Right column: textual summary
	            with gr.Column(scale=1):
	                batch_summary = gr.Markdown("Enter texts and click Analyze Batch to see results")

	        with gr.Row():
	            batch_results_table = gr.DataFrame(
	                label="Detailed Results",
	                interactive=False
	            )

	        with gr.Row():
	            batch_summary_plot = gr.Plot(label="Sentiment Summary")
	            batch_confidence_plot = gr.Plot(label="Confidence Distribution")

	    with gr.Tab("📈 History & Analytics"):
	        with gr.Row():
	            with gr.Column():
	                gr.Markdown("### 📊 Statistics")
	                stats_btn = gr.Button("📈 Get Statistics")
	                recent_btn = gr.Button("🕒 Recent Analyses")
	                stats_output = gr.Markdown("Click 'Get Statistics' to view analysis history")

	            with gr.Column():
	                gr.Markdown("### 🔍 Filter History")
	                with gr.Row():
	                    sentiment_filter = gr.Dropdown(
	                        choices=["All", "Positive", "Negative", "Neutral"],
	                        value="All",
	                        label="Filter by Sentiment"
	                    )
	                    language_filter = gr.Dropdown(
	                        choices=["All", "English", "Chinese", "Spanish", "French", "German", "Swedish"],
	                        value="All",
	                        label="Filter by Language"
	                    )

	                confidence_filter = gr.Slider(
	                    minimum=0.0,
	                    maximum=1.0,
	                    value=0.0,
	                    step=0.1,
	                    label="Minimum Confidence"
	                )

	                filter_btn = gr.Button("🔍 Filter History")

	        with gr.Row():
	            dashboard_btn = gr.Button("📊 View Dashboard")
	            clear_btn = gr.Button("🗑️ Clear History", variant="stop")

	        with gr.Row():
	            export_csv_btn = gr.Button("📄 Export CSV")
	            export_excel_btn = gr.Button("📊 Export Excel")

	        dashboard_plot = gr.Plot(label="Analytics Dashboard")

	        with gr.Row():
	            filtered_results = gr.Markdown("Use filters to view specific entries")
	            filtered_table = gr.DataFrame(label="Filtered History", interactive=False)

	        csv_file = gr.File(label="Download CSV Report")
	        excel_file = gr.File(label="Download Excel Report")
	        # Shared status line: the dashboard, export and clear handlers all write here
	        history_status = gr.Textbox(label="Status", interactive=False)

	    # Event handlers

	    # Single Analysis
	    analyze_btn.click(
	        analyze_single_text,
	        inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
	        outputs=[result_info, gauge_plot, bars_plot]
	    )

	    # Batch Analysis
	    batch_analyze_btn.click(
	        analyze_batch_texts,
	        inputs=[batch_input, batch_language, batch_theme, batch_clean, batch_remove_punct, batch_remove_nums],
	        outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
	    )

	    # History & Analytics
	    stats_btn.click(
	        get_history_stats,
	        outputs=stats_output
	    )

	    # Writes to the same Markdown component as the statistics button
	    recent_btn.click(
	        get_recent_analyses,
	        outputs=stats_output
	    )

	    filter_btn.click(
	        filter_history_display,
	        inputs=[sentiment_filter, language_filter, confidence_filter],
	        outputs=[filtered_results, filtered_table]
	    )

	    dashboard_btn.click(
	        plot_history_dashboard,
	        outputs=[dashboard_plot, history_status]
	    )

	    export_csv_btn.click(
	        export_history_csv,
	        outputs=[csv_file, history_status]
	    )

	    export_excel_btn.click(
	        export_history_excel,
	        outputs=[excel_file, history_status]
	    )

	    clear_btn.click(
	        clear_all_history,
	        outputs=history_status
	    )

	# Launch the application
	if __name__ == "__main__":
	    # Launch settings collected in one place and passed as keyword arguments
	    launch_options = {
	        'share': True,
	        'server_name': "0.0.0.0",
	        'server_port': 7860,
	        'show_error': True,
	    }
	    try:
	        logger.info("Starting Multilingual Sentiment Analyzer...")
	        demo.launch(**launch_options)
	    except Exception as err:
	        logger.error(f"Failed to launch application: {err}")
	        print(f"❌ Application failed to start: {err}")
	        print("Please check your dependencies and try again.")