Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import gradio as gr
|
3 |
+
import json
|
4 |
+
import time
|
5 |
+
import random
|
6 |
+
from transformers import pipeline
|
7 |
+
import pycountry
|
8 |
+
from datetime import datetime
|
9 |
+
from pydantic import BaseModel, PydanticUserError, ConfigDict
|
10 |
+
from pydantic import BaseModel, ConfigDict
|
11 |
+
import json
|
12 |
+
|
13 |
+
from pydantic_core import core_schema
from starlette.requests import Request


def get_pydantic_core_schema(request_type, handler):
    """Return a permissive pydantic-core schema for ``Request``.

    Args:
        request_type: The source type pydantic is building a schema for.
        handler: The schema handler supplied by pydantic (unused).

    Returns:
        ``core_schema.any_schema()``, i.e. no validation is applied.
    """
    return core_schema.any_schema()


# Install the hook on the class before any model that mentions ``Request``
# is defined, so schema generation for such models picks it up.
Request.__get_pydantic_core_schema__ = get_pydantic_core_schema


class MyModel(BaseModel):
    """Pydantic model carrying a Starlette ``Request`` object."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Annotated with the real class rather than the string
    # 'starlette.requests.Request': the name ``starlette`` is never bound in
    # this module, so the string form could not be resolved by pydantic.
    request: Request
|
23 |
+
# Initialize pipelines with error handling.
# Both names are bound up front so that a failed model load leaves them as
# None instead of undefined (which would surface later as a NameError).
lang_detector = None
text_translator = None
try:
    lang_detector = pipeline(
        "text-classification",
        model="papluca/xlm-roberta-base-language-detection",
    )
    text_translator = pipeline(
        "translation",
        model="facebook/nllb-200-distilled-600M",
        torch_dtype=torch.bfloat16,
    )
    print("🚀 AI Translation Hub initialized successfully!")
except Exception as e:
    # Top-level boundary: report the problem and let the UI start anyway.
    print(f"⚠️ Error initializing models: {e}")
|
30 |
+
|
31 |
+
# Extended language support with emojis
|
32 |
+
# (display name, flag emoji) pairs in the order they are offered to the user.
_LANGUAGE_FLAGS = (
    ('English', '🇺🇸'),
    ('Spanish', '🇪🇸'),
    ('French', '🇫🇷'),
    ('German', '🇩🇪'),
    ('Italian', '🇮🇹'),
    ('Portuguese', '🇵🇹'),
    ('Russian', '🇷🇺'),
    ('Chinese (Simplified)', '🇨🇳'),
    ('Japanese', '🇯🇵'),
    ('Korean', '🇰🇷'),
    ('Arabic', '🇸🇦'),
    ('Hindi', '🇮🇳'),
    ('Dutch', '🇳🇱'),
    ('Swedish', '🇸🇪'),
    ('Norwegian', '🇳🇴'),
)

# Mapping of language display name -> flag emoji.
LANGUAGES = dict(_LANGUAGE_FLAGS)
|
49 |
+
|
50 |
+
# Load language data with fallback.
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding. Any failure to read or parse it falls back to
# an empty table so the module still imports.
try:
    with open('.venv/language.json', 'r', encoding='utf-8') as file:
        language_data = json.load(file)
except FileNotFoundError:
    print("⚠️ Language data file not found. Using basic mapping.")
    language_data = {'languages': []}
except (OSError, json.JSONDecodeError) as exc:
    print(f"⚠️ Language data file could not be read ({exc}). Using basic mapping.")
    language_data = {'languages': []}
|
57 |
+
|
58 |
+
# Translation statistics
|
59 |
+
# Mutable per-process counters updated by the translation handler.
translation_stats = dict(
    total_translations=0,
    languages_detected=set(),
    session_start=datetime.now(),
)
|
64 |
+
|
65 |
+
def get_FLORES_code_from_language(language):
    """Look up the FLORES-200 code for a language display name.

    A leading flag emoji (a pair of regional-indicator characters) is removed
    before matching, and matching is case-insensitive. Entries loaded into
    ``language_data`` take precedence; a built-in table of common languages is
    used when no entry matches.

    Args:
        language: Display name, optionally prefixed with a flag emoji
            (for example ``"🇪🇸 Spanish"``).

    Returns:
        The FLORES-200 code as a string, or ``None`` when the language is
        empty or unknown.
    """
    import re

    if not language:
        return None

    # Remove emoji flags and extra spaces
    clean_language = re.sub(r'[🇦-🇿]{2}\s*', '', language).strip()
    wanted = clean_language.lower()

    for entry in language_data.get('languages', []):
        # Tolerate entries that lack either key instead of raising KeyError.
        if str(entry.get('Language', '')).lower() == wanted:
            code = entry.get('FLORES-200 code')
            if code:
                return code

    # Fallback mapping for common languages
    fallback_mapping = {
        'english': 'eng_Latn',
        'spanish': 'spa_Latn',
        'french': 'fra_Latn',
        'german': 'deu_Latn',
        'chinese (simplified)': 'zho_Hans',
        'italian': 'ita_Latn',
        'portuguese': 'por_Latn',
        'russian': 'rus_Cyrl',
        'japanese': 'jpn_Jpan',
        'korean': 'kor_Hang',
        'arabic': 'arb_Arab',
        'hindi': 'hin_Deva',
        'dutch': 'nld_Latn',
        'swedish': 'swe_Latn',
        # FLORES-200 has no 'nor_Latn'; Norwegian Bokmål is 'nob_Latn'.
        'norwegian': 'nob_Latn',
    }

    return fallback_mapping.get(wanted)
|
96 |
+
|
97 |
+
def detect_language_confidence(text):
    """Detect the language of ``text`` and report the classifier's score.

    Args:
        text: The text to classify. ``None``, empty and whitespace-only
            values are treated as undetectable.

    Returns:
        A ``(label, score)`` tuple taken from the first result of
        ``lang_detector``, or ``("Unknown", 0.0)`` when there is nothing to
        classify or detection fails for any reason.
    """
    if not text or not text.strip():
        return "Unknown", 0.0

    try:
        # truncation=True keeps long inputs within the model's maximum
        # sequence length instead of failing and being reported as Unknown.
        result = lang_detector(text, truncation=True)[0]
        return result['label'], result['score']
    except Exception:
        # Best-effort: detection problems must not break the translation flow.
        return "Unknown", 0.0
|
107 |
+
|
108 |
+
# Labels emitted by the language detector (ISO 639-1) mapped to the
# FLORES-200 codes the translation model expects, including the script.
_DETECTED_TO_FLORES = {
    'ar': 'arb_Arab',
    'bg': 'bul_Cyrl',
    'de': 'deu_Latn',
    'el': 'ell_Grek',
    'en': 'eng_Latn',
    'es': 'spa_Latn',
    'fr': 'fra_Latn',
    'hi': 'hin_Deva',
    'it': 'ita_Latn',
    'ja': 'jpn_Jpan',
    'nl': 'nld_Latn',
    'pl': 'pol_Latn',
    'pt': 'por_Latn',
    'ru': 'rus_Cyrl',
    'sw': 'swh_Latn',
    'th': 'tha_Thai',
    'tr': 'tur_Latn',
    'ur': 'urd_Arab',
    'vi': 'vie_Latn',
    'zh': 'zho_Hans',
}


def _source_flores_code(detected_lang):
    """Return the FLORES-200 source code for a detector label (English if unknown)."""
    known = _DETECTED_TO_FLORES.get(str(detected_lang).lower())
    if known:
        return known
    # Labels outside the table: fall back to the ISO 639-3 code with Latin
    # script, as the original implementation did for every language.
    try:
        lang = pycountry.languages.get(alpha_2=detected_lang)
        return f"{lang.alpha_3}_Latn" if lang else "eng_Latn"
    except (LookupError, AttributeError):
        return "eng_Latn"


def translate_with_analytics(text, destination_language, show_confidence=True):
    """Translate ``text`` while streaming progress updates.

    This is a generator: each item is a ``(status, translation, analytics)``
    tuple of strings. Intermediate items carry only a status message; the
    final item carries the translation and an analytics summary, or an error
    status with empty translation and analytics.

    Args:
        text: The text to translate.
        destination_language: Target language display name, optionally
            prefixed with a flag emoji (for example ``"🇪🇸 Spanish"``).
        show_confidence: When true (the default), the detection confidence is
            included in the status and analytics text.

    Yields:
        ``(status, translation, analytics)`` tuples as described above.
    """
    import re

    # A generator's ``return value`` is discarded by consumers, so the warning
    # must be yielded for the caller to ever see it.
    if not text or not text.strip():
        yield "⚠️ Please enter some text to translate", "", ""
        return

    # Display name without the leading flag emoji.
    clean_dest_lang = re.sub(r'[🇦-🇿]{2}\s*', '', destination_language).strip()

    # Update statistics
    translation_stats['total_translations'] += 1

    # Simulate processing for dramatic effect
    yield "🔍 Analyzing text...", "", ""
    time.sleep(0.5)

    # Detect source language with confidence
    detected_lang, confidence = detect_language_confidence(text)
    if detected_lang != "Unknown":
        # A failed detection is not a detected language.
        translation_stats['languages_detected'].add(detected_lang)

    confidence_note = f" ({confidence:.1%} confidence)" if show_confidence else ""
    source_label = f"{detected_lang.upper()}{confidence_note}"

    yield f"🧠 Detected language: {source_label}", "", ""
    time.sleep(0.3)

    # Get language codes
    src_code = _source_flores_code(detected_lang)
    dest_code = get_FLORES_code_from_language(destination_language)
    if not dest_code:
        yield f"❌ Unsupported target language: {destination_language}", "", ""
        return

    yield f"⚡ Translating to {clean_dest_lang}...", "", ""
    time.sleep(0.5)

    # Handle same language
    if src_code == dest_code:
        analytics = "\n".join([
            "📊 **Translation Analytics**",
            f"- Source: {source_label}",
            "- Target: Same language detected",
            "- Action: No translation needed",
            "- Processing time: <1s",
        ])
        yield "✅ Translation complete!", text, analytics
        return

    # Perform translation
    try:
        start_time = time.time()

        # 1.5x the input character count, clamped to the range 512..2048.
        max_length = max(512, min(2048, int(len(text) * 1.5)))

        translation = text_translator(
            text,
            src_lang=src_code,
            tgt_lang=dest_code,
            max_length=max_length,
            do_sample=False,  # For more consistent results
            num_beams=4,  # Better quality translation
        )
        processing_time = time.time() - start_time

        result = translation[0]['translation_text']

        # Generate analytics
        analytics = "\n".join([
            "📊 **Translation Analytics**",
            f"- **Source Language**: {source_label}",
            f"- **Target Language**: {clean_dest_lang}",
            f"- **Characters Processed**: {len(text):,}",
            f"- **Max Length Used**: {max_length}",
            f"- **Processing Time**: {processing_time:.2f}s",
            f"- **Session Translations**: {translation_stats['total_translations']}",
            f"- **Languages Detected**: {len(translation_stats['languages_detected'])}",
        ])

        yield "✅ Translation complete!", result, analytics

    except Exception as e:
        # UI boundary: surface the failure to the user instead of raising.
        yield f"❌ Translation failed: {str(e)}", "", ""
|
195 |
+
|
196 |
+
def clear_all():
    """Return four empty strings, one per UI field being cleared."""
    blank = ""
    return (blank,) * 4
|
199 |
+
|
200 |
+
# Custom CSS for a modern, sleek interface
|
201 |
+
# (selector, declarations) pairs rendered into the stylesheet below, in order.
_CSS_RULES = (
    (".gradio-container", (
        "background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);",
        "font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;",
    )),
    (".gr-button", (
        "background: linear-gradient(45deg, #FF6B6B, #4ECDC4);",
        "border: none;",
        "border-radius: 25px;",
        "color: white;",
        "font-weight: bold;",
        "transition: all 0.3s ease;",
        "box-shadow: 0 4px 15px rgba(0,0,0,0.2);",
    )),
    (".gr-button:hover", (
        "transform: translateY(-2px);",
        "box-shadow: 0 6px 20px rgba(0,0,0,0.3);",
    )),
    (".gr-textbox", (
        "border-radius: 15px;",
        "border: 2px solid #e0e0e0;",
        "transition: all 0.3s ease;",
    )),
    (".gr-textbox:focus", (
        "border-color: #667eea;",
        "box-shadow: 0 0 15px rgba(102, 126, 234, 0.3);",
    )),
    (".gr-dropdown", (
        "border-radius: 15px;",
        "border: 2px solid #e0e0e0;",
    )),
    (".gr-panel", (
        "background: rgba(255,255,255,0.95);",
        "border-radius: 20px;",
        "backdrop-filter: blur(10px);",
        "box-shadow: 0 8px 32px rgba(0,0,0,0.1);",
    )),
    (".gr-form", (
        "background: transparent;",
    )),
    (".gr-box", (
        "border-radius: 15px;",
        "background: rgba(255,255,255,0.9);",
    )),
)

# Stylesheet text: a leading newline, then one "selector { ... }" block per
# rule, with a blank line between consecutive blocks.
custom_css = "\n" + "\n".join(
    "{} {{\n{}\n}}\n".format(selector, "\n".join(declarations))
    for selector, declarations in _CSS_RULES
)
|
254 |
+
|
255 |
+
# Create the interface
|
256 |
+
# Static HTML fragments used by the layout below.
_HEADER_HTML = """
<div style="text-align: center; padding: 20px; background: linear-gradient(45deg, #FF6B6B, #4ECDC4); border-radius: 20px; margin-bottom: 20px;">
<h1 style="color: white; font-size: 3em; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
🌍 KS Translation Hub
</h1>
<p style="color: white; font-size: 1.2em; margin: 10px 0 0 0; opacity: 0.9;">
Powered by Advanced Neural Networks • Real-time Language Detection • 15+ Languages
</p>
</div>
"""

_FOOTER_HTML = """
<div style="text-align: center; padding: 20px; margin-top: 20px; background: rgba(255,255,255,0.8); border-radius: 15px;">
<p style="color: #666; font-size: 0.9em;">
🤖 Powered by Transformers • 🔒 Privacy-First • ⚡ Real-time Processing
</p>
</div>
"""

# Build the page: banner, input/output columns, status/analytics row,
# action buttons, event wiring, footer.
with gr.Blocks(css=custom_css, title="🌍 KS Translation Hub") as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        # Left column: source text and target-language picker.
        with gr.Column(scale=1):
            gr.HTML("<h3 style='text-align: center; color: #333;'>📝 Input</h3>")
            input_text = gr.Textbox(
                label="Enter text to translate",
                placeholder="Type or paste your text here... 🖊️",
                lines=8,
                show_label=False,
            )

            with gr.Row():
                # Choices are rendered as "<flag> <name>".
                language_choices = [
                    f"{flag} {name}" for name, flag in LANGUAGES.items()
                ]
                target_lang = gr.Dropdown(
                    choices=language_choices,
                    label="🎯 Target Language",
                    value="🇪🇸 Spanish",
                    show_label=True,
                )

        # Right column: read-only translation result.
        with gr.Column(scale=1):
            gr.HTML("<h3 style='text-align: center; color: #333;'>✨ Output</h3>")
            output_text = gr.Textbox(
                label="Translation",
                lines=8,
                show_label=False,
                interactive=False,
            )

    with gr.Row():
        with gr.Column(scale=1):
            status_text = gr.Textbox(
                label="🔄 Status",
                value="Ready to translate...",
                interactive=False,
                show_label=True,
            )

        with gr.Column(scale=1):
            analytics_text = gr.Textbox(
                label="📊 Analytics",
                value="Translation analytics will appear here...",
                interactive=False,
                show_label=True,
                lines=6,
            )

    with gr.Row():
        translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
        clear_btn = gr.Button("🗑️ Clear All", variant="secondary", size="lg")

    # The button click and the textbox submit event run the same handler
    # with the same inputs and outputs.
    translate_event = dict(
        fn=translate_with_analytics,
        inputs=[input_text, target_lang],
        outputs=[status_text, output_text, analytics_text],
    )

    translate_btn.click(**translate_event)

    clear_btn.click(
        fn=clear_all,
        outputs=[input_text, output_text, status_text, analytics_text],
    )

    input_text.submit(**translate_event)

    gr.HTML(_FOOTER_HTML)
|
343 |
+
|
344 |
+
# Launch with enhanced settings
|
345 |
+
if __name__ == "__main__":
    # Launch settings collected in one mapping and passed through unchanged.
    launch_options = {
        "share": True,
        "server_name": "127.0.0.1",
        "server_port": 7860,
        "max_threads": 40,
    }
    demo.launch(**launch_options)
|