Karan0310 committed on
Commit
5b2e524
·
verified ·
1 Parent(s): 2d24792

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +353 -0
app.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import json
4
+ import time
5
+ import random
6
+ from transformers import pipeline
7
+ import pycountry
8
+ from datetime import datetime
9
+ from pydantic import BaseModel, PydanticUserError, ConfigDict
10
+ from pydantic import BaseModel, ConfigDict
11
+ import json
12
+
13
class MyModel(BaseModel):
    """Pydantic model holding a single ``request`` field.

    The field is annotated with a string forward reference to Starlette's
    ``Request`` class, and the model is configured to accept arbitrary
    (non-pydantic) types.

    NOTE(review): MyModel is not referenced anywhere else in the visible
    file; it looks like part of a workaround for a pydantic/starlette schema
    error - confirm whether it is still needed.
    """
    request: 'starlette.requests.Request'
    # Let pydantic accept field types it has no built-in validator for.
    model_config = ConfigDict(arbitrary_types_allowed=True)
16
+ from pydantic_core import core_schema
17
+ from starlette.requests import Request
18
+
19
def get_pydantic_core_schema(request_type, handler):
    """Return a permissive pydantic-core schema.

    Both arguments are accepted to match the hook signature but are ignored;
    the result is always ``core_schema.any_schema()``.
    """
    permissive_schema = core_schema.any_schema()
    return permissive_schema
21
+
22
# Attach the schema hook to Starlette's Request class so that
# get_pydantic_core_schema is what gets looked up as
# Request.__get_pydantic_core_schema__.
Request.__get_pydantic_core_schema__ = get_pydantic_core_schema
# Initialize pipelines with error handling
# NOTE(review): on failure only a message is printed; lang_detector and
# text_translator are then left undefined for the functions that use them.
try:
    # Text-classification pipeline used to detect the language of the input.
    lang_detector = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
    # NLLB-200 translation pipeline, loaded with bfloat16 weights.
    text_translator = pipeline("translation", model="facebook/nllb-200-distilled-600M", torch_dtype=torch.bfloat16)
    print("🚀 AI Translation Hub initialized successfully!")
except Exception as e:
    print(f"⚠️ Error initializing models: {e}")
30
+
31
# Target languages offered in the UI: display name -> flag emoji.
# Insertion order is the order in which the choices are listed.
LANGUAGES = {
    "English": "🇺🇸",
    "Spanish": "🇪🇸",
    "French": "🇫🇷",
    "German": "🇩🇪",
    "Italian": "🇮🇹",
    "Portuguese": "🇵🇹",
    "Russian": "🇷🇺",
    "Chinese (Simplified)": "🇨🇳",
    "Japanese": "🇯🇵",
    "Korean": "🇰🇷",
    "Arabic": "🇸🇦",
    "Hindi": "🇮🇳",
    "Dutch": "🇳🇱",
    "Swedish": "🇸🇪",
    "Norwegian": "🇳🇴",
}
49
+
50
# Load language data with fallback
# The lookup code in this file reads language_data['languages'] as a list of
# entries with 'Language' and 'FLORES-200 code' keys.
try:
    # Explicit UTF-8 so parsing does not depend on the platform default encoding.
    with open('.venv/language.json', 'r', encoding='utf-8') as file:
        language_data = json.load(file)
except FileNotFoundError:
    print("⚠️ Language data file not found. Using basic mapping.")
    language_data = {'languages': []}
except (json.JSONDecodeError, UnicodeDecodeError) as e:
    # A corrupt or mis-encoded file should degrade to the built-in mapping
    # instead of aborting application startup.
    print(f"⚠️ Language data file could not be parsed ({e}). Using basic mapping.")
    language_data = {'languages': []}
57
+
58
# Session-wide counters, mutated in place by the translation handler.
translation_stats = {
    # Number of translation requests that got past input validation.
    "total_translations": 0,
    # Distinct labels returned by language detection so far.
    "languages_detected": set(),
    # Timestamp taken when this module was loaded.
    "session_start": datetime.now(),
}
64
+
65
def get_FLORES_code_from_language(language):
    """Return the FLORES-200 code for a language name, or ``None`` if unknown.

    Args:
        language: Language name, optionally prefixed with a flag emoji as in
            the dropdown labels (e.g. ``"🇪🇸 Spanish"``). Matching is
            case-insensitive.

    Returns:
        The FLORES-200 code string (e.g. ``"spa_Latn"``). Entries loaded into
        ``language_data`` take precedence; otherwise a built-in table of
        common languages is used. ``None`` when neither knows the language.
    """
    import re

    # A flag emoji is a pair of regional-indicator symbols; drop it together
    # with the whitespace that follows so only the language name remains.
    clean_language = re.sub(r'[🇦-🇿]{2}\s*', '', language).strip()
    wanted = clean_language.lower()

    for entry in language_data.get('languages', []):
        if entry['Language'].lower() == wanted:
            return entry['FLORES-200 code']

    # Fallback mapping for common languages
    fallback_mapping = {
        'english': 'eng_Latn',
        'spanish': 'spa_Latn',
        'french': 'fra_Latn',
        'german': 'deu_Latn',
        'chinese (simplified)': 'zho_Hans',
        'italian': 'ita_Latn',
        'portuguese': 'por_Latn',
        'russian': 'rus_Cyrl',
        'japanese': 'jpn_Jpan',
        'korean': 'kor_Hang',
        'arabic': 'arb_Arab',
        'hindi': 'hin_Deva',
        'dutch': 'nld_Latn',
        'swedish': 'swe_Latn',
        # FLORES-200 has no generic "nor" entry; Norwegian is listed as
        # Bokmål (nob_Latn) and Nynorsk (nno_Latn). Bokmål is used here.
        'norwegian': 'nob_Latn',
    }

    return fallback_mapping.get(wanted)
96
+
97
def detect_language_confidence(text):
    """Detect the language of *text* and report the detector's confidence.

    Args:
        text: Input string. ``None``, empty and whitespace-only values are
            treated as "nothing to detect".

    Returns:
        A ``(label, score)`` tuple taken from the first result of
        ``lang_detector``. ``("Unknown", 0.0)`` is returned when there is no
        text or when detection fails for any reason.
    """
    if not text or not text.strip():
        return "Unknown", 0.0

    try:
        result = lang_detector(text)[0]
        return result['label'], result['score']
    except Exception:
        # Detection is best-effort: any failure degrades to "Unknown".
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return "Unknown", 0.0
107
+
108
def translate_with_analytics(text, destination_language, show_confidence=True):
    """Translate *text* while streaming progress updates.

    This is a generator. Every item it yields is a
    ``(status, translation, analytics)`` tuple of strings, matching the three
    output components the event handlers bind it to.

    Args:
        text: Text to translate.
        destination_language: Target language label, optionally prefixed with
            a flag emoji (e.g. ``"🇪🇸 Spanish"``).
        show_confidence: Unused by the body; kept so existing callers that
            pass it keep working.

    Yields:
        Intermediate status tuples, then a final tuple holding either the
        translation and its analytics or an error status.
    """
    import re

    if not text or not text.strip():
        # This must be yielded: a value given to `return` inside a generator
        # is never seen by code that iterates over it.
        yield "⚠️ Please enter some text to translate", "", ""
        return

    # Update statistics
    translation_stats['total_translations'] += 1

    # Simulate processing for dramatic effect
    yield "🔍 Analyzing text...", "", ""
    time.sleep(0.5)

    # Detect source language with confidence
    detected_lang, confidence = detect_language_confidence(text)
    translation_stats['languages_detected'].add(detected_lang)

    yield f"🧠 Detected language: {detected_lang.upper()} ({confidence:.1%} confidence)", "", ""
    time.sleep(0.3)

    # Get language codes
    src_code = _source_flores_code(detected_lang)

    dest_code = get_FLORES_code_from_language(destination_language)
    if not dest_code:
        yield f"❌ Unsupported target language: {destination_language}", "", ""
        return

    # split(' ', 1)[-1] drops a leading "<flag> " prefix and returns the whole
    # label unchanged when it contains no space.
    yield f"⚡ Translating to {destination_language.split(' ', 1)[-1]}...", "", ""
    time.sleep(0.5)

    # Handle same language
    if src_code == dest_code:
        analytics = "\n".join([
            "📊 **Translation Analytics**",
            f"- Source: {detected_lang.upper()} ({confidence:.1%} confidence)",
            "- Target: Same language detected",
            "- Action: No translation needed",
            "- Processing time: <1s",
        ])
        yield "✅ Translation complete!", text, analytics
        return

    # Perform translation
    # Set max_length to be 1.5x input length with a minimum of 512 and maximum of 2048
    max_length = max(512, min(2048, int(len(text) * 1.5)))
    try:
        start_time = time.time()
        translation = text_translator(
            text,
            src_lang=src_code,
            tgt_lang=dest_code,
            max_length=max_length,
            do_sample=False,  # For more consistent results
            num_beams=4  # Better quality translation
        )
        processing_time = time.time() - start_time
        result = translation[0]['translation_text']
    except Exception as e:
        yield f"❌ Translation failed: {str(e)}", "", ""
        return

    # Generate analytics
    clean_dest_lang = re.sub(r'[🇦-🇿]{2}\s*', '', destination_language).strip()
    analytics = "\n".join([
        "📊 **Translation Analytics**",
        f"- **Source Language**: {detected_lang.upper()} ({confidence:.1%} confidence)",
        f"- **Target Language**: {clean_dest_lang}",
        f"- **Characters Processed**: {len(text):,}",
        f"- **Max Length Used**: {max_length}",
        f"- **Processing Time**: {processing_time:.2f}s",
        f"- **Session Translations**: {translation_stats['total_translations']}",
        f"- **Languages Detected**: {len(translation_stats['languages_detected'])}",
    ])

    yield "✅ Translation complete!", result, analytics


# Two-letter labels produced by the language detector, mapped to FLORES-200
# codes (language plus script). Appending "_Latn" to every ISO 639-3 code
# gives the wrong script for languages such as Russian, Arabic, Hindi,
# Japanese or Chinese.
_DETECTED_TO_FLORES = {
    'ar': 'arb_Arab',
    'bg': 'bul_Cyrl',
    'de': 'deu_Latn',
    'el': 'ell_Grek',
    'en': 'eng_Latn',
    'es': 'spa_Latn',
    'fr': 'fra_Latn',
    'hi': 'hin_Deva',
    'it': 'ita_Latn',
    'ja': 'jpn_Jpan',
    'nl': 'nld_Latn',
    'pl': 'pol_Latn',
    'pt': 'por_Latn',
    'ru': 'rus_Cyrl',
    'sw': 'swh_Latn',
    'th': 'tha_Thai',
    'tr': 'tur_Latn',
    'ur': 'urd_Arab',
    'vi': 'vie_Latn',
    'zh': 'zho_Hans',
}


def _source_flores_code(detected_lang):
    """Map a detected language label to the FLORES-200 source code to use."""
    known = _DETECTED_TO_FLORES.get(str(detected_lang).lower())
    if known:
        return known

    # Labels outside the table: fall back to an ISO 639-3 lookup assuming
    # Latin script, and to English when the label cannot be resolved at all.
    try:
        lang = pycountry.languages.get(alpha_2=detected_lang)
        return f"{lang.alpha_3}_Latn" if lang else "eng_Latn"
    except Exception:
        return "eng_Latn"
195
+
196
def clear_all():
    """Return four empty strings, one for each output bound to the clear button."""
    blank = ""
    return blank, blank, blank, blank
199
+
200
# Custom CSS for a modern, sleek interface
# Handed to gr.Blocks(css=...) when the interface is built. It styles the
# page background and font, buttons (including hover), textboxes (including
# focus), dropdowns, panels, forms and boxes.
custom_css = """
.gradio-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

.gr-button {
background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
border: none;
border-radius: 25px;
color: white;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
}

.gr-button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(0,0,0,0.3);
}

.gr-textbox {
border-radius: 15px;
border: 2px solid #e0e0e0;
transition: all 0.3s ease;
}

.gr-textbox:focus {
border-color: #667eea;
box-shadow: 0 0 15px rgba(102, 126, 234, 0.3);
}

.gr-dropdown {
border-radius: 15px;
border: 2px solid #e0e0e0;
}

.gr-panel {
background: rgba(255,255,255,0.95);
border-radius: 20px;
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
}

.gr-form {
background: transparent;
}

.gr-box {
border-radius: 15px;
background: rgba(255,255,255,0.9);
}
"""
254
+
255
# Create the interface
# Layout: header banner, an input/output row, a status/analytics row, a
# button row, the event wiring, and a footer banner.
with gr.Blocks(css=custom_css, title="🌍 KS Translation Hub") as demo:
    # Header banner.
    gr.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(45deg, #FF6B6B, #4ECDC4); border-radius: 20px; margin-bottom: 20px;">
<h1 style="color: white; font-size: 3em; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
🌍 KS Translation Hub
</h1>
<p style="color: white; font-size: 1.2em; margin: 10px 0 0 0; opacity: 0.9;">
Powered by Advanced Neural Networks • Real-time Language Detection • 15+ Languages
</p>
</div>
""")

    with gr.Row():
        # Left column: source text and target-language selector.
        with gr.Column(scale=1):
            gr.HTML("<h3 style='text-align: center; color: #333;'>📝 Input</h3>")
            input_text = gr.Textbox(
                label="Enter text to translate",
                placeholder="Type or paste your text here... 🖊️",
                lines=8,
                show_label=False
            )

            with gr.Row():
                # Choices are "<flag> <name>" labels built from LANGUAGES;
                # the flag prefix is stripped again during code lookup.
                target_lang = gr.Dropdown(
                    choices=[f"{flag} {lang}" for lang, flag in LANGUAGES.items()],
                    label="🎯 Target Language",
                    value="🇪🇸 Spanish",
                    show_label=True
                )

        # Right column: read-only translation result.
        with gr.Column(scale=1):
            gr.HTML("<h3 style='text-align: center; color: #333;'>✨ Output</h3>")
            output_text = gr.Textbox(
                label="Translation",
                lines=8,
                show_label=False,
                interactive=False
            )

    with gr.Row():
        # Read-only status line updated while a translation runs.
        with gr.Column(scale=1):
            status_text = gr.Textbox(
                label="🔄 Status",
                value="Ready to translate...",
                interactive=False,
                show_label=True
            )

        # Read-only analytics summary for the last translation.
        with gr.Column(scale=1):
            analytics_text = gr.Textbox(
                label="📊 Analytics",
                value="Translation analytics will appear here...",
                interactive=False,
                show_label=True,
                lines=6
            )

    with gr.Row():
        translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
        clear_btn = gr.Button("🗑️ Clear All", variant="secondary", size="lg")

    # Event handlers
    # translate_with_analytics is a generator; each tuple it yields maps onto
    # (status_text, output_text, analytics_text). show_confidence is not
    # passed, so its default applies.
    translate_btn.click(
        fn=translate_with_analytics,
        inputs=[input_text, target_lang],
        outputs=[status_text, output_text, analytics_text]
    )

    # clear_all returns four values, one per component listed here.
    clear_btn.click(
        fn=clear_all,
        outputs=[input_text, output_text, status_text, analytics_text]
    )

    # Auto-translate on Enter key
    # NOTE(review): input_text is multi-line (lines=8); the submit event may
    # need Shift+Enter rather than Enter there - confirm against the Gradio
    # version in use.
    input_text.submit(
        fn=translate_with_analytics,
        inputs=[input_text, target_lang],
        outputs=[status_text, output_text, analytics_text]
    )

    # Footer banner.
    gr.HTML("""
<div style="text-align: center; padding: 20px; margin-top: 20px; background: rgba(255,255,255,0.8); border-radius: 15px;">
<p style="color: #666; font-size: 0.9em;">
🤖 Powered by Transformers • 🔒 Privacy-First • ⚡ Real-time Processing
</p>
</div>
""")
343
+
344
# Launch with enhanced settings
# Only runs when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    # NOTE(review): server_name="127.0.0.1" binds to the loopback interface
    # only; if this app is meant to run inside a container or hosted
    # environment it may need "0.0.0.0" - confirm the deployment target.
    # NOTE(review): share=True asks Gradio for a public share link; confirm
    # that is intended alongside a loopback-only bind.
    demo.launch(
        share=True,
        server_name="127.0.0.1",
        server_port=7860,
        # The two options below are disabled.
        # NOTE(review): presumably no longer accepted by the installed Gradio
        # version - confirm, then delete.
        #show_tips=True,
        #enable_queue=True,
        max_threads=40
    )