UnarineLeo committed on
Commit
ee94686
·
verified ·
1 Parent(s): d16dd2f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +273 -0
app.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ import time
5
+
6
+ # Global variables for model and tokenizer
7
+ model = None
8
+ tokenizer = None
9
+
10
def load_model():
    """Load the translation model and tokenizer into the module globals.

    Both objects are loaded into locals first and only published to the
    ``model`` / ``tokenizer`` globals once both loads have succeeded, so a
    failure never leaves one global set and the other still ``None``.

    Returns:
        bool: True if both the tokenizer and the model were loaded,
        False if loading raised (the error is printed).
    """
    global model, tokenizer

    model_name = "UnarineLeo/nllb_eng_ven_terms"
    print(f"Loading model: {model_name}")

    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    except Exception as e:
        # Startup boundary: report the failure and let the app come up so
        # the UI can tell the user the model is unavailable.
        print(f"Error loading model: {e}")
        return False

    tokenizer, model = loaded_tokenizer, loaded_model
    print("Model loaded successfully!")
    return True
26
+
27
def translate_text(text, max_length=512, num_beams=5):
    """
    Translate English text to Venda

    Args:
        text (str): Input English text
        max_length (int): Maximum length of translation
        num_beams (int): Number of beams for beam search

    Returns:
        tuple: (translated_text, status_message). On any failure the
        translated text is "" and the status message describes the problem.
    """
    global model, tokenizer

    # Treat None the same as blank input instead of crashing on .strip().
    if not text or not text.strip():
        return "", "Please enter some text to translate."

    if model is None or tokenizer is None:
        return "", "Model not loaded. Please wait while the model loads."

    try:
        # Set source language
        tokenizer.src_lang = "eng_Latn"

        # Resolve the target-language token through the tokenizer vocabulary;
        # the older `lang_code_to_id` mapping is not available on recent
        # Transformers releases.
        target_lang_id = tokenizer.convert_tokens_to_ids("ven_Latn")
        if target_lang_id is None or target_lang_id == tokenizer.unk_token_id:
            raise ValueError("target language code 'ven_Latn' is not known to the tokenizer")

        # Tokenize input
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

        # Generate translation
        start_time = time.perf_counter()
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                forced_bos_token_id=target_lang_id,
                # UI sliders may deliver floats; generate() expects integers.
                max_length=int(max_length),
                num_beams=int(num_beams),
                early_stopping=True,
                do_sample=False,
            )

        # Decode translation
        translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

        processing_time = round(time.perf_counter() - start_time, 2)
        status = f"✅ Translation completed in {processing_time} seconds"

        return translation, status

    except Exception as e:
        # UI boundary: report the error in the status box rather than raising.
        error_msg = f"❌ Translation error: {str(e)}"
        return "", error_msg
79
+
80
def translate_batch(text_list):
    """
    Translate multiple lines of text

    Each non-blank line is translated independently with ``translate_text``.
    Lines that fail are kept in the output (marked as failed) so the
    numbering stays aligned with the input, and the status message reports
    how many lines were actually translated.

    Args:
        text_list (str): Multi-line text input

    Returns:
        tuple: (translated_text, status_message)
    """
    # None or blank input yields no lines at all.
    lines = [line.strip() for line in (text_list or "").split('\n') if line.strip()]

    if not lines:
        return "", "Please enter some text to translate."

    try:
        output_rows = []
        translated_count = 0
        last_failure = ""

        for number, line in enumerate(lines, start=1):
            translation, status = translate_text(line)
            output_rows.append(f"{number}. EN: {line}")
            if translation:
                translated_count += 1
                output_rows.append(f" VE: {translation}")
            else:
                # Keep the failure visible instead of silently dropping the line.
                last_failure = status
                output_rows.append(f" VE: [failed] {status}")
            output_rows.append("")

        if translated_count == 0:
            reason = f": {last_failure}" if last_failure else ""
            return "", f"❌ No translations generated{reason}"

        result = "\n".join(output_rows)
        if translated_count == len(lines):
            status_msg = f"✅ Successfully translated {len(lines)} lines"
        else:
            status_msg = f"⚠️ Translated {translated_count} of {len(lines)} lines"
        return result, status_msg

    except Exception as e:
        # UI boundary: report the error in the status box rather than raising.
        return "", f"❌ Batch translation error: {str(e)}"
118
+
119
# Load model on startup
print("Initializing model...")
model_loaded = load_model()

# Create Gradio interface
with gr.Blocks(title="English to Venda Translator", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🌍 English to Venda Translator

    This app translates English text to Venda (Tshivenda) using the NLLB model.
    Venda is a Bantu language spoken primarily in South Africa and Zimbabwe.

    **Model:** `UnarineLeo/nllb_eng_ven_terms`
    """)

    # Tell the user up front when startup loading failed, instead of only
    # reporting it after they press Translate.
    if not model_loaded:
        gr.Markdown(
            "⚠️ **The model could not be loaded, so translation is currently "
            "unavailable. Check the application logs for details.**"
        )

    with gr.Tab("Single Translation"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="English Text",
                    placeholder="Enter English text to translate...",
                    lines=4,
                    max_lines=10
                )

                with gr.Row():
                    max_length_slider = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=512,
                        step=50,
                        label="Max Translation Length"
                    )

                    num_beams_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Number of Beams (Quality vs Speed)"
                    )

                translate_btn = gr.Button("🔄 Translate", variant="primary")

            with gr.Column():
                output_text = gr.Textbox(
                    label="Venda Translation",
                    lines=4,
                    max_lines=10,
                    interactive=False
                )

                status_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=1
                )

        # Examples (selecting one only fills the input box; no fn is attached)
        gr.Examples(
            examples=[
                ["Hello, how are you?"],
                ["Good morning, everyone."],
                ["Thank you for your help."],
                ["What is your name?"],
                ["I am learning Venda."],
                ["Welcome to our school."],
                ["The weather is beautiful today."],
                ["Can you help me please?"]
            ],
            inputs=[input_text],
            label="Try these examples:"
        )

    with gr.Tab("Batch Translation"):
        with gr.Row():
            with gr.Column():
                batch_input = gr.Textbox(
                    label="Multiple English Sentences",
                    placeholder="Enter multiple English sentences, one per line...",
                    lines=8,
                    max_lines=15
                )
                batch_translate_btn = gr.Button("🔄 Translate All", variant="primary")

            with gr.Column():
                batch_output = gr.Textbox(
                    label="Batch Translations",
                    lines=8,
                    max_lines=15,
                    interactive=False
                )
                batch_status = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=1
                )

    with gr.Tab("About"):
        gr.Markdown("""
        ## About This Translator

        This application uses a fine-tuned NLLB (No Language Left Behind) model specifically trained for English to Venda translation.

        ### Features:
        - **Single Translation**: Translate individual sentences or paragraphs
        - **Batch Translation**: Translate multiple sentences at once
        - **Adjustable Parameters**: Control translation quality and length
        - **Examples**: Try pre-loaded example sentences

        ### About Venda (Tshivenda):
        - Spoken by approximately 1.2 million people
        - Official language of South Africa
        - Also spoken in Zimbabwe
        - Part of the Bantu language family

        ### Usage Tips:
        - Keep sentences reasonably short for best results
        - The model works best with common, everyday language
        - Higher beam numbers generally produce better quality but slower translations

        ### Technical Details:
        - **Model**: UnarineLeo/nllb_eng_ven_terms
        - **Architecture**: NLLB (No Language Left Behind)
        - **Language Codes**: eng_Latn → ven_Latn
        """)

    # Event handlers
    translate_btn.click(
        fn=translate_text,
        inputs=[input_text, max_length_slider, num_beams_slider],
        outputs=[output_text, status_text]
    )

    batch_translate_btn.click(
        fn=translate_batch,
        inputs=[batch_input],
        outputs=[batch_output, batch_status]
    )

    # Also translate when the input textbox itself is submitted from the
    # keyboard, using the same handler and slider values as the button.
    input_text.submit(
        fn=translate_text,
        inputs=[input_text, max_length_slider, num_beams_slider],
        outputs=[output_text, status_text]
    )

# Launch the app
if __name__ == "__main__":
    # NOTE(review): share=True and debug=True are kept as in the original;
    # confirm they are wanted for the deployed app and not only for local runs.
    demo.launch(
        share=True,
        debug=True,
        show_error=True
    )