Spaces:

UnarineLeo
/

Lexicon_Translator

Sleeping

App Files Files Community

UnarineLeo commited on Jul 1

Commit

ee94686

verified ·

1 Parent(s): d16dd2f

Create app.py

Browse files

Files changed (1) hide show

app.py +273 -0

app.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import time
+# Global variables for model and tokenizer
+model = None
+tokenizer = None
+def load_model():
+    """Load the model and tokenizer"""
+    global model, tokenizer
+    try:
+        model_name = "UnarineLeo/nllb_eng_ven_terms"
+        print(f"Loading model: {model_name}")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+        print("Model loaded successfully!")
+        return True
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        return False
+def translate_text(text, max_length=512, num_beams=5):
+    """
+    Translate English text to Venda
+    Args:
+        text (str): Input English text
+        max_length (int): Maximum length of translation
+        num_beams (int): Number of beams for beam search
+    Returns:
+        tuple: (translated_text, status_message)
+    """
+    global model, tokenizer
+    if not text.strip():
+        return "", "Please enter some text to translate."
+    if model is None or tokenizer is None:
+        return "", "Model not loaded. Please wait while the model loads."
+    try:
+        # Set source language
+        tokenizer.src_lang = "eng_Latn"
+        # Tokenize input
+        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+        # Generate translation
+        start_time = time.time()
+        with torch.no_grad():
+            generated_tokens = model.generate(
+                **inputs,
+                forced_bos_token_id=tokenizer.lang_code_to_id["ven_Latn"],
+                max_length=max_length,
+                num_beams=num_beams,
+                early_stopping=True,
+                do_sample=False
+            )
+        # Decode translation
+        translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        end_time = time.time()
+        processing_time = round(end_time - start_time, 2)
+        status = f"✅ Translation completed in {processing_time} seconds"
+        return translation, status
+    except Exception as e:
+        error_msg = f"❌ Translation error: {str(e)}"
+        return "", error_msg
+def translate_batch(text_list):
+    """
+    Translate multiple lines of text
+    Args:
+        text_list (str): Multi-line text input
+    Returns:
+        tuple: (translated_text, status_message)
+    """
+    if not text_list.strip():
+        return "", "Please enter some text to translate."
+    lines = [line.strip() for line in text_list.split('\n') if line.strip()]
+    if not lines:
+        return "", "No valid text lines found."
+    try:
+        translations = []
+        total_time = 0
+        for i, line in enumerate(lines):
+            translation, status = translate_text(line)
+            if translation:
+                translations.append(f"{i+1}. EN: {line}")
+                translations.append(f"   VE: {translation}")
+                translations.append("")
+        if translations:
+            result = "\n".join(translations)
+            status_msg = f"✅ Successfully translated {len(lines)} lines"
+            return result, status_msg
+        else:
+            return "", "❌ No translations generated"
+    except Exception as e:
+        return "", f"❌ Batch translation error: {str(e)}"
+# Load model on startup
+print("Initializing model...")
+model_loaded = load_model()
+# Create Gradio interface
+with gr.Blocks(title="English to Venda Translator", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🌍 English to Venda Translator
+    This app translates English text to Venda (Tshivenda) using the NLLB model.
+    Venda is a Bantu language spoken primarily in South Africa and Zimbabwe.
+    **Model:** `UnarineLeo/nllb_eng_ven_terms`
+    """)
+    with gr.Tab("Single Translation"):
+        with gr.Row():
+            with gr.Column():
+                input_text = gr.Textbox(
+                    label="English Text",
+                    placeholder="Enter English text to translate...",
+                    lines=4,
+                    max_lines=10
+                )
+                with gr.Row():
+                    max_length_slider = gr.Slider(
+                        minimum=50,
+                        maximum=1000,
+                        value=512,
+                        step=50,
+                        label="Max Translation Length"
+                    )
+                    num_beams_slider = gr.Slider(
+                        minimum=1,
+                        maximum=10,
+                        value=5,
+                        step=1,
+                        label="Number of Beams (Quality vs Speed)"
+                    )
+                translate_btn = gr.Button("🔄 Translate", variant="primary")
+            with gr.Column():
+                output_text = gr.Textbox(
+                    label="Venda Translation",
+                    lines=4,
+                    max_lines=10,
+                    interactive=False
+                )
+                status_text = gr.Textbox(
+                    label="Status",
+                    interactive=False,
+                    lines=1
+                )
+        # Examples
+        gr.Examples(
+            examples=[
+                ["Hello, how are you?"],
+                ["Good morning, everyone."],
+                ["Thank you for your help."],
+                ["What is your name?"],
+                ["I am learning Venda."],
+                ["Welcome to our school."],
+                ["The weather is beautiful today."],
+                ["Can you help me please?"]
+            ],
+            inputs=[input_text],
+            label="Try these examples:"
+        )
+    with gr.Tab("Batch Translation"):
+        with gr.Row():
+            with gr.Column():
+                batch_input = gr.Textbox(
+                    label="Multiple English Sentences",
+                    placeholder="Enter multiple English sentences, one per line...",
+                    lines=8,
+                    max_lines=15
+                )
+                batch_translate_btn = gr.Button("🔄 Translate All", variant="primary")
+            with gr.Column():
+                batch_output = gr.Textbox(
+                    label="Batch Translations",
+                    lines=8,
+                    max_lines=15,
+                    interactive=False
+                )
+                batch_status = gr.Textbox(
+                    label="Status",
+                    interactive=False,
+                    lines=1
+                )
+    with gr.Tab("About"):
+        gr.Markdown("""
+        ## About This Translator
+        This application uses a fine-tuned NLLB (No Language Left Behind) model specifically trained for English to Venda translation.
+        ### Features:
+        - **Single Translation**: Translate individual sentences or paragraphs
+        - **Batch Translation**: Translate multiple sentences at once
+        - **Adjustable Parameters**: Control translation quality and length
+        - **Examples**: Try pre-loaded example sentences
+        ### About Venda (Tshivenda):
+        - Spoken by approximately 1.2 million people
+        - Official language of South Africa
+        - Also spoken in Zimbabwe
+        - Part of the Bantu language family
+        ### Usage Tips:
+        - Keep sentences reasonably short for best results
+        - The model works best with common, everyday language
+        - Higher beam numbers generally produce better quality but slower translations
+        ### Technical Details:
+        - **Model**: UnarineLeo/nllb_eng_ven_terms
+        - **Architecture**: NLLB (No Language Left Behind)
+        - **Language Codes**: eng_Latn → ven_Latn
+        """)
+    # Event handlers
+    translate_btn.click(
+        fn=translate_text,
+        inputs=[input_text, max_length_slider, num_beams_slider],
+        outputs=[output_text, status_text]
+    )
+    batch_translate_btn.click(
+        fn=translate_batch,
+        inputs=[batch_input],
+        outputs=[batch_output, batch_status]
+    )
+    # Auto-translate on example selection
+    input_text.submit(
+        fn=translate_text,
+        inputs=[input_text, max_length_slider, num_beams_slider],
+        outputs=[output_text, status_text]
+    )
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(
+        share=True,
+        debug=True,
+        show_error=True
+    )