import gradio as gr
import json
from langdetect import detect
from transformers import pipeline
import warnings

warnings.filterwarnings("ignore")


# Load language to model mapping
def load_language_model_map():
    """Load mapping between languages and translation models"""
    return {
        'ar': 'Helsinki-NLP/opus-mt-ar-en',  # Arabic to English
        'fr': 'Helsinki-NLP/opus-mt-fr-en',  # French to English
        'de': 'Helsinki-NLP/opus-mt-de-en',  # German to English
        'es': 'Helsinki-NLP/opus-mt-es-en',  # Spanish to English
        'it': 'Helsinki-NLP/opus-mt-it-en',  # Italian to English
    }


# Language code to full name mapping
LANGUAGE_NAMES = {
    'en': 'English',
    'ar': 'Arabic',
    'fr': 'French',
    'de': 'German',
    'es': 'Spanish',
    'it': 'Italian',
}


# Initialize translation pipelines
def get_translation_pipelines():
    """Initialize translation pipelines for different target languages from JSON"""
    try:
        with open('lang_model_map.json', 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Extract output language mappings
        output_langs = data['language_to_model_mapping']['output_languages']

        pipelines = {}
        for lang_name, lang_info in output_langs.items():
            # Only load the main target languages to avoid memory issues
            if lang_name in ['Hebrew', 'Arabic', 'Spanish', 'French']:
                pipelines[lang_name] = pipeline("translation", model=lang_info['model'])

        return pipelines
    except FileNotFoundError:
        # Fallback to hardcoded pipelines if JSON file not found
        print("Warning: lang_model_map.json not found. Using fallback pipelines.")
        return {
            'Hebrew': pipeline("translation", model="Helsinki-NLP/opus-mt-en-he"),
            'Arabic': pipeline("translation", model="Helsinki-NLP/opus-mt-en-ar"),
            'Spanish': pipeline("translation", model="Helsinki-NLP/opus-mt-en-es"),
            'French': pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr"),
        }


# Global caches: language-to-model map and target-language pipelines
language_model_map = load_language_model_map()
target_pipelines = get_translation_pipelines()


def detect_language(text):
    """Detect the language of input text"""
    try:
        detected_lang = detect(text)
        return detected_lang, LANGUAGE_NAMES.get(detected_lang, detected_lang)
    except Exception:
        return 'unknown', 'Unknown'


def translate_to_english(text, source_lang):
    """Translate text from source language to English"""
    if source_lang == 'en':
        return text

    if source_lang in language_model_map:
        try:
            model_name = language_model_map[source_lang]
            # Source-language pipelines are loaded on demand (only target pipelines are cached)
            translator = pipeline("translation", model=model_name)
            result = translator(text, max_length=512)
            return result[0]['translation_text']
        except Exception as e:
            return f"Translation error: {str(e)}"
    else:
        return "Translation model not available for this language"


def translate_from_english(text, target_languages):
    """Translate English text to target languages"""
    translations = {}
    for lang_name in target_languages:
        if lang_name in target_pipelines:
            try:
                result = target_pipelines[lang_name](text, max_length=512)
                translations[lang_name] = result[0]['translation_text']
            except Exception as e:
                translations[lang_name] = f"Error: {str(e)}"
        else:
            translations[lang_name] = "Model not available"
    return translations


def smart_translate(input_text, target_lang1, target_lang2, target_lang3):
    """Main translation function"""
    if not input_text.strip():
        return "Please enter text to translate", "", "", "", "", ""

    # Detect source language
    source_lang_code, source_lang_name = detect_language(input_text)

    # Translate to English first if not already English
    english_text = translate_to_english(input_text, source_lang_code)

    # Get target languages list
    target_languages = []
    if target_lang1:
        target_languages.append(target_lang1)
    if target_lang2:
        target_languages.append(target_lang2)
    if target_lang3:
        target_languages.append(target_lang3)

    # Translate to target languages
    translations = translate_from_english(english_text, target_languages)

    # Format results
    result_text = f"**Original Text:** {input_text}\n\n"
    result_text += f"**Detected Language:** {source_lang_name} ({source_lang_code})\n\n"

    if source_lang_code != 'en':
        result_text += f"**English Translation:** {english_text}\n\n"

    result_text += "**Translations:**\n"
    for lang, translation in translations.items():
        result_text += f"• **{lang}:** {translation}\n"

    # Return individual translations for display
    trans1 = translations.get(target_lang1, "") if target_lang1 else ""
    trans2 = translations.get(target_lang2, "") if target_lang2 else ""
    trans3 = translations.get(target_lang3, "") if target_lang3 else ""

    return result_text, source_lang_name, english_text, trans1, trans2, trans3


# Create and launch the Gradio interface
target_options = list(target_pipelines.keys())

with gr.Blocks(title="Smart Multilingual Translator", theme=gr.themes.Soft()) as interface:
    gr.Markdown("""
    # Smart Multilingual Translator
    ### Powered by Hugging Face Transformers

    This application automatically detects the language of your input text and translates it to your selected target languages.
    """)

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter text in any language...",
                lines=5
            )

            with gr.Row():
                target_lang1 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 1",
                    value="Hebrew"
                )
                target_lang2 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 2",
                    value="Arabic"
                )
                target_lang3 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 3",
                    value="Spanish"
                )

            translate_btn = gr.Button("🔄 Translate", variant="primary", size="lg")

        with gr.Column(scale=3):
            result_display = gr.Markdown(label="Translation Results")

    with gr.Row():
        with gr.Column():
            detected_lang = gr.Textbox(label="Detected Language", interactive=False)
        with gr.Column():
            english_trans = gr.Textbox(label="English Translation", interactive=False)

    with gr.Row():
        trans1_output = gr.Textbox(label="Translation 1", interactive=False)
        trans2_output = gr.Textbox(label="Translation 2", interactive=False)
        trans3_output = gr.Textbox(label="Translation 3", interactive=False)

    # Event handlers
    translate_btn.click(
        fn=smart_translate,
        inputs=[input_text, target_lang1, target_lang2, target_lang3],
        outputs=[result_display, detected_lang, english_trans,
                 trans1_output, trans2_output, trans3_output]
    )

    gr.Markdown("""
    ---
    ## Supported Languages

    ### Language Detection (Input)
    - Arabic (ar) - العربية
    - English (en) - English
    - French (fr) - Français
    - German (de) - Deutsch
    - Italian (it) - Italiano
    - Spanish (es) - Español

    ### Target Languages (Output)
    - Hebrew (he) - עברית
    - Arabic (ar) - العربية
    - Spanish (es) - Español
    - French (fr) - Français

    ### Models Used:
    - **Language Detection:** langdetect
    - **Translation Models:** Helsinki-NLP MarianMT models from Hugging Face
    - **Configuration:** Models loaded from lang_model_map.json
    """)

interface.launch(share=True)
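
# Note: get_translation_pipelines() expects a lang_model_map.json file with the keys
# accessed above (language_to_model_mapping -> output_languages -> <language> -> model).
# The sketch below is an assumed minimal example of that structure, using the same
# Helsinki-NLP model names as the hardcoded fallback; the actual file bundled with the
# app may contain additional languages or fields.
#
# {
#   "language_to_model_mapping": {
#     "output_languages": {
#       "Hebrew":  {"model": "Helsinki-NLP/opus-mt-en-he"},
#       "Arabic":  {"model": "Helsinki-NLP/opus-mt-en-ar"},
#       "Spanish": {"model": "Helsinki-NLP/opus-mt-en-es"},
#       "French":  {"model": "Helsinki-NLP/opus-mt-en-fr"}
#     }
#   }
# }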