# Spaces: Sleeping
import json
import warnings
from functools import lru_cache

import gradio as gr
from langdetect import detect
from transformers import pipeline

warnings.filterwarnings("ignore")
# Source-language code -> "<language> to English" translation model
def load_language_model_map():
    """Return the mapping from source-language code to its X->English model.

    Every supported source language uses the Helsinki-NLP OPUS-MT model named
    ``Helsinki-NLP/opus-mt-<code>-en``. A new dict is built on each call.
    """
    # Arabic, French, German, Spanish, Italian
    source_codes = ('ar', 'fr', 'de', 'es', 'it')
    return {code: f'Helsinki-NLP/opus-mt-{code}-en' for code in source_codes}
# Display names for the language codes this app knows about.
# Codes missing from this table are shown to the user as the raw code.
LANGUAGE_NAMES = dict(
    en='English',
    ar='Arabic',
    fr='French',
    de='German',
    es='Spanish',
    it='Italian',
)
# Initialize translation pipelines
def get_translation_pipelines():
    """Build the English -> target-language translation pipelines.

    Model names are read from ``lang_model_map.json`` under
    ``language_to_model_mapping -> output_languages -> <name> -> model``.
    Only Hebrew, Arabic, Spanish and French are loaded, to avoid memory
    issues. When the file is missing, or does not have the expected
    structure, the hardcoded Helsinki-NLP models are used instead and a
    warning is printed.

    Returns:
        dict mapping a target language name (e.g. ``'Hebrew'``) to its
        loaded ``transformers`` translation pipeline.

    Raises:
        Whatever ``transformers.pipeline`` raises when a model cannot be
        loaded; such failures are deliberately not masked by the fallback.
    """
    # Hardcoded defaults; the keys double as the allow-list of targets.
    fallback_models = {
        'Hebrew': "Helsinki-NLP/opus-mt-en-he",
        'Arabic': "Helsinki-NLP/opus-mt-en-ar",
        'Spanish': "Helsinki-NLP/opus-mt-en-es",
        'French': "Helsinki-NLP/opus-mt-en-fr",
    }

    try:
        with open('lang_model_map.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Extract output language mappings
        output_langs = data['language_to_model_mapping']['output_languages']
        model_names = {
            lang_name: lang_info['model']
            for lang_name, lang_info in output_langs.items()
            if lang_name in fallback_models
        }
    except FileNotFoundError:
        # Fallback to hardcoded pipelines if JSON file not found
        print("Warning: lang_model_map.json not found. Using fallback pipelines.")
        model_names = fallback_models
    except (ValueError, AttributeError, KeyError, TypeError) as exc:
        # Invalid JSON (JSONDecodeError is a ValueError) or unexpected schema.
        print(f"Warning: lang_model_map.json is malformed ({exc!r}). Using fallback pipelines.")
        model_names = fallback_models

    # Load the models outside the try block: an error raised while loading a
    # model must not be mistaken for a missing configuration file.
    return {
        lang_name: pipeline("translation", model=model_name)
        for lang_name, model_name in model_names.items()
    }
# Module-level state shared by the translation helpers below:
#   language_model_map - source-language code -> X->English model name
#   target_pipelines   - target language name -> loaded English->X pipeline
language_model_map = load_language_model_map()
target_pipelines = get_translation_pipelines()
def detect_language(text):
    """Detect the language of input text.

    Args:
        text: The text to analyse.

    Returns:
        A ``(code, name)`` tuple. ``name`` is looked up in ``LANGUAGE_NAMES``
        and falls back to the raw code for languages not listed there.
        ``('unknown', 'Unknown')`` is returned when the text is empty, not a
        string, or detection fails.
    """
    # Nothing to detect: answer directly instead of relying on the detector
    # raising for blank input.
    if not isinstance(text, str) or not text.strip():
        return 'unknown', 'Unknown'

    try:
        detected_lang = detect(text)
    except Exception:
        # Best-effort detection. `Exception` (rather than a bare `except`)
        # lets KeyboardInterrupt / SystemExit propagate.
        return 'unknown', 'Unknown'

    return detected_lang, LANGUAGE_NAMES.get(detected_lang, detected_lang)
@lru_cache(maxsize=None)
def _load_source_pipeline(model_name):
    """Load the translation pipeline for ``model_name`` once and reuse it.

    The cache is bounded in practice: callers only pass model names taken
    from ``language_model_map``.
    """
    return pipeline("translation", model=model_name)


def translate_to_english(text, source_lang):
    """Translate text from source language to English.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (e.g. ``'fr'``).

    Returns:
        ``text`` unchanged when ``source_lang`` is ``'en'``; otherwise the
        English translation. Failures are reported in the returned string:
        ``"Translation model not available for this language"`` when no
        model is mapped to ``source_lang``, or ``"Translation error: ..."``
        when loading or running the model raises.
    """
    if source_lang == 'en':
        return text

    model_name = language_model_map.get(source_lang)
    if model_name is None:
        return "Translation model not available for this language"

    try:
        # Reuse the already-loaded pipeline instead of rebuilding the model
        # on every request.
        translator = _load_source_pipeline(model_name)
        result = translator(text, max_length=512)
        return result[0]['translation_text']
    except Exception as e:
        return f"Translation error: {str(e)}"
def translate_from_english(text, target_languages):
    """Translate English text to target languages.

    Args:
        text: English text to translate.
        target_languages: Iterable of target language names; each is looked
            up in ``target_pipelines``.

    Returns:
        dict mapping each requested language name to its translation, to
        ``"Model not available"`` when no pipeline is loaded for it, or to
        ``"Error: ..."`` when the pipeline raises. A language listed more
        than once appears once, at the position of its first occurrence.
    """
    translations = {}
    for lang_name in target_languages:
        if lang_name in translations:
            # Same language requested again: the result would be identical,
            # so do not run the model a second time.
            continue

        translator = target_pipelines.get(lang_name)
        if translator is None:
            translations[lang_name] = "Model not available"
            continue

        try:
            result = translator(text, max_length=512)
            translations[lang_name] = result[0]['translation_text']
        except Exception as e:
            translations[lang_name] = f"Error: {str(e)}"
    return translations
def smart_translate(input_text, target_lang1, target_lang2, target_lang3):
    """Main translation function.

    Detects the language of ``input_text``, translates it to English when
    needed, then translates the English text into each selected target
    language.

    Args:
        input_text: Text entered by the user.
        target_lang1, target_lang2, target_lang3: Selected target language
            names; falsy values (nothing selected) are skipped.

    Returns:
        A 6-tuple ``(result_markdown, detected_language_name, english_text,
        translation_1, translation_2, translation_3)``. Each translation is
        ``""`` when its language was not selected or no translation was
        produced.
    """
    # `not input_text` also covers None, which has no .strip().
    if not input_text or not input_text.strip():
        return "Please enter text to translate", "", "", "", "", ""

    # Detect source language
    source_lang_code, source_lang_name = detect_language(input_text)

    # Translate to English first if not already English
    english_text = translate_to_english(input_text, source_lang_code)

    # Get target languages list
    selected = (target_lang1, target_lang2, target_lang3)
    target_languages = [lang for lang in selected if lang]

    # When the source language has no X->English model, english_text holds a
    # "not available" message rather than English text; translating that
    # message into the target languages would be misleading, so skip it.
    source_supported = (
        source_lang_code == 'en' or source_lang_code in language_model_map
    )
    if source_supported:
        translations = translate_from_english(english_text, target_languages)
    else:
        translations = {}

    # Format results
    parts = [
        f"**Original Text:** {input_text}\n\n",
        f"**Detected Language:** {source_lang_name} ({source_lang_code})\n\n",
    ]
    if source_lang_code != 'en':
        parts.append(f"**English Translation:** {english_text}\n\n")
    parts.append("**Translations:**\n")
    parts.extend(
        f"• **{lang}:** {translation}\n"
        for lang, translation in translations.items()
    )
    result_text = "".join(parts)

    # Return individual translations for display
    trans1, trans2, trans3 = (
        translations.get(lang, "") if lang else "" for lang in selected
    )
    return result_text, source_lang_name, english_text, trans1, trans2, trans3
# Create and launch the Gradio interface
target_options = list(target_pipelines.keys())


def _default_target(lang_name):
    """Return ``lang_name`` if it is an offered target language, else None.

    The set of loaded targets depends on lang_model_map.json, so a preferred
    default is only pre-selected when it is actually among the choices.
    """
    return lang_name if lang_name in target_options else None


with gr.Blocks(title="Smart Multilingual Translator", theme=gr.themes.Soft()) as interface:
    gr.Markdown("""
    # Smart Multilingual Translator
    ### Powered by Hugging Face Transformers
    This application automatically detects the language of your input text and translates it to your selected target languages.
    """)

    with gr.Row():
        # Left column: input text, target-language selectors, action button.
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter text in any language...",
                lines=5
            )
            with gr.Row():
                target_lang1 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 1",
                    value=_default_target("Hebrew")
                )
                target_lang2 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 2",
                    value=_default_target("Arabic")
                )
                target_lang3 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 3",
                    value=_default_target("Spanish")
                )
            translate_btn = gr.Button("🔄 Translate", variant="primary", size="lg")

        # Right column: combined Markdown summary of the results.
        with gr.Column(scale=3):
            result_display = gr.Markdown(label="Translation Results")

    with gr.Row():
        with gr.Column():
            detected_lang = gr.Textbox(label="Detected Language", interactive=False)
        with gr.Column():
            english_trans = gr.Textbox(label="English Translation", interactive=False)

    with gr.Row():
        trans1_output = gr.Textbox(label="Translation 1", interactive=False)
        trans2_output = gr.Textbox(label="Translation 2", interactive=False)
        trans3_output = gr.Textbox(label="Translation 3", interactive=False)

    # Event handlers
    # The outputs list follows the order of the 6-tuple returned by
    # smart_translate.
    translate_btn.click(
        fn=smart_translate,
        inputs=[input_text, target_lang1, target_lang2, target_lang3],
        outputs=[result_display, detected_lang, english_trans, trans1_output, trans2_output, trans3_output]
    )

    gr.Markdown("""
    ---
    ## Supported Languages
    ### Language Detection (Input)
    Arabic (ar) - العربية
    English (en) - English
    French (fr) - Français
    German (de) - Deutsch
    Italian (it) - Italiano
    Spanish (es) - Español
    ### Target Languages (Output)
    Hebrew (he) - עברית
    Arabic (ar) - العربية
    Spanish (es) - Español
    French (fr) - Français
    ### Models Used:
    - **Language Detection:** langdetect
    - **Translation Models:** Helsinki-NLP MarianMT models from Hugging Face
    - **Configuration:** Models loaded from lang_model_map.json
    """)

interface.launch(share=True)