# NovaBenya's picture
# Upload 3 files
# 2c1ff66 verified
import gradio as gr
import json
from langdetect import detect
from transformers import pipeline
import warnings
warnings.filterwarnings("ignore")
# Source-language code -> "<language> to English" translation model
def load_language_model_map():
    """Build the lookup from a source-language code to its X-to-English model.

    Returns:
        dict: language code (``'ar'``, ``'fr'``, ``'de'``, ``'es'``, ``'it'``)
        mapped to the Helsinki-NLP model name that translates that language
        into English.
    """
    # Arabic, French, German, Spanish, Italian -- in the order they are listed
    source_codes = ('ar', 'fr', 'de', 'es', 'it')
    return {code: f'Helsinki-NLP/opus-mt-{code}-en' for code in source_codes}
# Display names for the language codes this app knows how to label;
# codes missing from this table are shown to the user as-is.
LANGUAGE_NAMES = dict(
    en='English',
    ar='Arabic',
    fr='French',
    de='German',
    es='Spanish',
    it='Italian',
)
# Initialize translation pipelines
def get_translation_pipelines():
    """Build the English -> target-language translation pipelines.

    The model for each target language is read from ``lang_model_map.json``
    (``language_to_model_mapping`` -> ``output_languages`` -> ``<name>`` ->
    ``model``). When that file is missing, is not valid JSON, or does not have
    the expected structure, hardcoded Helsinki-NLP models are used instead.

    Returns:
        dict: target language name -> object returned by
        ``pipeline("translation", model=...)``.

    Raises:
        Whatever ``pipeline`` raises when a model cannot be loaded; only
        configuration-file problems trigger the fallback.
    """
    # Only load the main target languages to avoid memory issues
    main_targets = ('Hebrew', 'Arabic', 'Spanish', 'French')
    fallback_models = {
        'Hebrew': "Helsinki-NLP/opus-mt-en-he",
        'Arabic': "Helsinki-NLP/opus-mt-en-ar",
        'Spanish': "Helsinki-NLP/opus-mt-en-es",
        'French': "Helsinki-NLP/opus-mt-en-fr",
    }

    # The try body covers only reading the configuration, so an error raised
    # while loading a model is never mistaken for a missing config file.
    try:
        with open('lang_model_map.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Extract output language mappings
        output_langs = data['language_to_model_mapping']['output_languages']
        model_names = {
            lang_name: lang_info['model']
            for lang_name, lang_info in output_langs.items()
            if lang_name in main_targets
        }
    except FileNotFoundError:
        # Fallback to hardcoded pipelines if JSON file not found
        print("Warning: lang_model_map.json not found. Using fallback pipelines.")
        model_names = fallback_models
    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
        # Malformed JSON or an unexpected layout: degrade to the same fallback
        # instead of crashing the app at import time.
        print(f"Warning: lang_model_map.json is invalid ({e!r}). Using fallback pipelines.")
        model_names = fallback_models

    return {
        lang_name: pipeline("translation", model=model_name)
        for lang_name, model_name in model_names.items()
    }
# Module-level state, built once at import time and reused by the functions below.
# language_model_map: source-language code -> name of its "to English" model.
# target_pipelines: target language name -> loaded English-to-target pipeline.
language_model_map = load_language_model_map()
target_pipelines = get_translation_pipelines()
def detect_language(text):
    """Detect the language of input text.

    Args:
        text: The text to analyse.

    Returns:
        tuple: ``(code, name)`` where ``code`` is the value returned by
        ``detect`` and ``name`` is its entry in ``LANGUAGE_NAMES`` (or the
        code itself when there is no entry). ``('unknown', 'Unknown')`` is
        returned when the text is empty, not a string, or detection fails.
    """
    # Nothing to analyse: answer directly instead of relying on the detector
    # raising for empty input.
    if not isinstance(text, str) or not text.strip():
        return 'unknown', 'Unknown'

    try:
        detected_lang = detect(text)
    except Exception:
        # Detection is best-effort, so any detector failure maps to "unknown".
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        return 'unknown', 'Unknown'

    return detected_lang, LANGUAGE_NAMES.get(detected_lang, detected_lang)
# Loaded "<language> to English" pipelines, keyed by model name, so each model
# is loaded once per process instead of on every translation request.
_TO_ENGLISH_PIPELINES = {}


def _get_to_english_pipeline(model_name):
    """Return the translation pipeline for ``model_name``, loading it on first use."""
    translator = _TO_ENGLISH_PIPELINES.get(model_name)
    if translator is None:
        translator = pipeline("translation", model=model_name)
        # Stored only after a successful load, so a failed load is retried next time.
        _TO_ENGLISH_PIPELINES[model_name] = translator
    return translator


def translate_to_english(text, source_lang):
    """Translate text from source language to English.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (a key of ``language_model_map``,
            or ``'en'``).

    Returns:
        str: ``text`` unchanged when ``source_lang`` is ``'en'``; the translated
        text on success; otherwise a human-readable message -- either
        ``"Translation model not available for this language"`` or a string
        starting with ``"Translation error: "``.
    """
    if source_lang == 'en':
        return text

    if source_lang not in language_model_map:
        return "Translation model not available for this language"

    try:
        translator = _get_to_english_pipeline(language_model_map[source_lang])
        result = translator(text, max_length=512)
        return result[0]['translation_text']
    except Exception as e:
        # Failures are reported as text because the caller displays the result.
        return f"Translation error: {str(e)}"
def translate_from_english(text, target_languages):
    """Translate English text into each requested target language.

    Args:
        text: English text to translate.
        target_languages: Iterable of target language names (keys of
            ``target_pipelines``).

    Returns:
        dict: language name -> translated text, ``"Model not available"`` when
        no pipeline exists for that name, or ``"Error: ..."`` when the
        pipeline call fails.
    """
    outcome = {}
    for name in target_languages:
        # Unknown target: record the placeholder and move on.
        if name not in target_pipelines:
            outcome[name] = "Model not available"
            continue
        try:
            output = target_pipelines[name](text, max_length=512)
            outcome[name] = output[0]['translation_text']
        except Exception as exc:
            outcome[name] = f"Error: {exc}"
    return outcome
def smart_translate(input_text, target_lang1, target_lang2, target_lang3):
    """Main translation function.

    Detects the language of ``input_text``, translates it to English when
    needed, then translates the English text into the selected targets.

    Args:
        input_text: Text entered by the user; may be empty or ``None``.
        target_lang1: First target language name, or a falsy value for none.
        target_lang2: Second target language name, or a falsy value for none.
        target_lang3: Third target language name, or a falsy value for none.

    Returns:
        tuple: ``(result_markdown, detected_language_name, english_text,
        translation_1, translation_2, translation_3)``. The three translation
        slots are ``""`` for unselected targets and when the text could not be
        translated to English.
    """
    if not input_text or not input_text.strip():
        return "Please enter text to translate", "", "", "", "", ""

    # Detect source language
    source_lang_code, source_lang_name = detect_language(input_text)

    # Translate to English first if not already English
    english_text = translate_to_english(input_text, source_lang_code)

    report = [
        f"**Original Text:** {input_text}\n\n",
        f"**Detected Language:** {source_lang_name} ({source_lang_code})\n\n",
    ]
    if source_lang_code != 'en':
        report.append(f"**English Translation:** {english_text}\n\n")

    # translate_to_english reports failures as plain message strings. Stop
    # here in that case so the message itself is not passed on and shown as
    # if it were a translation of the user's text.
    to_english_failed = source_lang_code != 'en' and (
        source_lang_code not in language_model_map
        or english_text.startswith("Translation error:")
    )
    if to_english_failed:
        return "".join(report), source_lang_name, english_text, "", "", ""

    # Selected targets in order, without duplicates, so a language picked in
    # more than one dropdown is translated only once.
    selected = (target_lang1, target_lang2, target_lang3)
    target_languages = list(dict.fromkeys(lang for lang in selected if lang))

    # Translate to target languages
    translations = translate_from_english(english_text, target_languages)

    # Format results
    report.append("**Translations:**\n")
    report.extend(
        f"• **{lang}:** {translation}\n"
        for lang, translation in translations.items()
    )

    # Return individual translations for display
    trans1, trans2, trans3 = (
        translations.get(lang, "") if lang else "" for lang in selected
    )
    return "".join(report), source_lang_name, english_text, trans1, trans2, trans3
# Create and launch the Gradio interface
target_options = list(target_pipelines)


def _default_target(name):
    """Return ``name`` if it is an available target language, else ``None``.

    The available targets depend on what was loaded into ``target_pipelines``,
    so a preferred default is only preselected when it is a valid choice.
    """
    return name if name in target_options else None


# NOTE(review): the source's indentation was lost, so the nesting of the rows
# and columns below is a reconstruction -- confirm against the intended layout.
with gr.Blocks(title="Smart Multilingual Translator", theme=gr.themes.Soft()) as interface:
    gr.Markdown("""
# Smart Multilingual Translator
### Powered by Hugging Face Transformers
This application automatically detects the language of your input text and translates it to your selected target languages.
""")

    with gr.Row():
        # Left column: input text, target selection and the translate button
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter text in any language...",
                lines=5
            )
            with gr.Row():
                target_lang1 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 1",
                    value=_default_target("Hebrew")
                )
                target_lang2 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 2",
                    value=_default_target("Arabic")
                )
                target_lang3 = gr.Dropdown(
                    choices=target_options,
                    label="Target Language 3",
                    value=_default_target("Spanish")
                )
            translate_btn = gr.Button("🔄 Translate", variant="primary", size="lg")

        # Right column: combined Markdown report
        with gr.Column(scale=3):
            result_display = gr.Markdown(label="Translation Results")

    with gr.Row():
        with gr.Column():
            detected_lang = gr.Textbox(label="Detected Language", interactive=False)
        with gr.Column():
            english_trans = gr.Textbox(label="English Translation", interactive=False)

    with gr.Row():
        trans1_output = gr.Textbox(label="Translation 1", interactive=False)
        trans2_output = gr.Textbox(label="Translation 2", interactive=False)
        trans3_output = gr.Textbox(label="Translation 3", interactive=False)

    # Event handlers: the outputs are listed in the same order as the tuple
    # returned by smart_translate.
    translate_btn.click(
        fn=smart_translate,
        inputs=[input_text, target_lang1, target_lang2, target_lang3],
        outputs=[result_display, detected_lang, english_trans, trans1_output, trans2_output, trans3_output]
    )

    # The language lists use "- " markers so each entry renders on its own
    # line rather than being merged into a single paragraph.
    gr.Markdown("""
---
## Supported Languages
### Language Detection (Input)
- Arabic (ar) - العربية
- English (en) - English
- French (fr) - Français
- German (de) - Deutsch
- Italian (it) - Italiano
- Spanish (es) - Español
### Target Languages (Output)
- Hebrew (he) - עברית
- Arabic (ar) - العربية
- Spanish (es) - Español
- French (fr) - Français
### Models Used:
- **Language Detection:** langdetect
- **Translation Models:** Helsinki-NLP MarianMT models from Hugging Face
- **Configuration:** Models loaded from lang_model_map.json
""")

# NOTE(review): share=True asks Gradio for a public share link -- confirm this
# is wanted in the environment where the app is deployed.
interface.launch(share=True)