SlouchyBuffalo committed on
Commit
5bfdb54
·
verified ·
1 Parent(s): 02adfaf

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -210
app.py DELETED
@@ -1,210 +0,0 @@
1
- import gradio as gr
2
- import spaces
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
- import torch
5
- from huggingface_hub import InferenceClient
6
- import os
7
-
8
# Chat client for the Llama 4 Scout model, served through the "cerebras"
# provider and authenticated with the token found in the HF_TOKEN
# environment variable (None when the variable is unset).
cerebras_client = InferenceClient(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    provider="cerebras",
    token=os.environ.get("HF_TOKEN"),
)

# Lazily populated translation state: each slot stays None until the
# corresponding translate_* function runs for the first time.
en_es_tokenizer = en_es_model = None
es_en_tokenizer = es_en_model = None
20
-
21
# Both translation directions use the same multilingual checkpoint; only the
# tokenizer's source language and the forced target-language token differ.
_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
_nllb_model = None


def _load_nllb_model():
    """Return the shared NLLB model, loading it onto the GPU on first use."""
    global _nllb_model
    if _nllb_model is None:
        _nllb_model = AutoModelForSeq2SeqLM.from_pretrained(
            _NLLB_CHECKPOINT,
            torch_dtype=torch.float16,
        ).cuda()
    return _nllb_model


def _nllb_translate(text, tokenizer, model, target_lang):
    """Translate ``text`` with ``model`` and return the decoded string.

    Args:
        text: Source text; input beyond 512 tokens is truncated.
        tokenizer: Tokenizer already configured with the source language.
        model: Seq2seq model that lives on the CUDA device.
        target_lang: Language code whose token id is forced as the first
            generated token (e.g. ``"spa_Latn"``).
    """
    inputs = tokenizer(
        text, return_tensors="pt", max_length=512, truncation=True
    ).to("cuda")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
            max_length=512,
            num_beams=5,
            early_stopping=True,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


@spaces.GPU(duration=60)
def translate_en_to_es(text):
    """Translate English ``text`` to Spanish and return the translation.

    The tokenizer is created on the first call. The model is the single
    shared NLLB instance, so using both directions no longer places two
    copies of the same weights on the GPU. The module-level
    ``en_es_tokenizer`` / ``en_es_model`` names are still populated.
    """
    global en_es_tokenizer, en_es_model

    if en_es_tokenizer is None:
        en_es_tokenizer = AutoTokenizer.from_pretrained(
            _NLLB_CHECKPOINT, src_lang="eng_Latn", tgt_lang="spa_Latn"
        )
    if en_es_model is None:
        en_es_model = _load_nllb_model()

    return _nllb_translate(text, en_es_tokenizer, en_es_model, "spa_Latn")


@spaces.GPU(duration=60)
def translate_es_to_en(text):
    """Translate Spanish ``text`` to English and return the translation.

    Mirrors ``translate_en_to_es``: a direction-specific tokenizer plus the
    shared NLLB model. The module-level ``es_en_tokenizer`` /
    ``es_en_model`` names are still populated.
    """
    global es_en_tokenizer, es_en_model

    if es_en_tokenizer is None:
        es_en_tokenizer = AutoTokenizer.from_pretrained(
            _NLLB_CHECKPOINT, src_lang="spa_Latn", tgt_lang="eng_Latn"
        )
    if es_en_model is None:
        es_en_model = _load_nllb_model()

    return _nllb_translate(text, es_en_tokenizer, es_en_model, "eng_Latn")
72
-
73
def _build_refine_prompt(original_text, translation, direction, region, formality):
    """Return the refinement prompt for the given translation direction."""
    if direction == "en_to_es":
        return (
            f"You are an expert Spanish translator specializing in {region} Spanish. "
            "Refine the following translation and explain your changes:\n"
            "\n"
            f"Original English: {original_text}\n"
            f"Initial Spanish translation: {translation}\n"
            f"Region: {region}\n"
            f"Formality level: {formality}\n"
            "\n"
            "Requirements:\n"
            f"1. Use {region} Spanish vocabulary and expressions\n"
            f"2. Adjust for {formality} formality level\n"
            "3. Fix any contextual errors or awkward phrasing\n"
            f"4. Preserve idiomatic expressions appropriately for {region} Spanish\n"
            "\n"
            "Respond in this format:\n"
            "TRANSLATION: [your refined translation]\n"
            "EXPLANATION: [Brief explanation of changes made and why this version "
            f"fits {formality} {region} Spanish better]"
        )
    # Any other direction value is treated as Spanish -> English.
    return (
        "You are an expert English translator. "
        "Refine the following translation and explain your changes:\n"
        "\n"
        f"Original Spanish: {original_text}\n"
        f"Initial English translation: {translation}\n"
        f"Formality level: {formality}\n"
        "\n"
        "Requirements:\n"
        "1. Use natural English expressions\n"
        f"2. Adjust for {formality} formality level\n"
        "3. Fix any contextual errors or awkward phrasing\n"
        "4. Preserve meaning while making it sound natural\n"
        "\n"
        "Respond in this format:\n"
        "TRANSLATION: [your refined translation]\n"
        "EXPLANATION: [Brief explanation of changes made and why this version "
        f"fits {formality} English better]"
    )


def _parse_refinement(content):
    """Split a model reply into ``(translation, explanation)``.

    Each section starts at the first occurrence of its marker. A marker that
    reappears later (for example quoted inside the explanation) no longer
    cuts the section short, and the two sections may come in either order.
    When either marker is missing, the whole reply is returned as the
    translation together with a placeholder explanation.
    """
    translation_marker = "TRANSLATION:"
    explanation_marker = "EXPLANATION:"
    t_start = content.find(translation_marker)
    e_start = content.find(explanation_marker)
    if t_start == -1 or e_start == -1:
        return content, "Explanation not available in expected format"

    t_body = t_start + len(translation_marker)
    e_body = e_start + len(explanation_marker)
    if t_start < e_start:
        refined = content[t_body:e_start]
        explanation = content[e_body:]
    else:
        explanation = content[e_body:t_start]
        refined = content[t_body:]
    return refined.strip(), explanation.strip()


def refine_with_llama(original_text, translation, direction, region="Mexico", formality="neutral"):
    """Ask the Llama chat model to polish ``translation`` and explain its edits.

    Args:
        original_text: The text the user entered.
        translation: The initial machine translation of ``original_text``.
        direction: ``"en_to_es"`` selects the Spanish-refinement prompt; any
            other value selects the English-refinement prompt.
        region: Spanish variant named in the Spanish-refinement prompt.
        formality: Formality level named in the prompt.

    Returns:
        A ``(refined_translation, explanation)`` tuple. If the chat request
        or reply handling raises, the tuple is
        ``("Refinement error: <message>", "")`` instead of propagating the
        exception, so the caller can display the failure in place.
    """
    refine_prompt = _build_refine_prompt(
        original_text, translation, direction, region, formality
    )

    try:
        response = cerebras_client.chat_completion(
            messages=[{"role": "user", "content": refine_prompt}],
            max_tokens=512,
            temperature=0.3,
        )
        content = response.choices[0].message.content.strip()
        return _parse_refinement(content)
    except Exception as e:
        return f"Refinement error: {str(e)}", ""
127
-
128
def complete_translation(text, direction, region, formality):
    """Run the two-stage pipeline: machine translation, then LLM refinement.

    Args:
        text: User input. ``None``, empty, or whitespace-only input
            short-circuits to three empty strings.
        direction: ``"English to Spanish"`` translates into Spanish; any
            other value is treated as Spanish to English.
        region: Spanish variant forwarded to the refinement step.
        formality: Formality level forwarded to the refinement step.

    Returns:
        ``(initial_translation, refined_translation, explanation)``. If any
        stage raises, returns ``("Error: <message>", "", "")`` so the
        failure shows up in the first output field.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise before the error handling below could catch it.
    if text is None or not text.strip():
        return "", "", ""

    try:
        # Step 1: initial translation with the direction-specific translator.
        if direction == "English to Spanish":
            translate, direction_key = translate_en_to_es, "en_to_es"
        else:  # Spanish to English
            translate, direction_key = translate_es_to_en, "es_to_en"
        initial_translation = translate(text)

        # Step 2: refinement plus explanation from the chat model.
        refined_translation, explanation = refine_with_llama(
            text, initial_translation, direction_key, region, formality
        )
        return initial_translation, refined_translation, explanation
    except Exception as e:
        return f"Error: {str(e)}", "", ""
144
-
145
# Build the Gradio UI: inputs and options on the left, the three result
# fields on the right.
# NOTE(review): widget nesting below was reconstructed from a diff view that
# dropped indentation — confirm against the original layout.
with gr.Blocks(title="Regional Spanish Translator with Explanations") as demo:
    gr.Markdown("# Regional Spanish Translator")
    gr.Markdown("Powered by NLLB-200 + Llama 4 with detailed explanations for regional Spanish variants and formality levels")

    with gr.Row():
        # Left column: source text, options, and the trigger button.
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Text to Translate",
                placeholder="Enter text in English or Spanish...",
                lines=6,
            )

            with gr.Row():
                direction = gr.Dropdown(
                    choices=["English to Spanish", "Spanish to English"],
                    value="English to Spanish",
                    label="Translation Direction",
                )

            with gr.Row():
                region = gr.Dropdown(
                    choices=["Mexico", "Spain", "Argentina", "Colombia", "Peru", "General"],
                    value="Mexico",
                    label="Spanish Variant",
                )
                formality = gr.Dropdown(
                    choices=["informal", "neutral", "formal"],
                    value="neutral",
                    label="Formality Level",
                )

            translate_btn = gr.Button("Translate", variant="primary", size="lg")

        # Right column: read-only results of each pipeline stage.
        with gr.Column(scale=2):
            initial_output = gr.Textbox(
                label="Initial Translation (NLLB-200)",
                lines=2,
                interactive=False,
            )
            refined_output = gr.Textbox(
                label="Refined Translation (Llama 4)",
                lines=2,
                interactive=False,
            )
            explanation_output = gr.Textbox(
                label="Explanation of Changes",
                lines=4,
                interactive=False,
            )

    # The button click and pressing Enter in the text box run the same
    # handler with the same components, so the wiring is declared once.
    pipeline_wiring = {
        "fn": complete_translation,
        "inputs": [input_text, direction, region, formality],
        "outputs": [initial_output, refined_output, explanation_output],
    }
    translate_btn.click(**pipeline_wiring)
    input_text.submit(**pipeline_wiring)

if __name__ == "__main__":
    demo.launch()