SlouchyBuffalo committed
Commit e8eb0cd · verified · 1 Parent(s): 00ec5d4

Delete app.py

Files changed (1)
  1. app.py +0 -202
app.py DELETED
@@ -1,202 +0,0 @@
- import gradio as gr
- import spaces
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
- import torch
- from huggingface_hub import InferenceClient
- import os
-
- # Initialize Cerebras client for Llama 4
- cerebras_client = InferenceClient(
-     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-     provider="cerebras",
-     token=os.getenv("HF_TOKEN"),
- )
-
- # Global variables for models and tokenizers
- en_es_tokenizer = None
- en_es_model = None
- es_en_tokenizer = None
- es_en_model = None
-
- @spaces.GPU(duration=60)
- def translate_en_to_es(text):
-     global en_es_tokenizer, en_es_model
-
-     # Initialize EN->ES model if needed
-     if en_es_tokenizer is None or en_es_model is None:
-         en_es_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M", src_lang="eng_Latn", tgt_lang="spa_Latn")
-         en_es_model = AutoModelForSeq2SeqLM.from_pretrained(
-             "facebook/nllb-200-distilled-600M",
-             torch_dtype=torch.float16
-         ).cuda()
-
-     # Translate
-     inputs = en_es_tokenizer(text, return_tensors="pt", max_length=512, truncation=True).to("cuda")
-     with torch.no_grad():
-         outputs = en_es_model.generate(
-             **inputs,
-             forced_bos_token_id=en_es_tokenizer.convert_tokens_to_ids("spa_Latn"),
-             max_length=512,
-             num_beams=5,
-             early_stopping=True
-         )
-
-     translation = en_es_tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return translation
-
- @spaces.GPU(duration=60)
- def translate_es_to_en(text):
-     global es_en_tokenizer, es_en_model
-
-     # Initialize ES->EN model if needed
-     if es_en_tokenizer is None or es_en_model is None:
-         es_en_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M", src_lang="spa_Latn", tgt_lang="eng_Latn")
-         es_en_model = AutoModelForSeq2SeqLM.from_pretrained(
-             "facebook/nllb-200-distilled-600M",
-             torch_dtype=torch.float16
-         ).cuda()
-
-     # Translate
-     inputs = es_en_tokenizer(text, return_tensors="pt", max_length=512, truncation=True).to("cuda")
-     with torch.no_grad():
-         outputs = es_en_model.generate(
-             **inputs,
-             forced_bos_token_id=es_en_tokenizer.convert_tokens_to_ids("eng_Latn"),
-             max_length=512,
-             num_beams=5,
-             early_stopping=True
-         )
-
-     translation = es_en_tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return translation
-
- def refine_with_llama(original_text, translation, direction, formality="neutral"):
-     if direction == "en_to_es":
-         refine_prompt = f"""You are an expert Mexican Spanish translator. Refine the following translation and explain your changes:
-
- Original English: {original_text}
- Initial Spanish translation: {translation}
- Formality level: {formality}
-
- Requirements:
- 1. Use Mexican Spanish vocabulary and expressions
- 2. Adjust for {formality} formality level
- 3. Fix any contextual errors or awkward phrasing
- 4. Preserve idiomatic expressions appropriately for Mexican Spanish
-
- Respond in this format:
- TRANSLATION: [your refined translation]
- EXPLANATION: [Brief explanation of changes made and why this version fits {formality} Mexican Spanish better]"""
-     else:
-         refine_prompt = f"""You are an expert English translator. Refine the following translation and explain your changes:
-
- Original Spanish: {original_text}
- Initial English translation: {translation}
- Formality level: {formality}
-
- Requirements:
- 1. Use natural English expressions
- 2. Adjust for {formality} formality level
- 3. Fix any contextual errors or awkward phrasing
- 4. Preserve meaning while making it sound natural
-
- Respond in this format:
- TRANSLATION: [your refined translation]
- EXPLANATION: [Brief explanation of changes made and why this version fits {formality} English better]"""
-
-     try:
-         response = cerebras_client.chat_completion(
-             messages=[{"role": "user", "content": refine_prompt}],
-             max_tokens=512,
-             temperature=0.3
-         )
-
-         # Parse response to extract translation and explanation
-         content = response.choices[0].message.content.strip()
-
-         if "TRANSLATION:" in content and "EXPLANATION:" in content:
-             translation_part = content.split("TRANSLATION:")[1].split("EXPLANATION:")[0].strip()
-             explanation_part = content.split("EXPLANATION:")[1].strip()
-             return translation_part, explanation_part
-         else:
-             return content, "Explanation not available in expected format"
-
-     except Exception as e:
-         return f"Refinement error: {str(e)}", ""
-
- def complete_translation(text, direction, formality):
-     if not text.strip():
-         return "", "", ""
-
-     try:
-         # Step 1: Initial translation
-         if direction == "English to Spanish":
-             initial_translation = translate_en_to_es(text)
-             refined_translation, explanation = refine_with_llama(text, initial_translation, "en_to_es", formality)
-         else:  # Spanish to English
-             initial_translation = translate_es_to_en(text)
-             refined_translation, explanation = refine_with_llama(text, initial_translation, "es_to_en", formality)
-
-         return initial_translation, refined_translation, explanation
-     except Exception as e:
-         return f"Error: {str(e)}", "", ""
-
- # Create Gradio interface
- with gr.Blocks(title="Mexican Spanish Translator with Explanations") as demo:
-     gr.Markdown("# Mexican Spanish Translator")
-     gr.Markdown("Powered by NLLB-200 + Llama 4 with detailed explanations for Mexican Spanish formality levels")
-
-     with gr.Row():
-         with gr.Column(scale=2):
-             input_text = gr.Textbox(
-                 label="Text to Translate",
-                 placeholder="Enter text in English or Spanish...",
-                 lines=6
-             )
-
-             with gr.Row():
-                 direction = gr.Dropdown(
-                     choices=["English to Spanish", "Spanish to English"],
-                     value="English to Spanish",
-                     label="Translation Direction"
-                 )
-                 formality = gr.Dropdown(
-                     choices=["informal", "neutral", "formal"],
-                     value="neutral",
-                     label="Formality Level (Mexican Spanish)"
-                 )
-
-             translate_btn = gr.Button("Translate", variant="primary", size="lg")
-
-         with gr.Column(scale=2):
-             initial_output = gr.Textbox(
-                 label="Initial Translation (NLLB-200)",
-                 lines=2,
-                 interactive=False
-             )
-             refined_output = gr.Textbox(
-                 label="Refined Translation (Llama 4)",
-                 lines=2,
-                 interactive=False
-             )
-             explanation_output = gr.Textbox(
-                 label="Explanation of Changes",
-                 lines=4,
-                 interactive=False
-             )
-
-     # Connect function
-     translate_btn.click(
-         fn=complete_translation,
-         inputs=[input_text, direction, formality],
-         outputs=[initial_output, refined_output, explanation_output]
-     )
-
-     input_text.submit(
-         fn=complete_translation,
-         inputs=[input_text, direction, formality],
-         outputs=[initial_output, refined_output, explanation_output]
-     )
-
- if __name__ == "__main__":
-     demo.launch()
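
For reference, the core NLLB-200 step of the deleted file can be exercised outside the Space. The sketch below assumes `transformers` and `torch` are installed locally; it runs on CPU in float32 rather than the Space's float16/CUDA setup, and the input sentence is invented for illustration.

```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Same checkpoint and language codes the Space used for EN -> ES.
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="eng_Latn", tgt_lang="spa_Latn")
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)  # float32, CPU

text = "Where is the nearest train station?"  # hypothetical input
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        # Force the decoder to start in the target language, as app.py did.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("spa_Latn"),
        max_length=512,
        num_beams=5,
        early_stopping=True,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

The `forced_bos_token_id` is what selects the output language; swapping the `src_lang`/`tgt_lang` codes gives the reverse direction, as `translate_es_to_en` did.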
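The Llama 4 refinement reply is parsed with plain string splitting on the TRANSLATION:/EXPLANATION: markers. That logic can be checked without a model or an HF_TOKEN; the helper below mirrors the parsing inside `refine_with_llama`, and the sample reply is made up for the test.

```python
# Pure-Python check of the TRANSLATION:/EXPLANATION: parsing from refine_with_llama.
def parse_refinement(content: str) -> tuple[str, str]:
    if "TRANSLATION:" in content and "EXPLANATION:" in content:
        translation = content.split("TRANSLATION:")[1].split("EXPLANATION:")[0].strip()
        explanation = content.split("EXPLANATION:")[1].strip()
        return translation, explanation
    # Fall back to the raw reply when the model ignores the requested format.
    return content, "Explanation not available in expected format"

sample = (
    "TRANSLATION: ¿Dónde queda la estación de tren más cercana?\n"
    "EXPLANATION: 'queda' is a common Mexican Spanish choice for locations."
)
translation, explanation = parse_refinement(sample)
print(translation)
print(explanation)
```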