Spaces:
Runtime error
Runtime error
Commit
·
41c98d7
1
Parent(s):
250ffea
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
@@ -25,6 +25,38 @@ model_greek = SpeechT5ForTextToSpeech.from_pretrained(model_id_greek)
|
|
25 |
processor_greek = SpeechT5Processor.from_pretrained(model_id_greek)
|
26 |
|
27 |
replacements = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
("ου", "u"),
|
29 |
("αυ", "af"),
|
30 |
("ευ", "ef"),
|
@@ -125,7 +157,13 @@ def translate_to_english(audio):
|
|
125 |
return outputs["text"]
|
126 |
|
127 |
|
|
|
|
|
|
|
|
|
|
|
128 |
def synthesise_from_english(text):
    """Synthesise speech for *text* with the default SpeechT5 model.

    The text is tokenised by the module-level ``processor``; the resulting
    ``input_ids`` and the module-level ``speaker_embeddings`` are moved to
    ``device`` and passed to ``model.generate_speech`` together with
    ``vocoder``.

    Returns:
        The generated speech, moved to the CPU and converted with
        ``.numpy()``.
    """
    token_ids = processor(text=text, return_tensors="pt")["input_ids"]
    waveform = model.generate_speech(
        token_ids.to(device),
        speaker_embeddings.to(device),
        vocoder=vocoder,
    )
    return waveform.cpu().numpy()
|
@@ -144,9 +182,10 @@ def synthesise_from_greek(text):
|
|
144 |
|
145 |
|
146 |
def speech_to_speech_translation(audio):
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
150 |
synthesised_speech = synthesise_from_greek(translated_text)
|
151 |
synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
|
152 |
return ((16000, synthesised_speech), translated_text)
|
|
|
25 |
processor_greek = SpeechT5Processor.from_pretrained(model_id_greek)
|
26 |
|
27 |
replacements = [
|
28 |
+
("á", "a"),
|
29 |
+
("â", "a"),
|
30 |
+
("ã", "a"),
|
31 |
+
("í", "i"),
|
32 |
+
("á", "a"),
|
33 |
+
("í", "i"),
|
34 |
+
("ñ", "n"),
|
35 |
+
("ó", "o"),
|
36 |
+
("ú", "u"),
|
37 |
+
("ü", "u"),
|
38 |
+
("á", "a"),
|
39 |
+
("ç", "c"),
|
40 |
+
("è", "e"),
|
41 |
+
("ì", "i"),
|
42 |
+
("í", "i"),
|
43 |
+
("ò", "o"),
|
44 |
+
("ó", "o"),
|
45 |
+
("ù", "u"),
|
46 |
+
("ú", "u"),
|
47 |
+
("š", "s"),
|
48 |
+
("ï", "i"),
|
49 |
+
("à", "a"),
|
50 |
+
("â", "a"),
|
51 |
+
("ç", "c"),
|
52 |
+
("è", "e"),
|
53 |
+
("ë", "e"),
|
54 |
+
("î", "i"),
|
55 |
+
("ï", "i"),
|
56 |
+
("ô", "o"),
|
57 |
+
("ù", "u"),
|
58 |
+
("û", "u"),
|
59 |
+
("ü", "u"),
|
60 |
("ου", "u"),
|
61 |
("αυ", "af"),
|
62 |
("ευ", "ef"),
|
|
|
157 |
return outputs["text"]
|
158 |
|
159 |
|
160 |
+
def translate_to_greek(audio):
    """Run the ASR pipeline on *audio* with Greek decoding and return the text.

    The pipeline is invoked with ``task="transcribe"`` and
    ``language="greek"``, capped at 256 new tokens.

    NOTE(review): despite the function name, the task requested is
    "transcribe", not "translate" -- presumably the decoder is being forced
    to emit Greek for non-Greek audio; confirm this is intentional.

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    generation_options = {"task": "transcribe", "language": "greek"}
    result = asr_pipe(audio, max_new_tokens=256, generate_kwargs=generation_options)
    return result["text"]
|
163 |
+
|
164 |
+
|
165 |
def synthesise_from_english(text):
    """Synthesise speech for *text* with the default SpeechT5 model.

    The text is first passed through ``cleanup_text`` and then tokenised by
    the module-level ``processor``. The resulting ``input_ids`` and the
    module-level ``speaker_embeddings`` are moved to ``device`` and passed to
    ``model.generate_speech`` together with ``vocoder``.

    Returns:
        The generated speech, moved to the CPU and converted with
        ``.numpy()``.
    """
    token_ids = processor(text=cleanup_text(text), return_tensors="pt")["input_ids"]
    waveform = model.generate_speech(
        token_ids.to(device),
        speaker_embeddings.to(device),
        vocoder=vocoder,
    )
    return waveform.cpu().numpy()
|
|
|
182 |
|
183 |
|
184 |
def speech_to_speech_translation(audio):
    """Turn input *audio* into Greek text and Greek synthesised speech.

    Pipeline: ``translate_to_greek`` produces the text, which
    ``synthesise_from_greek`` turns into a waveform. The waveform is scaled
    by 32767 (the int16 maximum) and cast to ``np.int16``.

    NOTE: an alternative route through ``translate_to_english`` /
    ``synthesise_from_english`` was tried earlier and is currently disabled.

    NOTE(review): ``.numpy()`` is called on the result of
    ``synthesise_from_greek``. If that helper returns a NumPy array (as
    ``synthesise_from_english`` does) rather than a tensor, this call raises
    ``AttributeError`` -- confirm the helper's return type.

    Returns:
        ``((16000, int16_waveform), text)`` -- 16000 is presumably the
        sample rate in Hz expected by the audio output component; verify
        against the caller.
    """
    greek_text = translate_to_greek(audio)
    waveform = synthesise_from_greek(greek_text)
    pcm16 = (waveform.numpy() * 32767).astype(np.int16)
    return (16000, pcm16), greek_text
|