speech-to-speech-greek

Runtime error

App Files Files Community

Sandiago21 committed on Jul 17, 2023

Commit

41c98d7

1 Parent(s): 250ffea

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +42 -3

app.py CHANGED Viewed

@@ -25,6 +25,38 @@ model_greek = SpeechT5ForTextToSpeech.from_pretrained(model_id_greek)
 processor_greek = SpeechT5Processor.from_pretrained(model_id_greek)
 replacements = [
     ("ου", "u"),
     ("αυ", "af"),
     ("ευ", "ef"),
@@ -125,7 +157,13 @@ def translate_to_english(audio):
     return outputs["text"]
 def synthesise_from_english(text):
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu().numpy()
@@ -144,9 +182,10 @@ def synthesise_from_greek(text):
 def speech_to_speech_translation(audio):
-    translated_text = translate_to_english(audio)
-    synthesised_speech = synthesise_from_english(translated_text)
-    translated_text = translate_from_english_to_greek(synthesised_speech)
     synthesised_speech = synthesise_from_greek(translated_text)
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return ((16000, synthesised_speech), translated_text)

 processor_greek = SpeechT5Processor.from_pretrained(model_id_greek)
 replacements = [
+    ("á", "a"),
+    ("â", "a"),
+    ("ã", "a"),
+    ("í", "i"),
+    ("á", "a"),
+    ("í", "i"),
+    ("ñ", "n"),
+    ("ó", "o"),
+    ("ú", "u"),
+    ("ü", "u"),
+    ("á", "a"),
+    ("ç", "c"),
+    ("è", "e"),
+    ("ì", "i"),
+    ("í", "i"),
+    ("ò", "o"),
+    ("ó", "o"),
+    ("ù", "u"),
+    ("ú", "u"),
+    ("š", "s"),
+    ("ï", "i"),
+    ("à", "a"),
+    ("â", "a"),
+    ("ç", "c"),
+    ("è", "e"),
+    ("ë", "e"),
+    ("î", "i"),
+    ("ï", "i"),
+    ("ô", "o"),
+    ("ù", "u"),
+    ("û", "u"),
+    ("ü", "u"),
     ("ου", "u"),
     ("αυ", "af"),
     ("ευ", "ef"),
     return outputs["text"]
+def translate_to_greek(audio):
+    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "greek"})
+    return outputs["text"]
 def synthesise_from_english(text):
+    text = cleanup_text(text)
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu().numpy()
 def speech_to_speech_translation(audio):
+#     translated_text = translate_to_english(audio)
+    translated_text = translate_to_greek(audio)
+#     synthesised_speech = synthesise_from_english(translated_text)
+#     translated_text = translate_from_english_to_greek(synthesised_speech)
     synthesised_speech = synthesise_from_greek(translated_text)
     synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return ((16000, synthesised_speech), translated_text)