Sandiago21 committed on
Commit
41c98d7
·
1 Parent(s): 250ffea

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +42 -3
app.py CHANGED
@@ -25,6 +25,38 @@ model_greek = SpeechT5ForTextToSpeech.from_pretrained(model_id_greek)
25
  processor_greek = SpeechT5Processor.from_pretrained(model_id_greek)
26
 
27
  replacements = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ("ου", "u"),
29
  ("αυ", "af"),
30
  ("ευ", "ef"),
@@ -125,7 +157,13 @@ def translate_to_english(audio):
125
  return outputs["text"]
126
 
127
 
 
 
 
 
 
128
  def synthesise_from_english(text):
 
129
  inputs = processor(text=text, return_tensors="pt")
130
  speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
131
  return speech.cpu().numpy()
@@ -144,9 +182,10 @@ def synthesise_from_greek(text):
144
 
145
 
146
  def speech_to_speech_translation(audio):
147
- translated_text = translate_to_english(audio)
148
- synthesised_speech = synthesise_from_english(translated_text)
149
- translated_text = translate_from_english_to_greek(synthesised_speech)
 
150
  synthesised_speech = synthesise_from_greek(translated_text)
151
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
152
  return ((16000, synthesised_speech), translated_text)
 
25
  processor_greek = SpeechT5Processor.from_pretrained(model_id_greek)
26
 
27
  replacements = [
28
+ ("á", "a"),
29
+ ("â", "a"),
30
+ ("ã", "a"),
31
+ ("í", "i"),
32
+ ("á", "a"),
33
+ ("í", "i"),
34
+ ("ñ", "n"),
35
+ ("ó", "o"),
36
+ ("ú", "u"),
37
+ ("ü", "u"),
38
+ ("á", "a"),
39
+ ("ç", "c"),
40
+ ("è", "e"),
41
+ ("ì", "i"),
42
+ ("í", "i"),
43
+ ("ò", "o"),
44
+ ("ó", "o"),
45
+ ("ù", "u"),
46
+ ("ú", "u"),
47
+ ("š", "s"),
48
+ ("ï", "i"),
49
+ ("à", "a"),
50
+ ("â", "a"),
51
+ ("ç", "c"),
52
+ ("è", "e"),
53
+ ("ë", "e"),
54
+ ("î", "i"),
55
+ ("ï", "i"),
56
+ ("ô", "o"),
57
+ ("ù", "u"),
58
+ ("û", "u"),
59
+ ("ü", "u"),
60
  ("ου", "u"),
61
  ("αυ", "af"),
62
  ("ευ", "ef"),
 
157
  return outputs["text"]
158
 
159
 
160
+ def translate_to_greek(audio):
161
+ outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "greek"})
162
+ return outputs["text"]
163
+
164
+
165
  def synthesise_from_english(text):
166
+ text = cleanup_text(text)
167
  inputs = processor(text=text, return_tensors="pt")
168
  speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
169
  return speech.cpu().numpy()
 
182
 
183
 
184
  def speech_to_speech_translation(audio):
185
+ # translated_text = translate_to_english(audio)
186
+ translated_text = translate_to_greek(audio)
187
+ # synthesised_speech = synthesise_from_english(translated_text)
188
+ # translated_text = translate_from_english_to_greek(synthesised_speech)
189
  synthesised_speech = synthesise_from_greek(translated_text)
190
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
191
  return ((16000, synthesised_speech), translated_text)