SohomToom committed on
Commit
e929cde
·
verified ·
1 Parent(s): dd7d3b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -22
app.py CHANGED
@@ -7,45 +7,51 @@ from TTS.api import TTS
7
  import tempfile
8
  import zipfile
9
 
10
- # Available TTS models with voice descriptions
11
  VOICE_MODELS = {
12
- "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
13
- "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
14
- "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
15
- "Blizzard (Deep Male Voice)": "tts_models/en/blizzard2013/capacitron-t2-cv-v1"
16
  }
17
 
18
  def docx_to_wav_zip(doc_file, selected_voice):
19
- tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)
 
 
 
20
  document = Document(doc_file.name)
21
- paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]
22
 
23
- temp_dir = tempfile.mkdtemp()
24
- audio_files = []
25
 
26
- for i, chunk in enumerate(paragraphs):
27
- wav_path = os.path.join(temp_dir, f"chunk_{i+1}.wav")
28
- tts.tts_to_file(text=chunk, file_path=wav_path)
29
- audio_files.append(wav_path)
 
 
30
 
31
- zip_path = os.path.join(temp_dir, "voiceover_chunks.zip")
32
- with zipfile.ZipFile(zip_path, 'w') as zipf:
33
- for wav_file in audio_files:
34
- zipf.write(wav_file, arcname=os.path.basename(wav_file))
 
35
 
36
- return zip_path
37
 
38
  # Gradio interface
39
  interface = gr.Interface(
40
  fn=docx_to_wav_zip,
41
  inputs=[
42
  gr.File(label="Upload .docx File"),
43
- gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Voice", value="Jenny (Expressive Female)")
44
  ],
45
  outputs=gr.File(label="Download ZIP of WAV Files"),
46
- title="Realistic Voiceover from DOCX (Multiple Voices)",
47
- description="Upload a .docx file and choose a realistic voice to generate WAV voiceover files chunked by paragraph, downloadable as a ZIP archive."
48
  )
49
 
50
  if __name__ == "__main__":
51
- interface.launch()
 
7
  import tempfile
8
  import zipfile
9
 
10
+ # Available male English voices with accents (VCTK dataset)
11
  VOICE_MODELS = {
12
+ "VCTK Speaker 23 (British Male)": ("tts_models/en/vctk/vits", 23),
13
+ "VCTK Speaker 27 (Scottish Male)": ("tts_models/en/vctk/vits", 27),
14
+ "VCTK Speaker 36 (US Male)": ("tts_models/en/vctk/vits", 36),
15
+ "VCTK Speaker 42 (Irish Male)": ("tts_models/en/vctk/vits", 42)
16
  }
17
 
18
  def docx_to_wav_zip(doc_file, selected_voice):
19
+ model_name, speaker_idx = VOICE_MODELS[selected_voice]
20
+ tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
21
+
22
+ # Extract text from .docx
23
  document = Document(doc_file.name)
24
+ full_text = "\n".join([para.text for para in document.paragraphs if para.text.strip()])
25
 
26
+ # Split by paragraphs for shorter audio chunks
27
+ paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]
28
 
29
+ with tempfile.TemporaryDirectory() as tmpdir:
30
+ audio_files = []
31
+ for i, para in enumerate(paragraphs):
32
+ wav_path = os.path.join(tmpdir, f"part_{i + 1}.wav")
33
+ tts.tts_to_file(text=para, speaker_idx=speaker_idx, file_path=wav_path)
34
+ audio_files.append(wav_path)
35
 
36
+ # Create a zip of all wav files
37
+ zip_path = os.path.join(tmpdir, "output_audio.zip")
38
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
39
+ for audio in audio_files:
40
+ zipf.write(audio, os.path.basename(audio))
41
 
42
+ return zip_path
43
 
44
  # Gradio interface
45
  interface = gr.Interface(
46
  fn=docx_to_wav_zip,
47
  inputs=[
48
  gr.File(label="Upload .docx File"),
49
+ gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Male Voice with Accent")
50
  ],
51
  outputs=gr.File(label="Download ZIP of WAV Files"),
52
+ title="Convert DOCX to WAV with Male Voices and Accents",
53
+ description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP."
54
  )
55
 
56
  if __name__ == "__main__":
57
+ interface.launch()