Spaces:

SohomToom
/

DocToAudioConverted

Running

App Files Files Community

SohomToom committed on May 6

Commit

e929cde

verified ·

1 Parent(s): dd7d3b9

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -22

app.py CHANGED Viewed

@@ -7,45 +7,51 @@ from TTS.api import TTS
 import tempfile
 import zipfile
-# Available TTS models with voice descriptions
 VOICE_MODELS = {
-    "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
-    "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
-    "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
-    "Blizzard (Deep Male Voice)": "tts_models/en/blizzard2013/capacitron-t2-cv-v1"
 }
 def docx_to_wav_zip(doc_file, selected_voice):
-    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)
     document = Document(doc_file.name)
-    paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]
-    temp_dir = tempfile.mkdtemp()
-    audio_files = []
-    for i, chunk in enumerate(paragraphs):
-        wav_path = os.path.join(temp_dir, f"chunk_{i+1}.wav")
-        tts.tts_to_file(text=chunk, file_path=wav_path)
-        audio_files.append(wav_path)
-    zip_path = os.path.join(temp_dir, "voiceover_chunks.zip")
-    with zipfile.ZipFile(zip_path, 'w') as zipf:
-        for wav_file in audio_files:
-            zipf.write(wav_file, arcname=os.path.basename(wav_file))
-    return zip_path
 # Gradio interface
 interface = gr.Interface(
     fn=docx_to_wav_zip,
     inputs=[
         gr.File(label="Upload .docx File"),
-        gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Voice", value="Jenny (Expressive Female)")
     ],
     outputs=gr.File(label="Download ZIP of WAV Files"),
-    title="Realistic Voiceover from DOCX (Multiple Voices)",
-    description="Upload a .docx file and choose a realistic voice to generate WAV voiceover files chunked by paragraph, downloadable as a ZIP archive."
 )
 if __name__ == "__main__":
-    interface.launch()

 import tempfile
 import zipfile
+# Dropdown label -> (TTS model name, speaker index) for the selectable voices.
+# Every entry points at the same VCTK VITS model name and differs only in the
+# speaker index.
+# NOTE(review): the accent/gender wording in the labels is not verified
+# anywhere in this file - confirm it against the VCTK speaker metadata.
 VOICE_MODELS = {
+    label: ("tts_models/en/vctk/vits", speaker_idx)
+    for label, speaker_idx in (
+        ("VCTK Speaker 23 (British Male)", 23),
+        ("VCTK Speaker 27 (Scottish Male)", 27),
+        ("VCTK Speaker 36 (US Male)", 36),
+        ("VCTK Speaker 42 (Irish Male)", 42),
+    )
 }
 def docx_to_wav_zip(doc_file, selected_voice):
+    """Convert every non-empty paragraph of a .docx file to WAV and zip the results.
+
+    Args:
+        doc_file: Uploaded file object; only its ``name`` attribute (the path
+            passed to ``Document``) is read.
+        selected_voice: Key of ``VOICE_MODELS`` selecting the model name and
+            speaker index.
+
+    Returns:
+        Path to ``output_audio.zip``, which contains one ``part_<n>.wav`` per
+        non-empty paragraph, numbered from 1 in document order.
+
+    Raises:
+        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
+    """
+    model_name, speaker_idx = VOICE_MODELS[selected_voice]
+    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
     document = Document(doc_file.name)
+    # One audio chunk per paragraph; whitespace-only paragraphs are skipped.
+    stripped = (para.text.strip() for para in document.paragraphs)
+    paragraphs = [text for text in stripped if text]
+    # The archive is created in a directory that survives the return: its path
+    # is handed back to the caller, so it must not live inside a
+    # TemporaryDirectory that is deleted when its ``with`` block exits.
+    output_dir = tempfile.mkdtemp()
+    zip_path = os.path.join(output_dir, "output_audio.zip")
+    # Intermediate WAVs are only needed until they are copied into the archive,
+    # so they go in a scratch directory that is removed on exit (after the
+    # archive has been closed).
+    with tempfile.TemporaryDirectory() as wav_dir, zipfile.ZipFile(zip_path, "w") as zipf:
+        for i, para in enumerate(paragraphs, start=1):
+            wav_path = os.path.join(wav_dir, f"part_{i}.wav")
+            # NOTE(review): the Coqui TTS Python API documents a ``speaker``
+            # (name) argument for multi-speaker models - confirm that the
+            # ``speaker_idx`` keyword is accepted by the installed version.
+            tts.tts_to_file(text=para, speaker_idx=speaker_idx, file_path=wav_path)
+            zipf.write(wav_path, os.path.basename(wav_path))
+    return zip_path
 # Gradio interface
+# Wires docx_to_wav_zip to a two-input form (uploaded file + voice choice) and a
+# single downloadable file output.
 interface = gr.Interface(
     fn=docx_to_wav_zip,
     inputs=[
         gr.File(label="Upload .docx File"),
+        gr.Dropdown(
+            choices=list(VOICE_MODELS),
+            label="Choose Male Voice with Accent",
+            # Preselect the first voice so a submit without an explicit choice
+            # never passes None into the VOICE_MODELS lookup.
+            value=next(iter(VOICE_MODELS)),
+        ),
     ],
     outputs=gr.File(label="Download ZIP of WAV Files"),
+    title="Convert DOCX to WAV with Male Voices and Accents",
+    description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP."
 )
+# Start the web UI only when this file is run as a script, not when imported.
 if __name__ == "__main__":
+    interface.launch()