Spaces:
Runtime error
Runtime error
Rename app.py to Infer.py
Browse files
Infer.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import numpy as np
|
3 |
+
import soundfile as sf
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Location of the saved Keras model that generate_speech() loads.
MODEL_PATH = "model/clone_tts_model.h5"
|
7 |
+
# Exact number of characters fed to the model; generate_speech() pads shorter
# text with spaces and truncates longer text to this length.
TEXT_MAX_LEN = 100
|
8 |
+
# Sample rate handed to sf.write() when saving the generated audio.
SAMPLE_RATE = 22050
|
9 |
+
|
10 |
+
def generate_speech(text_input):
    """Synthesize speech for ``text_input`` and write it to a WAV file.

    The text is encoded as one Unicode code point per character, padded with
    spaces or truncated to exactly ``TEXT_MAX_LEN`` characters, and passed as
    a single-row batch to the Keras model stored at ``MODEL_PATH``. The first
    item of the prediction is written to ``output/generated.wav`` using
    ``SAMPLE_RATE``.

    Args:
        text_input: Text to synthesize.

    Returns:
        The path of the written WAV file, so callers can play or serve it.
    """
    # Fixed-length model input: pad short text with spaces, cut long text.
    fixed_text = text_input.ljust(TEXT_MAX_LEN)[:TEXT_MAX_LEN]
    x_input = np.array([[ord(c) for c in fixed_text]])

    model = tf.keras.models.load_model(MODEL_PATH)
    # predict() returns one result per batch row; the batch has a single row.
    audio = model.predict(x_input)[0]

    os.makedirs("output", exist_ok=True)
    output_path = "output/generated.wav"
    sf.write(output_path, audio, SAMPLE_RATE)
    print(f"Generated speech saved at: {output_path}")
    return output_path
|
18 |
+
|
19 |
+
if __name__ == "__main__":
    # Script entry point: prompt once on stdin and synthesize the reply.
    generate_speech(input("Enter text to synthesize: "))
|
app.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
from datasets import load_dataset
|
3 |
-
from TTS.api import TTS
|
4 |
-
import gradio as gr
|
5 |
-
|
6 |
-
# Accept Coqui license automatically
os.environ["COQUI_TOS_AGREED"] = "1"

# Load your dataset and retrieve the voice sample path
dataset = load_dataset("Emmylahot12/nnamdi", split="train")
# Check the length before indexing: row 0 of an empty split raises IndexError,
# which would hide the intended ValueError below. Fetch the row only once so
# the audio entry is not materialized twice.
first_audio = dataset[0]["audio"] if len(dataset) > 0 else None
if first_audio is None:
    raise ValueError("Dataset is empty or audio is missing")

voice_sample_path = first_audio["path"]

# Initialize the TTS engine (CPU)
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
|
18 |
-
|
19 |
-
# Inference function
def synthesize(text, language="en"):
    """Render ``text`` to a WAV file and return that file's path.

    Args:
        text: Text to speak.
        language: Language code forwarded to the TTS engine (default "en").

    Returns:
        The path of the generated audio file, ``"output.wav"``.
    """
    wav_file = "output.wav"
    # voice_sample_path is the speaker reference clip loaded at module import.
    request = {
        "text": text,
        "speaker_wav": voice_sample_path,
        "language": language,
        "file_path": wav_file,
    }
    tts.tts_to_file(**request)
    return wav_file
|
29 |
-
|
30 |
-
# Gradio UI
# Components are created in the same order as the original inline call.
text_box = gr.Textbox(label="Enter text to synthesize")
language_picker = gr.Dropdown(["en", "fr", "es"], label="Language", value="en")
audio_player = gr.Audio(label="Generated Audio")

demo = gr.Interface(
    fn=synthesize,
    inputs=[text_box, language_picker],
    outputs=audio_player,
    title="Nnamdi TTS App (XTTSv2)",
)
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|