Spaces:
Running
Running
File size: 5,088 Bytes
fd43dfa 039f896 fd43dfa fe32dd7 039f896 fd43dfa 039f896 a3d55f4 add50a1 fd43dfa 90d12be add50a1 a3d55f4 fe32dd7 90d12be a3d55f4 add50a1 a3d55f4 add50a1 a3d55f4 add50a1 a3d55f4 add50a1 90d12be fe32dd7 add50a1 90d12be fe32dd7 ebb50cf 59b69bc 90d12be 59b69bc ebb50cf fe32dd7 90d12be fe32dd7 59b69bc de6323e ebb50cf 90d12be ebb50cf 90d12be ebb50cf de6323e ebb50cf de6323e ebb50cf de6323e fd43dfa 90d12be |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
import os
import asyncio
from conver import ConversationConfig, URLToAudioConverter
from dotenv import load_dotenv
from pydub import AudioSegment
load_dotenv()
# Bundled audio assets are resolved against the directory that contains this
# script, so the paths do not depend on the current working directory.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Default background music, first tag and second tag, in that order.
MUSICA_FONDO, TAG1, TAG2 = (
    os.path.join(ROOT_DIR, asset_name)
    for asset_name in ("musica.mp3", "tag.mp3", "tag2.mp3")
)
def _merge_silent_windows(windows):
    """Collapse overlapping or touching ``(start, end)`` windows into disjoint ranges.

    ``windows`` must be ordered by start position; the result keeps that order.
    """
    merged = []
    for start, end in windows:
        if merged and start <= merged[-1][1]:
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))
    return merged


def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str:
    """Mix a podcast with looping background music, an outro tag and transition tags.

    The background music is attenuated by 15 dB, looped/trimmed to the podcast
    length and placed under the voice track. The ``TAG1`` clip is appended at
    the end, and the ``TAG2`` clip is overlaid once on every silent stretch of
    the podcast that is long enough to contain it.

    Args:
        audio_path: Path of the podcast audio file to process.
        custom_music_path: Optional background music file. When it is missing
            or does not exist, ``MUSICA_FONDO`` is used instead.

    Returns:
        The path of the exported MP3 (input path without its extension plus
        ``_con_musica.mp3``), or a string starting with ``"Error:"`` when a
        required file is missing or the music track is empty.
    """
    if not audio_path or not os.path.exists(audio_path):
        return f"Error: Audio file {audio_path} not found"

    # NOTE(review): the UI passes the value of a gr.File component, which may
    # be a temp-file object exposing ``.name`` rather than a plain path
    # (depends on the Gradio version) - confirm against the deployed version.
    custom_music_path = getattr(custom_music_path, "name", custom_music_path)
    if custom_music_path and os.path.exists(custom_music_path):
        music_file = custom_music_path
    else:
        music_file = MUSICA_FONDO
    if not os.path.exists(music_file):
        return f"Error: Music file {music_file} not found"
    if not os.path.exists(TAG1):
        return f"Error: Tag file {TAG1} not found"
    if not os.path.exists(TAG2):
        return f"Error: Tag file {TAG2} not found"

    podcast_audio = AudioSegment.from_file(audio_path)
    musica_fondo = AudioSegment.from_file(music_file).apply_gain(-15)
    if len(musica_fondo) == 0:
        # A zero-length track cannot be looped (it would divide by zero below).
        return f"Error: Music file {music_file} is empty"
    tag_outro = AudioSegment.from_file(TAG1).apply_gain(-5)
    tag_trans = AudioSegment.from_file(TAG2).apply_gain(-5)

    # Repeat the music enough times to cover the podcast, then trim the excess.
    duracion_podcast = len(podcast_audio)
    repeticiones = (duracion_podcast // len(musica_fondo)) + 1
    musica_fondo_loop = (musica_fondo * repeticiones)[:duracion_podcast]
    mezcla = musica_fondo_loop.overlay(podcast_audio) + tag_outro

    # Scan the voice track with a sliding window and record the quiet ones.
    window_ms = 500
    step_ms = 100
    silence_threshold_dbfs = -40
    silent_windows = [
        (offset, offset + window_ms)
        for offset in range(0, duracion_podcast - window_ms, step_ms)
        if podcast_audio[offset:offset + window_ms].dBFS < silence_threshold_dbfs
    ]

    # Neighbouring windows overlap, so one pause shows up as many windows.
    # Merging them yields the real length of each pause and guarantees the
    # transition tag is placed at most once per pause.
    for start, end in _merge_silent_windows(silent_windows):
        if (end - start) >= len(tag_trans):
            mezcla = mezcla.overlay(tag_trans, position=start + 50)

    # Swap only the final extension so the export never lands on the input
    # file and directory names containing ".mp3" are left alone.
    root, _ext = os.path.splitext(audio_path)
    output_path = f"{root}_con_musica.mp3"
    # export() hands back the open output file; close it to release the handle.
    mezcla.export(output_path, format="mp3").close()
    return output_path
def synthesize_sync(article_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt):
    """Blocking wrapper around :func:`synthesize`.

    Builds the ``synthesize`` coroutine with the given arguments, runs it to
    completion with ``asyncio.run`` and returns whatever it returns.
    """
    pending = synthesize(
        article_url,
        text_input,
        language,
        skip_llm,
        agregar_musica,
        custom_music,
        custom_prompt,
    )
    return asyncio.run(pending)
async def synthesize(article_url, text_input, language="en", skip_llm=False, agregar_musica=False, custom_music=None, custom_prompt=None):
    """Turn a URL or a block of text into podcast audio.

    Args:
        article_url: URL of the article to convert; used only when no text
            is supplied.
        text_input: Text to convert. Takes precedence over ``article_url``.
        language: Key into the voice table (``"en"`` or ``"es"``); unknown
            values fall back to the English voices.
        skip_llm: When true and text is supplied, the text is sent to
            ``raw_text_to_audio`` instead of ``text_to_audio``.
        agregar_musica: When true, the generated audio is post-processed
            with ``mezclar_musica_y_tags``.
        custom_music: Optional custom background music, forwarded to the
            converter and to the mixing step.
        custom_prompt: Optional prompt template handed to
            ``ConversationConfig``.

    Returns:
        ``(conversation, output_file)`` on success, or
        ``("Error: ...", None)`` when the input is empty or any step fails.
    """
    # Whitespace-only fields count as empty, so a stray space in the URL box
    # is neither fetched as a URL nor allowed to hide a missing input.
    url = (article_url or "").strip()
    has_text = bool(text_input and text_input.strip())
    if not url and not has_text:
        return "Error: Ingresa una URL o texto", None
    try:
        config = ConversationConfig(custom_prompt_template=custom_prompt)
        converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
        voices = {
            "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
            "es": ("es-ES-AlvaroNeural", "es-ES-ElviraNeural"),
        }
        voice1, voice2 = voices.get(language, voices["en"])
        if has_text and skip_llm:
            output_file, conversation = await converter.raw_text_to_audio(text_input, voice1, voice2, custom_music)
        elif has_text:
            output_file, conversation = await converter.text_to_audio(text_input, voice1, voice2, custom_music)
        else:
            output_file, conversation = await converter.url_to_audio(url, voice1, voice2, custom_music)
        if agregar_musica:
            mixed = mezclar_musica_y_tags(output_file, custom_music)
            # The mixer reports failures as an "Error: ..." string rather than
            # raising; surface that message instead of using it as a file path.
            if isinstance(mixed, str) and mixed.startswith("Error:"):
                return mixed, None
            output_file = mixed
        return conversation, output_file
    except Exception as e:
        # UI boundary: report any failure as a message instead of crashing.
        return f"Error: {str(e)}", None
# Gradio UI: input controls, a generate button and two outputs (conversation
# text and generated audio). The button is wired to synthesize_sync.
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation - confirm the Group/Row layout against the deployed app.
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 🎙 Podcast Converter")
    with gr.Group():
        text_url = gr.Textbox(label="URL (opcional)", placeholder="https://...")
        text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
        language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
        skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
        custom_prompt = gr.Textbox(
            label="Prompt personalizado (opcional)",
            placeholder="{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {\"conversation\": [{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}, {\"speaker\": \"Anfitrión2\", \"text\": \"...\"}]}"
        )
        btn = gr.Button("Generar Podcast", variant="primary")
    with gr.Row():
        conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
        aud = gr.Audio(label="Audio Generado", interactive=False)
    # Input order must match the positional parameters of synthesize_sync.
    btn.click(
        synthesize_sync,
        inputs=[text_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt],
        outputs=[conv_display, aud]
    )
demo.launch()