# Page header captured with this file (file-viewer residue, not program code):
# Spaces: Running | File size: 4,549 Bytes
# (per-line commit-hash column and line-number gutter omitted)
import gradio as gr
import os
import asyncio
from conver import ConversationConfig, URLToAudioConverter
from dotenv import load_dotenv
from pydub import AudioSegment
# Load variables from a local .env file into the process environment so that
# later os.environ lookups (e.g. the LLM API key) can see them.
load_dotenv()
# Default audio assets. The paths are relative, so they resolve against the
# current working directory of the process.
# Background music used when no usable custom track is supplied.
MUSICA_FONDO = "musica.mp3"
# Jingle appended after the mix as the outro.
TAG1 = "tag.mp3"
# Jingle overlaid on silent stretches of the podcast as a transition.
TAG2 = "tag2.mp3"
def _silent_spans(audio, window_ms=500, step_ms=100, threshold_db=-40):
    """Return merged ``(start_ms, end_ms)`` spans where *audio* is quieter than *threshold_db*.

    The audio is scanned with a sliding window of ``window_ms`` advanced by
    ``step_ms``. Windows that overlap or touch are merged, so each returned
    span describes one continuous quiet stretch rather than one window.
    """
    spans = []
    for start in range(0, len(audio) - window_ms, step_ms):
        if audio[start:start + window_ms].dBFS >= threshold_db:
            continue
        end = start + window_ms
        if spans and start <= spans[-1][1]:
            # Extends the previous quiet stretch.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))
    return spans


def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str:
    """Mix background music and jingles into a podcast audio file.

    The background track is attenuated by 15 dB, looped and trimmed to the
    podcast length, and the podcast is overlaid on it. The ``TAG1`` jingle is
    appended as the outro, and the ``TAG2`` jingle is overlaid once on every
    quiet stretch of the podcast that is at least as long as the jingle.

    Args:
        audio_path: Path of the podcast audio to process.
        custom_music_path: Optional background track. Falls back to
            ``MUSICA_FONDO`` when missing or not an existing file. A file-like
            object exposing ``.name`` is also accepted.

    Returns:
        Path of the exported MP3: the input path with its extension replaced
        by ``_con_musica.mp3``.
    """
    # Upload widgets may hand over a temp-file object instead of a path;
    # os.path.exists() would raise TypeError on it.
    if custom_music_path is not None and not isinstance(
        custom_music_path, (str, bytes, os.PathLike)
    ):
        custom_music_path = getattr(custom_music_path, "name", None)
    use_custom = bool(custom_music_path) and os.path.exists(custom_music_path)
    music_file = custom_music_path if use_custom else MUSICA_FONDO

    podcast_audio = AudioSegment.from_file(audio_path)
    musica_fondo = AudioSegment.from_file(music_file).apply_gain(-15)
    tag_outro = AudioSegment.from_file(TAG1).apply_gain(-5)
    tag_trans = AudioSegment.from_file(TAG2).apply_gain(-5)

    duracion_podcast = len(podcast_audio)
    if len(musica_fondo) > 0:
        # Repeat the track until it covers the podcast, then trim the excess.
        repeticiones = duracion_podcast // len(musica_fondo) + 1
        musica_fondo_loop = (musica_fondo * repeticiones)[:duracion_podcast]
        mezcla = musica_fondo_loop.overlay(podcast_audio)
    else:
        # An empty music track cannot be looped (division by zero); keep the
        # podcast without background music instead of failing.
        mezcla = podcast_audio

    mezcla = mezcla + tag_outro  # tag.mp3 as the outro

    # One transition jingle per quiet stretch that can actually hold it.
    for start, end in _silent_spans(podcast_audio):
        if (end - start) >= len(tag_trans):
            mezcla = mezcla.overlay(tag_trans, position=start + 50)

    # Only the extension is replaced, so the result never overwrites the input
    # and directory names containing ".mp3" are left alone.
    base, _ext = os.path.splitext(audio_path)
    output_path = f"{base}_con_musica.mp3"
    mezcla.export(output_path, format="mp3")
    return output_path
def synthesize_sync(article_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt):
    """Blocking wrapper that runs the ``synthesize`` coroutine to completion.

    Forwards every argument positionally, unchanged, and returns whatever
    ``synthesize`` returns.
    """
    coroutine = synthesize(
        article_url,
        text_input,
        language,
        skip_llm,
        agregar_musica,
        custom_music,
        custom_prompt,
    )
    return asyncio.run(coroutine)
async def synthesize(article_url, text_input, language="en", skip_llm=False, agregar_musica=False, custom_music=None, custom_prompt=None):
    """Generate podcast audio from a URL or from pasted text.

    Text takes priority over the URL. With ``skip_llm`` set, the text is sent
    to the converter's ``raw_text_to_audio``; otherwise to ``text_to_audio``.
    Without text, the URL is sent to ``url_to_audio``. Inputs that are empty
    or whitespace-only count as not provided.

    Args:
        article_url: URL of the article to convert (may be empty).
        text_input: Manually entered text (may be empty).
        language: Key into the voice table; unknown values fall back to "en".
        skip_llm: Use the raw-text conversion path when text is provided.
        agregar_musica: Post-process the result with ``mezclar_musica_y_tags``.
        custom_music: Optional custom background track, forwarded as given.
        custom_prompt: Optional prompt template passed to ``ConversationConfig``.

    Returns:
        ``(conversation, output_file)`` on success, or ``("Error: ...", None)``
        when no input was given or any step raised.
    """
    has_url = bool((article_url or "").strip())
    has_text = bool((text_input or "").strip())
    if not (has_url or has_text):
        return "Error: Ingresa una URL o texto", None
    try:
        config = ConversationConfig(custom_prompt_template=custom_prompt)
        converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
        # One voice per speaker, keyed by language code.
        voices = {
            "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
            "es": ("es-ES-AlvaroNeural", "es-ES-ElviraNeural"),
        }
        voice1, voice2 = voices.get(language, voices["en"])
        if has_text and skip_llm:
            output_file, conversation = await converter.raw_text_to_audio(text_input, voice1, voice2, custom_music)
        elif has_text:
            output_file, conversation = await converter.text_to_audio(text_input, voice1, voice2, custom_music)
        else:
            output_file, conversation = await converter.url_to_audio(article_url, voice1, voice2, custom_music)
        if agregar_musica:
            output_file = mezclar_musica_y_tags(output_file, custom_music)
        return conversation, output_file
    except Exception as e:
        # UI boundary: report the failure in the conversation box instead of
        # letting the exception propagate.
        return f"Error: {str(e)}", None
# Gradio UI: input controls, a generate button, and the two outputs
# (conversation text and generated audio).
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 🎙 Podcast Converter")
    with gr.Group():
        text_url = gr.Textbox(label="URL (opcional)", placeholder="https://...")
        text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
        language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
        skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
        custom_prompt = gr.Textbox(
            label="Prompt personalizado (opcional)",
            placeholder="{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {\"conversation\": [{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}, {\"speaker\": \"Anfitrión2\", \"text\": \"...\"}]}"
        )
        btn = gr.Button("Generar Podcast", variant="primary")
    with gr.Row():
        conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
        aud = gr.Audio(label="Audio Generado", interactive=False)
    # The input order must match the positional parameters of synthesize_sync.
    btn.click(
        synthesize_sync,
        inputs=[text_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt],
        outputs=[conv_display, aud]
    )
demo.launch()