File size: 4,549 Bytes
fd43dfa
039f896
fd43dfa
 
 
fe32dd7
039f896
fd43dfa
039f896
fe32dd7
 
 
fd43dfa
90d12be
fe32dd7
90d12be
 
 
 
fe32dd7
 
 
 
 
 
 
90d12be
 
 
 
 
 
 
 
 
 
fe32dd7
 
 
 
 
90d12be
 
fe32dd7
90d12be
59b69bc
 
 
 
90d12be
59b69bc
 
 
 
 
 
 
 
 
90d12be
fe32dd7
90d12be
59b69bc
90d12be
fe32dd7
 
90d12be
fe32dd7
59b69bc
 
 
 
de6323e
 
 
 
 
 
 
fe32dd7
90d12be
 
 
 
 
de6323e
 
 
 
 
 
 
 
90d12be
de6323e
 
fd43dfa
90d12be
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import os
import asyncio
from conver import ConversationConfig, URLToAudioConverter
from dotenv import load_dotenv
from pydub import AudioSegment

# Populate the process environment via python-dotenv before anything reads it;
# synthesize() later looks up TOGETHER_API_KEY in os.environ.
load_dotenv()

# Audio assets used by mezclar_musica_y_tags(), given as relative paths.
MUSICA_FONDO = "musica.mp3"  # background music used when no usable custom track is supplied
TAG1 = "tag.mp3"  # appended after the mix as the outro
TAG2 = "tag2.mp3"  # overlaid on silent stretches of the podcast

def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str:
    """Mix a podcast track with looping background music, an outro and transition tags.

    The background music is attenuated by 15 dB, looped/trimmed to the podcast
    length and placed underneath the speech. ``TAG1`` is appended as an outro and
    ``TAG2`` is overlaid once on every silent stretch that is long enough to
    hold it. The result is exported as MP3 next to the input file.

    Args:
        audio_path: Path of the generated podcast audio.
        custom_music_path: Optional replacement for ``MUSICA_FONDO``. May be a
            path, or an object exposing the path as ``.name`` (e.g. a temp-file
            wrapper handed over by an upload widget). Ignored when missing or
            when the file does not exist.

    Returns:
        Path of the exported file: the input path with its extension replaced
        by ``_con_musica.mp3``.
    """
    podcast_audio = AudioSegment.from_file(audio_path)

    # Normalise the optional custom track to a plain path string so that
    # os.path.exists() never receives a file-like object.
    if isinstance(custom_music_path, (str, os.PathLike)):
        music_candidate = os.fspath(custom_music_path)
    else:
        music_candidate = getattr(custom_music_path, "name", None)
    if music_candidate and os.path.exists(music_candidate):
        music_file = music_candidate
    else:
        music_file = MUSICA_FONDO

    musica_fondo = AudioSegment.from_file(music_file).apply_gain(-15)
    tag_outro = AudioSegment.from_file(TAG1).apply_gain(-5)
    tag_trans = AudioSegment.from_file(TAG2).apply_gain(-5)

    duracion_podcast = len(podcast_audio)
    if len(musica_fondo) > 0:
        # Repeat the music enough times to cover the podcast, then trim.
        repeticiones = (duracion_podcast // len(musica_fondo)) + 1
        musica_fondo_loop = (musica_fondo * repeticiones)[:duracion_podcast]
        mezcla = musica_fondo_loop.overlay(podcast_audio)
    else:
        # A zero-length music file cannot be looped (division by zero);
        # fall back to the bare podcast.
        mezcla = podcast_audio
    mezcla = mezcla + tag_outro  # tag.mp3 as the outro

    # Scan the speech with 500 ms windows every 100 ms and merge overlapping
    # quiet windows into contiguous ranges, so each pause is reported once
    # with its real duration instead of as many fixed 500 ms slices.
    silent_ranges = []
    for i in range(0, duracion_podcast - 500, 100):
        if podcast_audio[i:i + 500].dBFS < -40:
            if silent_ranges and i <= silent_ranges[-1][1]:
                silent_ranges[-1] = (silent_ranges[-1][0], i + 500)
            else:
                silent_ranges.append((i, i + 500))

    # One transition tag per pause, only where the pause can hold it.
    for start, end in silent_ranges:
        if (end - start) >= len(tag_trans):
            mezcla = mezcla.overlay(tag_trans, position=start + 50)

    # Replace only the real extension: str.replace() would return the input
    # path unchanged for non-.mp3 files (overwriting the source) and would
    # also rewrite ".mp3" occurring elsewhere in the path.
    base, _ext = os.path.splitext(audio_path)
    output_path = f"{base}_con_musica.mp3"
    mezcla.export(output_path, format="mp3")
    return output_path

def synthesize_sync(article_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt):
    """Blocking adapter around the ``synthesize`` coroutine.

    Forwards every argument positionally, drives the coroutine to completion
    with ``asyncio.run`` and returns whatever it returns.
    """
    pending = synthesize(
        article_url,
        text_input,
        language,
        skip_llm,
        agregar_musica,
        custom_music,
        custom_prompt,
    )
    return asyncio.run(pending)

async def synthesize(article_url, text_input, language="en", skip_llm=False, agregar_musica=False, custom_music=None, custom_prompt=None):
    """Generate a two-voice podcast from a URL or from pasted text.

    Args:
        article_url: Article URL; used only when no text is supplied.
        text_input: Manual text; takes precedence over ``article_url``.
        language: Key into the voice table ("en" or "es"); unknown values
            fall back to the English voices.
        skip_llm: When true and text is supplied, the text is sent to
            ``raw_text_to_audio`` instead of ``text_to_audio``.
        agregar_musica: When true, the result is post-processed with
            ``mezclar_musica_y_tags``.
        custom_music: Optional custom music, forwarded unchanged to the
            converter and to ``mezclar_musica_y_tags``.
        custom_prompt: Passed to ``ConversationConfig`` as
            ``custom_prompt_template``.
            NOTE(review): an empty string is forwarded as-is; confirm that
            ConversationConfig treats it like "no custom prompt".

    Returns:
        ``(conversation, output_file)`` on success, or
        ``("Error: ...", None)`` when validation or any processing step fails.
    """
    import logging

    # Treat None and whitespace-only values as "not provided", so a blank
    # text box neither passes validation nor shadows a valid URL.
    article_url = (article_url or "").strip()
    text_input = (text_input or "").strip()
    if not article_url and not text_input:
        return "Error: Ingresa una URL o texto", None

    try:
        config = ConversationConfig(custom_prompt_template=custom_prompt)
        converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))

        voices = {
            "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
            "es": ("es-ES-AlvaroNeural", "es-ES-ElviraNeural"),
        }
        voice1, voice2 = voices.get(language, voices["en"])

        if skip_llm and text_input:
            output_file, conversation = await converter.raw_text_to_audio(text_input, voice1, voice2, custom_music)
        elif text_input:
            output_file, conversation = await converter.text_to_audio(text_input, voice1, voice2, custom_music)
        else:
            output_file, conversation = await converter.url_to_audio(article_url, voice1, voice2, custom_music)

        if agregar_musica:
            output_file = mezclar_musica_y_tags(output_file, custom_music)

        return conversation, output_file
    except Exception as e:
        # UI boundary: the user gets a readable message, but the traceback
        # is kept in the logs instead of being discarded.
        logging.getLogger(__name__).exception("Podcast synthesis failed")
        return f"Error: {str(e)}", None

# Gradio UI: a form of inputs, a row with the two outputs, and a click
# handler that routes the form values into synthesize_sync().
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 🎙 Podcast Converter")
    # Input form.
    with gr.Group():
        text_url = gr.Textbox(label="URL (opcional)", placeholder="https://...")
        text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
        language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
        skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
        # The placeholder only shows an example template; it is not submitted as a value.
        custom_prompt = gr.Textbox(
            label="Prompt personalizado (opcional)",
            placeholder="{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {\"conversation\": [{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}, {\"speaker\": \"Anfitrión2\", \"text\": \"...\"}]}"
        )
        btn = gr.Button("Generar Podcast", variant="primary")
    
    # Outputs: conversation text (or an error message) and the generated audio.
    with gr.Row():
        conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
        aud = gr.Audio(label="Audio Generado", interactive=False)
    
    # The inputs list is positional: its order must match the parameter order
    # of synthesize_sync(); the two return values map onto `outputs` in order.
    btn.click(
        synthesize_sync,
        inputs=[text_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt],
        outputs=[conv_display, aud]
    )

# NOTE: runs at import time (there is no `if __name__ == "__main__"` guard).
demo.launch()