File size: 5,088 Bytes
fd43dfa
039f896
fd43dfa
 
 
fe32dd7
039f896
fd43dfa
039f896
a3d55f4
add50a1
 
 
 
fd43dfa
90d12be
add50a1
a3d55f4
 
fe32dd7
90d12be
a3d55f4
add50a1
a3d55f4
 
 
 
add50a1
a3d55f4
add50a1
a3d55f4
add50a1
90d12be
 
fe32dd7
 
 
 
 
 
 
add50a1
90d12be
 
 
 
 
 
 
 
 
fe32dd7
 
 
 
 
ebb50cf
 
 
 
59b69bc
 
 
 
90d12be
59b69bc
 
 
 
 
 
 
 
ebb50cf
 
 
 
 
 
fe32dd7
 
90d12be
fe32dd7
59b69bc
 
 
 
de6323e
 
 
 
 
 
 
ebb50cf
 
90d12be
 
ebb50cf
90d12be
ebb50cf
de6323e
 
 
 
 
ebb50cf
de6323e
ebb50cf
de6323e
 
fd43dfa
90d12be
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
import os
import asyncio
from conver import ConversationConfig, URLToAudioConverter
from dotenv import load_dotenv
from pydub import AudioSegment

# Load environment variables (e.g. TOGETHER_API_KEY) from a local .env file.
load_dotenv()

# Define paths relative to the root directory
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
MUSICA_FONDO = os.path.join(ROOT_DIR, "musica.mp3")  # default background-music bed
TAG1 = os.path.join(ROOT_DIR, "tag.mp3")   # outro jingle, appended after the mix
TAG2 = os.path.join(ROOT_DIR, "tag2.mp3")  # transition jingle, overlaid on silences

def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str:
    """Mix a background-music bed under the podcast and overlay jingle tags.

    The podcast track is overlaid on looped, attenuated background music,
    an outro tag is appended, and a short transition tag is placed once per
    detected silence in the podcast.

    Args:
        audio_path: Path to the rendered podcast MP3.
        custom_music_path: Optional user-supplied music file; falls back to
            the bundled MUSICA_FONDO when missing.

    Returns:
        Path of the exported "*_con_musica.mp3" file, or an "Error: ..."
        string when a required input file is absent (callers display this
        string directly in the UI).
    """
    if not os.path.exists(audio_path):
        return f"Error: Audio file {audio_path} not found"

    podcast_audio = AudioSegment.from_file(audio_path)
    music_file = custom_music_path if custom_music_path and os.path.exists(custom_music_path) else MUSICA_FONDO

    if not os.path.exists(music_file):
        return f"Error: Music file {music_file} not found"

    musica_fondo = AudioSegment.from_file(music_file).apply_gain(-15)

    if not os.path.exists(TAG1):
        return f"Error: Tag file {TAG1} not found"
    if not os.path.exists(TAG2):
        return f"Error: Tag file {TAG2} not found"

    tag_outro = AudioSegment.from_file(TAG1).apply_gain(-5)
    tag_trans = AudioSegment.from_file(TAG2).apply_gain(-5)

    # Loop the music bed until it covers the podcast, then trim to length.
    duracion_podcast = len(podcast_audio)
    repeticiones = (duracion_podcast // len(musica_fondo)) + 1
    musica_fondo_loop = (musica_fondo * repeticiones)[:duracion_podcast]

    mezcla = musica_fondo_loop.overlay(podcast_audio)
    mezcla = mezcla + tag_outro

    # Detect silent windows in the podcast: 500 ms window, 100 ms hop.
    silent_ranges = []
    for i in range(0, len(podcast_audio) - 500, 100):
        chunk = podcast_audio[i:i + 500]
        if chunk.dBFS < -40:
            silent_ranges.append((i, i + 500))

    # BUG FIX: the hop (100 ms) is smaller than the window (500 ms), so a
    # single silence produced many overlapping ranges and the transition tag
    # was overlaid repeatedly 100 ms apart (audible echo). Merging
    # overlapping/adjacent windows yields one range per silence, and also
    # makes the length check below meaningful (raw ranges were always
    # exactly 500 ms long).
    merged_ranges = []
    for start, end in silent_ranges:
        if merged_ranges and start <= merged_ranges[-1][1]:
            merged_ranges[-1] = (merged_ranges[-1][0], max(merged_ranges[-1][1], end))
        else:
            merged_ranges.append((start, end))

    # Place the transition tag once per silence long enough to hold it.
    for start, end in merged_ranges:
        if (end - start) >= len(tag_trans):
            mezcla = mezcla.overlay(tag_trans, position=start + 50)

    # splitext is safer than str.replace(".mp3", ...), which would also
    # rewrite any ".mp3" occurring earlier in the path.
    base, _ = os.path.splitext(audio_path)
    output_path = f"{base}_con_musica.mp3"
    mezcla.export(output_path, format="mp3")
    return output_path

def synthesize_sync(article_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt):
    """Blocking wrapper around the async `synthesize` coroutine (for Gradio)."""
    coro = synthesize(
        article_url,
        text_input,
        language,
        skip_llm,
        agregar_musica,
        custom_music,
        custom_prompt,
    )
    return asyncio.run(coro)

async def synthesize(article_url, text_input, language="en", skip_llm=False, agregar_musica=False, custom_music=None, custom_prompt=None):
    """Produce a podcast (conversation text, audio file) from a URL or raw text.

    Returns a (conversation, output_file) pair on success; on failure returns
    an "Error: ..." string and None so the UI can show the message.
    """
    # Guard clause: at least one content source is required.
    if not (article_url or text_input):
        return "Error: Ingresa una URL o texto", None

    try:
        config = ConversationConfig(custom_prompt_template=custom_prompt)
        converter = URLToAudioConverter(
            config, llm_api_key=os.environ.get("TOGETHER_API_KEY")
        )

        # (host1, host2) voice pairs per language; unknown codes fall back to English.
        voice_map = {
            "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
            "es": ("es-ES-AlvaroNeural", "es-ES-ElviraNeural"),
        }
        voice1, voice2 = voice_map.get(language, voice_map["en"])

        # Pick the pipeline: raw TTS (no LLM), LLM over pasted text, or LLM over a URL.
        if text_input and skip_llm:
            pending = converter.raw_text_to_audio(text_input, voice1, voice2, custom_music)
        elif text_input:
            pending = converter.text_to_audio(text_input, voice1, voice2, custom_music)
        else:
            pending = converter.url_to_audio(article_url, voice1, voice2, custom_music)
        output_file, conversation = await pending

        # Optional post-processing: background music bed + jingle tags.
        if agregar_musica:
            output_file = mezclar_musica_y_tags(output_file, custom_music)

        return conversation, output_file
    except Exception as e:
        return f"Error: {str(e)}", None

# --- Gradio UI: input form, generate button, and output panes -------------
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 🎙 Podcast Converter")
    with gr.Group():
        # Content sources: either a URL or pasted text (validated in synthesize).
        text_url = gr.Textbox(label="URL (opcional)", placeholder="https://...")
        text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
        language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
        # skip_llm routes text straight to TTS without LLM rewriting.
        skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
        # Enables the mezclar_musica_y_tags post-processing step.
        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
        # Optional prompt template forwarded to ConversationConfig; must keep
        # the "{text}" placeholder, as the example in the placeholder shows.
        custom_prompt = gr.Textbox(
            label="Prompt personalizado (opcional)",
            placeholder="{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {\"conversation\": [{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}, {\"speaker\": \"Anfitrión2\", \"text\": \"...\"}]}"
        )
        btn = gr.Button("Generar Podcast", variant="primary")
    
    with gr.Row():
        # Outputs: the generated dialogue and the rendered audio.
        conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
        aud = gr.Audio(label="Audio Generado", interactive=False)
    
    # Gradio callbacks are sync, so the button uses the asyncio.run wrapper.
    btn.click(
        synthesize_sync,
        inputs=[text_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt],
        outputs=[conv_display, aud]
    )

demo.launch()