# Podcastking2 / app.py
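# Assumed dependencies (this snippet ships no requirements file): gradio, torch,
# transformers and pydub (which needs ffmpeg installed on the system), plus
# httpx for the fetch_text sketch below and, optionally, Coqui TTS (`pip install TTS`).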
import gradio as gr
import asyncio
import torch
from transformers import pipeline
from pydub import AudioSegment
# Local model configuration (CPU only)
pipe = pipeline(
    "text-generation",
    model="OpenAssistant/llama2-7b-orca-8k-3319",
    device="cpu",
    torch_dtype=torch.float32,
)
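
# synthesize() below calls fetch_text(), which was missing from the original
# file. A minimal sketch, assuming httpx is available; it downloads the page
# and crudely strips HTML tags (a real extractor like trafilatura would do a
# better job on article pages).
import re
import httpx

async def fetch_text(url):
    if not url:
        return None
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
    # Crude tag stripping; keeps only the visible text
    return re.sub(r"<[^>]+>", " ", response.text).strip()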
# Basic TTS (mocked; replace with Silero/Coqui if you want real speech)
def text_to_speech(text, language="es"):
    # Mock: writes one second of silence (plug a real TTS engine in here)
    audio_path = "output.mp3"
    AudioSegment.silent(duration=1000).export(audio_path, format="mp3")
    return audio_path
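
# One possible real implementation, sketched with Coqui TTS as the comment
# above suggests (assumes `pip install TTS`; the model names are assumptions
# taken from Coqui's model zoo and are downloaded on first use). Swap this in
# for the mock inside synthesize() to get actual speech.
def text_to_speech_coqui(text, language="es"):
    from TTS.api import TTS  # imported lazily since the package is optional
    # Hypothetical model choice; anything listed by TTS.list_models() would work
    model_name = ("tts_models/es/css10/vits" if language == "es"
                  else "tts_models/en/ljspeech/tacotron2-DDC")
    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
    audio_path = "output.wav"  # Gradio's Audio component plays WAV directly
    tts.tts_to_file(text=text, file_path=audio_path)
    return audio_path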
async def synthesize(text_url, text_input, language="es"):
    # Use the pasted text directly, or extract it from the URL
    text = text_input if text_input else await fetch_text(text_url)
    if not text:
        return "Error: No hay texto para procesar", None
    # Generate a dialogue in the requested language
    prompt = f"Convierte esto en un diálogo de podcast (en {language}): {text}"
    # max_new_tokens bounds only the generated part (max_length would count the
    # prompt too), and return_full_text=False keeps the prompt out of the output
    output = pipe(prompt, max_new_tokens=1000, return_full_text=False)
    conversation = output[0]["generated_text"]
    # Convert to audio (mocked)
    audio_path = text_to_speech(conversation, language)
    return conversation, audio_path
def synthesize_sync(text_url, text_input, language):
    return asyncio.run(synthesize(text_url, text_input, language))
# Interface
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 🎙 Convertir Texto/URL en Podcast")
    gr.Markdown("### Soporta español/otros idiomas (sin GPU/API)")
    with gr.Group():
        text_url = gr.Textbox(label="URL del artículo (opcional)", placeholder="Ej: https://example.com")
        text_input = gr.Textbox(label="O pega el texto aquí", lines=5, placeholder="Hola, esto es un ejemplo...")
        language = gr.Dropdown(
            label="Idioma",
            choices=["es", "en", "fr", "de"],
            value="es",
        )
        btn = gr.Button("Generar Podcast", variant="primary")
    with gr.Row():
        conv_display = gr.Textbox(label="Conversación generada", interactive=False)
        aud = gr.Audio(label="Podcast", interactive=False)
    btn.click(
        synthesize_sync,
        inputs=[text_url, text_input, language],
        outputs=[conv_display, aud],
    )
if __name__ == "__main__":
    demo.launch()