Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,115 Bytes
c301481 72ae2e5 8279ac9 7ca09b0 8279ac9 7ca09b0 c301481 8279ac9 72ae2e5 8279ac9 72ae2e5 8279ac9 72ae2e5 8279ac9 72ae2e5 c301481 8dbb6cc c301481 8dbb6cc 05a5c90 8dbb6cc c301481 02e83fb c301481 8279ac9 c301481 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import xml.etree.ElementTree as ET
from datetime import datetime
import os
from huggingface_hub import InferenceClient
import re
# Module-level inference client shared by the functions below.
# The access token is read from the HF_TOKEN environment variable and is
# None when that variable is unset.
client = InferenceClient(
    "meta-llama/Llama-3.1-8B-Instruct",
    provider="hf-inference",
    token=os.environ.get("HF_TOKEN"),
)
# Leading "Headline"/"Description" label, optionally wrapped in markdown
# emphasis. The label must be followed by a colon and/or closing asterisks so
# that ordinary text which merely starts with one of these words is untouched.
# The colon may sit on either side of the closing asterisks
# ("**Headline:**" and "**Headline**:").
_LABEL_RE = re.compile(
    r"^\s*\*{0,2}(?:Headline|Description)(?:\*{1,2}\s*:?|\s*:\*{0,2})\s*",
    flags=re.IGNORECASE,
)


def clean_label(line):
    """Strip a leading ``Headline``/``Description`` label from *line*.

    Recognised forms (case-insensitive) include ``Headline: x``,
    ``**Headline:** x``, ``**Headline**: x`` and ``*Description* x``.
    Lines without such a label are returned unchanged, including text that
    only begins with the word itself (for example ``Descriptions of ...``).

    Args:
        line: A single line of model output.

    Returns:
        The line with the label and the whitespace following it removed.
    """
    return _LABEL_RE.sub("", line)
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
    """Ask the LLM for a headline and a short description for the podcast episode.

    Args:
        subject: Paper text or topic. Only the first 10,000 characters are
            placed in the prompt.
        steering_question: Optional question the episode should focus on.
            When given, it is appended to the prompt; when omitted, the
            prompt is sent without any addition.

    Returns:
        A ``(headline, description)`` tuple. With two or more non-empty
        lines of model output, the first line is the headline and the rest
        are joined with spaces into the description. With a single line,
        the headline is its first 80 characters and the description is the
        whole line. Both are empty strings if the model returns no text.
    """
    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
1. A catchy, informative headline for a podcast episode about it (max 15 words).
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
Output ONLY the headline on the first line, and the description on the second line. Do NOT include any labels, markdown, or extra formatting.
Here is the topic:
{subject[:10000]}
"""
    if steering_question:
        # Previously this argument was accepted but never reached the model.
        prompt += f"The episode should focus on this question: {steering_question}\n"

    messages = [
        {"role": "system", "content": "You are a world-class podcast producer."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat_completion(
        messages,
        max_tokens=512,
    )
    # Treat a missing (None) content field as empty text instead of crashing.
    full_text = (response.choices[0].message.content or "").strip()

    # Strip labels line by line, then drop lines that were nothing but a label
    # (e.g. "**Headline:**" on its own line) so they cannot become the headline.
    cleaned = (clean_label(raw.strip()) for raw in full_text.splitlines())
    lines = [text for text in cleaned if text]

    if len(lines) >= 2:
        return lines[0], " ".join(lines[1:])

    # Single-line (or empty) output: reuse the cleaned line for both fields.
    single = lines[0] if lines else ""
    return single[:80], single
def indent(elem, level=0):
    """Pretty-print an ElementTree element in place by rewriting whitespace.

    Whitespace-only (or missing) ``text`` and ``tail`` values are replaced
    with a newline plus one space per nesting level; text or tails that
    contain non-whitespace characters are left as they are.

    Args:
        elem: The element to indent; its descendants are processed recursively.
        level: Nesting depth of *elem* (0 for the root).
    """
    pad = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = pad + " "
        for child in elem:
            indent(child, level + 1)
        # The last child's tail sits directly before this element's closing
        # tag, so it must use this element's indentation, not the child's.
        # Without this the closing tag is indented one level too deep.
        last_child = elem[-1]
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf element below the root: only its tail needs indentation.
        elem.tail = pad
# -----------------------------------------------------------------------------
# UPDATE RSS
# -----------------------------------------------------------------------------
def get_next_episode_number(podcast_dir="podcasts"):
    """Return the next episode number for *podcast_dir*.

    The number is one more than the count of directory entries whose name
    ends with ``.wav``.
    """
    wav_count = sum(1 for entry in os.listdir(podcast_dir) if entry.endswith(".wav"))
    return wav_count + 1
def update_rss(subject, audio_url, audio_length, paper_id=None, rss_path="rss.xml"):
    """Add a new episode ``<item>`` to the RSS feed stored at *rss_path*.

    The title and description are generated from *subject* by
    ``generate_headline_and_description``. The new item is inserted before
    the first existing ``<item>`` (or appended if there is none),
    ``<lastBuildDate>`` is refreshed when present, and the file is rewritten
    in place.

    Args:
        subject: Paper text or topic used to generate the title/description.
        audio_url: URL of the episode audio; also used as the item's guid.
        audio_length: Value written to the enclosure's ``length`` attribute.
        paper_id: Optional Hugging Face paper id; when given, a link to the
            paper page is appended to the description.
        rss_path: Path of the RSS XML file to update.

    Raises:
        ValueError: If the feed has no ``<channel>`` element.
    """
    # Function-scope imports: only this function needs these names.
    from datetime import timezone
    from email.utils import format_datetime

    itunes_ns = "http://www.itunes.com/dtds/podcast-1.0.dtd"
    # Registered so the namespace is serialised with the "itunes" prefix.
    ET.register_namespace("itunes", itunes_ns)

    # Generate headline and description automatically
    title, description = generate_headline_and_description(subject)
    if paper_id:
        paper_url = f"https://huggingface.co/papers/{paper_id}"
        description += f'\n\n<a href="{paper_url}">[Read the paper on Hugging Face]</a>'

    tree = ET.parse(rss_path)
    root = tree.getroot()
    channel = root.find("channel")
    if channel is None:
        raise ValueError(f"{rss_path!r} has no <channel> element")

    # RFC 2822 timestamp in UTC. format_datetime always emits English
    # day/month names, unlike strftime("%a"/"%b"), which follows the locale.
    now_rfc2822 = format_datetime(datetime.now(timezone.utc))

    # Update lastBuildDate
    last_build_date = channel.find("lastBuildDate")
    if last_build_date is not None:
        last_build_date.text = now_rfc2822

    # Create new item
    item = ET.Element("item")
    ET.SubElement(item, "title").text = title
    ET.SubElement(item, "description").text = description
    ET.SubElement(item, "pubDate").text = now_rfc2822
    ET.SubElement(item, "enclosure", url=audio_url, length=str(audio_length), type="audio/wav")
    ET.SubElement(item, "guid").text = audio_url
    # Namespace-qualified tag: ElementTree then declares xmlns:itunes itself.
    # A literal "itunes:explicit" tag is written verbatim and is only valid
    # when some other element already causes the prefix to be declared.
    ET.SubElement(item, f"{{{itunes_ns}}}explicit").text = "false"

    # Newest episode first: insert before the first existing <item>,
    # or append if the channel has no items yet.
    first_item = channel.find("item")
    if first_item is not None:
        channel.insert(list(channel).index(first_item), item)
    else:
        channel.append(item)

    # Write back to file with pretty formatting
    indent(root)
    tree.write(rss_path, encoding="utf-8", xml_declaration=True)