Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,568 Bytes
547fef1 7961a34 547fef1 a668eca e211da0 72ae2e5 547fef1 7961a34 547fef1 7961a34 547fef1 7961a34 547fef1 7961a34 547fef1 8dbb6cc 547fef1 7961a34 e211da0 7961a34 e211da0 7961a34 e211da0 dfa51a8 e211da0 bbec9d3 e211da0 bbec9d3 7961a34 1eea8aa dfa51a8 7961a34 e211da0 72ae2e5 8dbb6cc bbec9d3 ec6d43c dfa51a8 547fef1 7961a34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
from papers import PaperManager
from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
import soundfile as sf
import numpy as np
import argparse
from huggingface_hub import HfApi
import requests
import json
from datetime import datetime
import os
import tempfile
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
def submit_job(
    inference_provider: str,
    hf_token: str
) -> str:
    """Launch the podcast-generation job on the Hugging Face Jobs API.

    Args:
        inference_provider: Provider name forwarded to run_job.py via --provider.
        hf_token: Hugging Face API token; used both to resolve the username
            and as the bearer token / job environment secret.

    Returns:
        The raw response body text from the Jobs API.
    """
    # Resolve the username that owns the job queue from the token itself.
    username = HfApi(token=hf_token).whoami()["name"]
    space_id = "fdaudens/podcast-jobs"  # Space whose code the job runs
    flavor = "cpu-basic"  # Machine type for the job

    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}"
    }
    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": [
            "--provider", inference_provider
        ],
        "environment": {
            # Forward the token so the job can authenticate from inside.
            "HF_API_KEY": hf_token
        },
        "flavor": flavor
    }

    # Launch the job. `json=` serializes the payload for us; a timeout
    # prevents the caller from hanging forever on a stalled connection.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    return response.text
def main():
    """Generate a podcast from the top paper and publish it to the Space.

    Pipeline: fetch the most popular paper, write a two-host script,
    synthesize audio with Kokoro, upload the WAV to the Space, then
    regenerate and upload the RSS feed.
    """
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()
    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content.
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    # Get the first (most popular) paper's id and text.
    first_paper = list(top_papers.values())[0]
    subject = first_paper['content']
    paper_id = first_paper['id']

    # 2. Generate the podcast script.
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio, line by line.
    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]
    sr = 24000  # Kokoro output sample rate
    speed = 1.0
    audio_segments = []
    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)
    for line in lines:
        # Script lines are tagged with the speaker; untagged lines
        # fall back to the female voice.
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line
        for _, ps, _ in pipeline(utterance, voice, speed):
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            audio_segments.append(audio_numpy)

    # Guard clause: nothing to publish if synthesis produced no audio.
    if not audio_segments:
        print("No audio generated.")
        return

    full_audio = np.concatenate(audio_segments)

    # Create a temporary file to hold the WAV before upload.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name

    # try/finally guarantees the delete=False temp file is removed even on
    # an early return (missing token) or an upload failure — the original
    # only cleaned up on the full success path.
    try:
        sf.write(temp_path, full_audio, sr)

        # Get API token from environment.
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
        if hf_token is None:
            print("No Hugging Face token found in environment. Cannot upload to Space.")
            return

        # Initialize the Hugging Face API client.
        api = HfApi(token=hf_token)

        # Set up Space path info.
        space_id = "fdaudens/podcast-jobs"  # Your Space ID
        today = datetime.now().strftime("%Y-%m-%d")
        base_name = args.name
        podcast_filename = f"{base_name}-{today}.wav"
        # Path in the Space repository.
        space_path = f"podcasts/{podcast_filename}"

        # Check file size before upload.
        file_size = os.path.getsize(temp_path)
        print("Temp file size before upload:", file_size)

        # Upload directly to the Space (crucial: repo_type="space").
        print(f"Uploading podcast to Space {space_id} at path {space_path}...")
        result = api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=space_path,
            repo_id=space_id,
            repo_type="space",
            token=hf_token
        )
        print("Upload result:", result)

        audio_url = f"https://huggingface.co/spaces/{space_id}/resolve/main/{space_path}"
        audio_length = file_size  # RSS enclosure length is the byte size
        print(f"Podcast audio uploaded to Space at {space_path}")
        print(f"Access URL: {audio_url}")

        # Regenerate the RSS feed with the new episode.
        update_rss(subject, audio_url, audio_length, paper_id=paper_id)

        # Push the updated feed back to the Space.
        api.upload_file(
            path_or_fileobj="rss.xml",
            path_in_repo="rss.xml",
            repo_id=space_id,
            repo_type="space",
            token=hf_token
        )
        print("rss.xml uploaded to Space.")
    finally:
        # Clean up the temporary file in every exit path.
        os.unlink(temp_path)


if __name__ == "__main__":
    main()