Jaward committed on
Commit
bc6ee15
·
verified ·
1 Parent(s): e368dbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +428 -428
app.py CHANGED
@@ -6,6 +6,8 @@ import asyncio
6
  import logging
7
  import torch
8
  import random
 
 
9
  from serpapi import GoogleSearch
10
  from pydantic import BaseModel
11
  from autogen_agentchat.agents import AssistantAgent
@@ -19,9 +21,10 @@ from autogen_ext.models.ollama import OllamaChatCompletionClient
19
  from markdown_pdf import MarkdownPdf, Section
20
  import traceback
21
  import soundfile as sf
22
- import tempfile
23
  from pydub import AudioSegment
24
  from TTS.api import TTS
 
25
 
26
  # Set up logging
27
  logging.basicConfig(
@@ -35,8 +38,6 @@ logging.basicConfig(
35
  logger = logging.getLogger(__name__)
36
 
37
  # Set up environment
38
- OUTPUT_DIR = "outputs"
39
- os.makedirs(OUTPUT_DIR, exist_ok=True)
40
  os.environ["COQUI_TOS_AGREED"] = "1"
41
 
42
  # Define Pydantic model for slide data
@@ -82,17 +83,6 @@ def search_web(query: str, serpapi_key: str) -> str:
82
  logger.error("Unexpected error during search: %s", str(e))
83
  return f"Unexpected error during search: {str(e)}"
84
 
85
- # Define helper function for progress HTML
86
- def html_with_progress(label, progress):
87
- return f"""
88
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
89
- <div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
90
- <div style="width: {progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
91
- </div>
92
- <h2 style="font-style: italic; color: #555;">{label}</h2>
93
- </div>
94
- """
95
-
96
  # Function to get model client based on selected service
97
  def get_model_client(service, api_key):
98
  if service == "OpenAI-gpt-4o-2024-08-06":
@@ -139,7 +129,7 @@ def clean_script_text(script):
139
  return script
140
 
141
  # Helper function to validate and convert speaker audio (MP3 or WAV)
142
- async def validate_and_convert_speaker_audio(speaker_audio):
143
  if not os.path.exists(speaker_audio):
144
  logger.error("Speaker audio file does not exist: %s", speaker_audio)
145
  return None
@@ -152,9 +142,8 @@ async def validate_and_convert_speaker_audio(speaker_audio):
152
  audio = AudioSegment.from_mp3(speaker_audio)
153
  # Convert to mono, 22050 Hz
154
  audio = audio.set_channels(1).set_frame_rate(22050)
155
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
156
- audio.export(temp_file.name, format="wav")
157
- speaker_wav = temp_file.name
158
  elif ext == ".wav":
159
  speaker_wav = speaker_audio
160
  else:
@@ -172,9 +161,9 @@ async def validate_and_convert_speaker_audio(speaker_audio):
172
  if data.ndim == 2:
173
  logger.info("Converting stereo WAV to mono: %s", speaker_wav)
174
  data = data.mean(axis=1)
175
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
176
- sf.write(temp_file.name, data, samplerate)
177
- speaker_wav = temp_file.name
178
 
179
  logger.info("Validated speaker audio: %s", speaker_wav)
180
  return speaker_wav
@@ -280,7 +269,7 @@ def extract_json_from_message(message):
280
  return None
281
 
282
  # Function to generate Markdown and convert to PDF (portrait, centered)
283
- def generate_slides_pdf(slides):
284
  pdf = MarkdownPdf()
285
 
286
  for slide in slides:
@@ -299,12 +288,49 @@ def generate_slides_pdf(slides):
299
  """
300
  pdf.add_section(Section(markdown_content, toc=False))
301
 
302
- pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
303
  pdf.save(pdf_file)
304
 
305
  logger.info("Generated PDF slides (portrait): %s", pdf_file)
306
  return pdf_file
307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  # Async function to update audio preview
309
  async def update_audio_preview(audio_file):
310
  if audio_file:
@@ -315,47 +341,38 @@ async def update_audio_preview(audio_file):
315
  # Async function to generate lecture materials and audio
316
  async def on_generate(api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides):
317
  if not serpapi_key:
318
- yield f"""
319
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
320
- <h2 style="color: #d9534f;">SerpApi key required</h2>
321
- <p style="margin-top: 20px;">Please provide a valid SerpApi key and try again.</p>
322
- </div>
323
- """
324
  return
325
 
326
- # Initialize TTS model
327
- tts = None
328
- try:
329
- device = "cuda" if torch.cuda.is_available() else "cpu"
330
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
331
- logger.info("TTS model initialized on %s", device)
332
- except Exception as e:
333
- logger.error("Failed to initialize TTS model: %s", str(e))
334
- yield f"""
335
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
336
- <h2 style="color: #d9534f;">TTS model initialization failed</h2>
337
- <p style="margin-top: 20px;">Error: {str(e)}</p>
338
- <p>Please ensure the Coqui TTS model is properly installed and try again.</p>
339
- </div>
340
- """
341
- return
342
 
343
- model_client = get_model_client(api_service, api_key)
344
-
345
- research_agent = AssistantAgent(
346
- name="research_agent",
347
- model_client=model_client,
348
- handoffs=["slide_agent"],
349
- system_message="You are a Research Agent. Use the search_web tool to gather information on the topic and keywords from the initial message. Summarize the findings concisely in a single message, then use the handoff_to_slide_agent tool to pass the task to the Slide Agent. Do not produce any other output.",
350
- tools=[search_web]
351
- )
352
- slide_agent = AssistantAgent(
353
- name="slide_agent",
354
- model_client=model_client,
355
- handoffs=["script_agent"],
356
- system_message=f"""
357
- You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({num_slides}), generate exactly {num_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {num_slides + 3} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
358
- Example output for 2 content slides:
359
  ```json
360
  [
361
  {{"title": "Slide 1", "content": "Content for slide 1"}},
@@ -365,14 +382,14 @@ Example output for 2 content slides:
365
  {{"title": "Thank You", "content": "Thank you message"}}
366
  ]
367
  ```""",
368
- output_content_type=None,
369
- reflect_on_tool_use=False
370
- )
371
- script_agent = AssistantAgent(
372
- name="script_agent",
373
- model_client=model_client,
374
- handoffs=["feynman_agent"],
375
- system_message=f"""
376
  You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
377
  Example for 1 content slide:
378
  ```json
@@ -383,392 +400,373 @@ Example for 1 content slide:
383
  "Thanks for, um, attending today!"
384
  ]
385
  ```""",
386
- output_content_type=None,
387
- reflect_on_tool_use=False
388
- )
389
- feynman_agent = AssistantAgent(
390
- name="feynman_agent",
391
- model_client=model_client,
392
- handoffs=[],
393
- system_message=f"""
394
  You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
395
  Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
396
  """)
397
-
398
- swarm = Swarm(
399
- participants=[research_agent, slide_agent, script_agent, feynman_agent],
400
- termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
401
- )
402
-
403
- progress = 0
404
- label = "Research: in progress..."
405
- yield html_with_progress(label, progress)
406
- await asyncio.sleep(0.1)
407
-
408
- initial_message = f"""
409
- Lecture Title: {title}
410
- Topic: {topic}
411
- Additional Instructions: {instructions}
412
- Audience: {lecture_type}
413
- Number of Content Slides: {num_slides}
414
- Please start by researching the topic.
415
- """
416
- logger.info("Starting lecture generation for topic: %s", topic)
417
-
418
- slides = None
419
- scripts = None
420
- error_html = """
421
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
422
- <h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
423
- <p style="margin-top: 20px;">Please try again with different parameters or a different model.</p>
424
- </div>
425
- """
426
-
427
- try:
428
- logger.info("Research Agent starting...")
429
- task_result = await Console(swarm.run_stream(task=initial_message))
430
- logger.info("Swarm execution completed")
431
 
432
- script_retry_count = 0
433
- max_script_retries = 2
 
 
434
 
435
- for message in task_result.messages:
436
- source = getattr(message, 'source', getattr(message, 'sender', None))
437
- logger.debug("Processing message from %s, type: %s, content: %s", source, type(message), message.to_text() if hasattr(message, 'to_text') else str(message))
438
-
439
- if isinstance(message, HandoffMessage):
440
- logger.info("Handoff from %s to %s, Context: %s", source, message.target, message.context)
441
- if source == "research_agent" and message.target == "slide_agent":
442
- progress = 25
443
- label = "Slides: generating..."
444
- yield html_with_progress(label, progress)
445
- await asyncio.sleep(0.1)
446
- elif source == "slide_agent" and message.target == "script_agent":
447
- if slides is None:
448
- logger.warning("Slide Agent handoff without slides JSON")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
  extracted_json = extract_json_from_message(message)
450
  if extracted_json:
451
  slides = extracted_json
452
- logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
453
- if slides is None:
454
- label = "Slides: failed to generate..."
455
- yield html_with_progress(label, progress)
456
- await asyncio.sleep(0.1)
457
- progress = 50
458
- label = "Scripts: generating..."
459
- yield html_with_progress(label, progress)
460
- await asyncio.sleep(0.1)
461
- elif source == "script_agent" and message.target == "feynman_agent":
462
- if scripts is None:
463
- logger.warning("Script Agent handoff without scripts JSON")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  extracted_json = extract_json_from_message(message)
465
  if extracted_json:
466
  scripts = extracted_json
467
- logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
468
- progress = 75
469
- label = "Review: in progress..."
470
- yield html_with_progress(label, progress)
471
- await asyncio.sleep(0.1)
472
-
473
- elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
474
- logger.info("Research Agent completed research")
475
- progress = 25
476
- label = "Slides: generating..."
477
- yield html_with_progress(label, progress)
478
- await asyncio.sleep(0.1)
479
-
480
- elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
481
- logger.debug("Slide Agent message received: %s", message.to_text())
482
- extracted_json = extract_json_from_message(message)
483
- if extracted_json:
484
- slides = extracted_json
485
- logger.info("Slide Agent generated %d slides: %s", len(slides), slides)
486
- # Save slide content to individual files
487
- for i, slide in enumerate(slides):
488
- content_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_content.txt")
489
- try:
490
- with open(content_file, "w", encoding="utf-8") as f:
491
- f.write(slide["content"])
492
- logger.info("Saved slide content to %s: %s", content_file, slide["content"])
493
- except Exception as e:
494
- logger.error("Error saving slide content to %s: %s", content_file, str(e))
495
- progress = 50
496
- label = "Scripts: generating..."
497
- yield html_with_progress(label, progress)
498
- await asyncio.sleep(0.1)
499
- else:
500
- logger.warning("No JSON extracted from slide_agent message: %s", message.to_text())
501
-
502
- elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
503
- logger.debug("Script Agent message received: %s", message.to_text())
504
- extracted_json = extract_json_from_message(message)
505
- if extracted_json:
506
- scripts = extracted_json
507
- logger.info("Script Agent generated scripts for %d slides: %s", len(scripts), scripts)
508
- # Save raw scripts to individual files
509
- for i, script in enumerate(scripts):
510
- script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_raw_script.txt")
511
- try:
512
- with open(script_file, "w", encoding="utf-8") as f:
513
- f.write(script)
514
- logger.info("Saved raw script to %s: %s", script_file, script)
515
- except Exception as e:
516
- logger.error("Error saving raw script to %s: %s", script_file, str(e))
517
- progress = 75
518
- label = "Scripts generated and saved. Reviewing..."
519
- yield html_with_progress(label, progress)
520
- await asyncio.sleep(0.1)
521
- else:
522
- logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
523
- if script_retry_count < max_script_retries:
524
- script_retry_count += 1
525
- logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
526
- # Re-prompt script agent
527
- retry_message = TextMessage(
528
- content="Please generate scripts for the slides as per your instructions.",
529
- source="user",
530
- recipient="script_agent"
531
- )
532
- task_result.messages.append(retry_message)
533
- continue
534
-
535
- elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
536
- logger.info("Feynman Agent completed lecture review: %s", message.content)
537
- progress = 90 # Set to 90% before audio generation
538
- label = "Lecture materials ready. Generating audio..."
539
- yield html_with_progress(label, progress)
540
- await asyncio.sleep(0.1)
541
-
542
- logger.info("Slides state: %s", "Generated" if slides else "None")
543
- logger.info("Scripts state: %s", "Generated" if scripts else "None")
544
- if not slides or not scripts:
545
- error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
546
- error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
547
- logger.error("%s", error_message)
548
- logger.debug("Dumping all messages for debugging:")
549
- for msg in task_result.messages:
550
- source = getattr(msg, 'source', getattr(msg, 'sender', None))
551
- logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
552
- yield f"""
553
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
554
- <h2 style="color: #d9534f;">{error_message}</h2>
555
- <p style="margin-top: 20px;">Please try again with a different model (e.g., Anthropic-claude-3-sonnet-20240229) or simplify the topic/instructions.</p>
556
- </div>
557
- """
558
- return
559
-
560
- expected_slide_count = num_slides + 3
561
- if len(slides) != expected_slide_count:
562
- logger.error("Expected %d slides (including %d content slides + 3), but received %d", expected_slide_count, num_slides, len(slides))
563
- yield f"""
564
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
565
- <h2 style="color: #d9534f;">Incorrect number of slides</h2>
566
- <p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
567
- </div>
568
- """
569
- return
570
-
571
- if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
572
- logger.error("Scripts are not a list of strings: %s", scripts)
573
- yield f"""
574
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
575
- <h2 style="color: #d9534f;">Invalid script format</h2>
576
- <p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
577
- </div>
578
- """
579
- return
580
-
581
- if len(scripts) != expected_slide_count:
582
- logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
583
- yield f"""
584
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
585
- <h2 style="color: #d9534f;">Mismatch in slides and scripts</h2>
586
- <p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
587
- </div>
588
- """
589
- return
590
-
591
- # Generate PDF from slides
592
- pdf_file = generate_slides_pdf(slides)
593
- pdf_path = f"file://{os.path.abspath(pdf_file)}"
594
-
595
- audio_files = []
596
- speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
597
- validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
598
- if not validated_speaker_wav:
599
- logger.error("Invalid speaker audio after conversion, skipping TTS")
600
- yield f"""
601
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
602
- <h2 style="color: #d9534f;">Invalid speaker audio</h2>
603
- <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
604
- </div>
605
- """
606
- return
607
-
608
- # Process audio generation sequentially with retries
609
- for i, script in enumerate(scripts):
610
- cleaned_script = clean_script_text(script)
611
- audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.wav")
612
- script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
613
-
614
- # Save cleaned script
615
- try:
616
- with open(script_file, "w", encoding="utf-8") as f:
617
- f.write(cleaned_script or "")
618
- logger.info("Saved cleaned script to %s: %s", script_file, cleaned_script)
619
- except Exception as e:
620
- logger.error("Error saving cleaned script to %s: %s", script_file, str(e))
621
-
622
- if not cleaned_script:
623
- logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
624
- audio_files.append(None)
625
- # Update progress (even for skipped slides)
626
- progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
627
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
628
- yield html_with_progress(label, progress)
629
- await asyncio.sleep(0.1)
630
- continue
631
-
632
- max_retries = 2
633
- for attempt in range(max_retries + 1):
634
- try:
635
- current_text = cleaned_script
636
- if attempt > 0:
637
- sentences = re.split(r"[.!?]+", cleaned_script)
638
- sentences = [s.strip() for s in sentences if s.strip()][:2]
639
- current_text = ". ".join(sentences) + "."
640
- logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
641
 
642
- success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
643
- if not success:
644
- raise RuntimeError("TTS generation failed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
- logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
647
- audio_files.append(audio_file)
648
- progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
649
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
650
- yield html_with_progress(label, progress)
651
- await asyncio.sleep(0.1)
652
- break
653
- except Exception as e:
654
- logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
655
- if attempt == max_retries:
656
- logger.error("Max retries reached for slide %d, skipping", i + 1)
657
  audio_files.append(None)
658
- progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
659
  label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
660
  yield html_with_progress(label, progress)
661
  await asyncio.sleep(0.1)
662
- break
663
-
664
- audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]
665
-
666
- slides_info = json.dumps({"slides": [
667
- {"title": slide["title"], "content": slide["content"]}
668
- for slide in slides
669
- ], "audioFiles": audio_files})
670
-
671
- html_output = f"""
672
- <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
673
- <div id="slide-content" style="flex: 1; overflow: hidden;">
674
- <iframe id="pdf-viewer" src="https://mozilla.github.io/pdf.js/web/viewer.html?file={pdf_path}" style="width: 100%; height: 100%; border: none;"></iframe>
675
- </div>
676
- <div style="padding: 20px;">
677
- <div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
678
- <div id="progress-fill" style="width: {(1/len(slides)*100)}%; height: 100%; background-color: #4CAF50; border-radius: 2px;"></div>
679
- </div>
680
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
681
- <button onclick="prevSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
682
- <button onclick="togglePlay()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
683
- <button onclick="nextSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
684
  </div>
685
- <p id="slide-counter" style="text-align: center;">Slide 1 of {len(slides)}</p>
686
- </div>
687
- </div>
688
- <script>
689
- const lectureData = {slides_info};
690
- let currentSlide = 0;
691
- const totalSlides = lectureData.slides.length;
692
- const slideCounter = document.getElementById('slide-counter');
693
- const progressFill = document.getElementById('progress-fill');
694
- let audioElements = [];
695
- let currentAudio = null;
696
- const pdfViewer = document.getElementById('pdf-viewer');
697
 
698
- for (let i = 0; i < totalSlides; i++) {{
699
- if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
700
- const audio = new Audio(lectureData.audioFiles[i]);
701
- audioElements.push(audio);
702
- }} else {{
703
- audioElements.push(null);
704
- }}
705
- }}
706
 
707
- function updateSlide() {{
708
- pdfViewer.src = `https://mozilla.github.io/pdf.js/web/viewer.html?file={pdf_path}#page=${{currentSlide + 1}}`;
709
- slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
710
- progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
711
 
712
- if (currentAudio) {{
713
- currentAudio.pause();
714
- currentAudio.currentTime = 0;
715
- }}
716
 
717
- if (audioElements[currentSlide]) {{
718
- currentAudio = audioElements[currentSlide];
719
- currentAudio.play().catch(e => console.error('Audio play failed:', e));
720
- }} else {{
721
- currentAudio = null;
722
- }}
723
- }}
724
 
725
- function prevSlide() {{
726
- if (currentSlide > 0) {{
727
- currentSlide--;
728
- updateSlide();
729
- }}
730
- }}
731
 
732
- function nextSlide() {{
733
- if (currentSlide < totalSlides - 1) {{
734
- currentSlide++;
735
- updateSlide();
736
- }}
737
- }}
738
 
739
- function togglePlay() {{
740
- if (!audioElements[currentSlide]) return;
741
- if (currentAudio.paused) {{
742
- currentAudio.play().catch(e => console.error('Audio play failed:', e));
743
- }} else {{
744
- currentAudio.pause();
745
- }}
746
- }}
747
 
748
- audioElements.forEach((audio, index) => {{
749
- if (audio) {{
750
- audio.addEventListener('ended', () => {{
751
- if (index < totalSlides - 1) {{
752
- nextSlide();
 
 
753
  }}
754
  }});
755
- }}
756
- }});
757
- </script>
758
- """
759
- logger.info("Lecture generation completed successfully")
760
- yield html_output
761
-
762
- except Exception as e:
763
- logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
764
- yield f"""
765
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
766
- <h2 style="color: #d9534f;">Error during lecture generation</h2>
767
- <p style="margin-top: 10px; font-size: 16px;">{str(e)}</p>
768
- <p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
769
- </div>
770
- """
771
- return
772
 
773
  # Gradio interface
774
  with gr.Blocks(title="Agent Feynman") as demo:
@@ -803,6 +801,8 @@ with gr.Blocks(title="Agent Feynman") as demo:
803
  </div>
804
  """
805
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
 
 
806
 
807
  speaker_audio.change(
808
  fn=update_audio_preview,
@@ -813,7 +813,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
813
  generate_btn.click(
814
  fn=on_generate,
815
  inputs=[api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides],
816
- outputs=[slide_display]
817
  )
818
 
819
  if __name__ == "__main__":
 
6
  import logging
7
  import torch
8
  import random
9
+ import tempfile
10
+ import zipfile
11
  from serpapi import GoogleSearch
12
  from pydantic import BaseModel
13
  from autogen_agentchat.agents import AssistantAgent
 
21
  from markdown_pdf import MarkdownPdf, Section
22
  import traceback
23
  import soundfile as sf
24
+ import shutil
25
  from pydub import AudioSegment
26
  from TTS.api import TTS
27
+ from gradio_pdf import PDF
28
 
29
  # Set up logging
30
  logging.basicConfig(
 
38
  logger = logging.getLogger(__name__)
39
 
40
  # Set up environment
 
 
41
  os.environ["COQUI_TOS_AGREED"] = "1"
42
 
43
  # Define Pydantic model for slide data
 
83
  logger.error("Unexpected error during search: %s", str(e))
84
  return f"Unexpected error during search: {str(e)}"
85
 
 
 
 
 
 
 
 
 
 
 
 
86
  # Function to get model client based on selected service
87
  def get_model_client(service, api_key):
88
  if service == "OpenAI-gpt-4o-2024-08-06":
 
129
  return script
130
 
131
  # Helper function to validate and convert speaker audio (MP3 or WAV)
132
+ async def validate_and_convert_speaker_audio(speaker_audio, temp_dir):
133
  if not os.path.exists(speaker_audio):
134
  logger.error("Speaker audio file does not exist: %s", speaker_audio)
135
  return None
 
142
  audio = AudioSegment.from_mp3(speaker_audio)
143
  # Convert to mono, 22050 Hz
144
  audio = audio.set_channels(1).set_frame_rate(22050)
145
+ speaker_wav = os.path.join(temp_dir, "speaker_converted.wav")
146
+ audio.export(speaker_wav, format="wav")
 
147
  elif ext == ".wav":
148
  speaker_wav = speaker_audio
149
  else:
 
161
  if data.ndim == 2:
162
  logger.info("Converting stereo WAV to mono: %s", speaker_wav)
163
  data = data.mean(axis=1)
164
+ mono_wav = os.path.join(temp_dir, "speaker_mono.wav")
165
+ sf.write(mono_wav, data, samplerate)
166
+ speaker_wav = mono_wav
167
 
168
  logger.info("Validated speaker audio: %s", speaker_wav)
169
  return speaker_wav
 
269
  return None
270
 
271
  # Function to generate Markdown and convert to PDF (portrait, centered)
272
+ def generate_slides_pdf(slides, temp_dir):
273
  pdf = MarkdownPdf()
274
 
275
  for slide in slides:
 
288
  """
289
  pdf.add_section(Section(markdown_content, toc=False))
290
 
291
+ pdf_file = os.path.join(temp_dir, "slides.pdf")
292
  pdf.save(pdf_file)
293
 
294
  logger.info("Generated PDF slides (portrait): %s", pdf_file)
295
  return pdf_file
296
 
297
+ # Helper function to create ZIP file of outputs
298
def create_outputs_zip(temp_dir, slides, audio_files, scripts):
    """Bundle the generated lecture artifacts into one ZIP archive.

    The archive is written to ``lecture_outputs.zip`` inside ``temp_dir`` and
    contains, in this order: the slides PDF, one WAV per slide that has audio,
    and the raw and cleaned narration script of every slide. Any artifact that
    is missing on disk is silently left out.

    Args:
        temp_dir: Directory holding the generated artifacts; also receives
            the archive itself.
        slides: Sequence of slides; only its length is used, to know which
            per-slide script files to look for.
        audio_files: Per-slide audio paths; falsy entries mark slides
            without audio.
        scripts: Accepted for interface compatibility; not read here.

    Returns:
        Path of the ZIP archive that was written.
    """
    archive_path = os.path.join(temp_dir, "lecture_outputs.zip")

    # (path on disk, name inside the archive) pairs, listed in archive order.
    candidates = [(os.path.join(temp_dir, "slides.pdf"), "slides.pdf")]

    for number, wav_path in enumerate(audio_files, start=1):
        if wav_path:
            candidates.append((wav_path, f"slide_{number}.wav"))

    for number in range(1, len(slides) + 1):
        for arcname in (f"slide_{number}_raw_script.txt", f"slide_{number}_script.txt"):
            candidates.append((os.path.join(temp_dir, arcname), arcname))

    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for disk_path, arcname in candidates:
            # Existence is checked at write time; absent artifacts are skipped.
            if os.path.exists(disk_path):
                archive.write(disk_path, arcname)

    logger.info("Created ZIP file: %s", archive_path)
    return archive_path
322
+
323
+ # Helper function for progress HTML
324
def html_with_progress(label, progress):
    """Render an HTML progress panel: a green bar above an italic status line.

    Args:
        label: Status text shown under the bar. It is HTML-escaped before
            being embedded, because callers pass raw exception messages that
            may contain ``<``, ``>`` or ``&``.
        progress: Fill percentage for the bar, used as the CSS ``width`` of
            the inner bar (e.g. ``25`` or ``93.3``).

    Returns:
        An HTML fragment (str) suitable for a ``gr.HTML`` component.
    """
    # Local import keeps the block self-contained regardless of file imports.
    from html import escape

    safe_label = escape(str(label))
    return f"""
    <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
    <div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
    <div style="width: {progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
    </div>
    <h2 style="font-style: italic; color: #555;">{safe_label}</h2>
    </div>
    """
333
+
334
  # Async function to update audio preview
335
  async def update_audio_preview(audio_file):
336
  if audio_file:
 
341
  # Async function to generate lecture materials and audio
342
  async def on_generate(api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides):
343
  if not serpapi_key:
344
+ yield html_with_progress("SerpApi key required. Please provide a valid key.", 0)
 
 
 
 
 
345
  return
346
 
347
+ # Create temporary directory
348
+ with tempfile.TemporaryDirectory() as temp_dir:
349
+ # Initialize TTS model
350
+ tts = None
351
+ try:
352
+ device = "cuda" if torch.cuda.is_available() else "cpu"
353
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
354
+ logger.info("TTS model initialized on %s", device)
355
+ except Exception as e:
356
+ logger.error("Failed to initialize TTS model: %s", str(e))
357
+ yield html_with_progress(f"TTS model initialization failed: {str(e)}", 0)
358
+ return
 
 
 
 
359
 
360
+ model_client = get_model_client(api_service, api_key)
361
+
362
+ research_agent = AssistantAgent(
363
+ name="research_agent",
364
+ model_client=model_client,
365
+ handoffs=["slide_agent"],
366
+ system_message="You are a Research Agent. Use the search_web tool to gather information on the topic and keywords from the initial message. Summarize the findings concisely in a single message, then use the handoff_to_slide_agent tool to pass the task to the Slide Agent. Do not produce any other output.",
367
+ tools=[search_web]
368
+ )
369
+ slide_agent = AssistantAgent(
370
+ name="slide_agent",
371
+ model_client=model_client,
372
+ handoffs=["script_agent"],
373
+ system_message=f"""
374
+ You are a Slide Agent. Using the research from the conversation history, generate EXACTLY {num_slides} content slides on the topic, plus 1 quiz slide, 1 assignment slide, and 1 thank-you slide, for a TOTAL of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, with each slide as an object with 'title' and 'content' keys. Ensure the JSON is valid and contains precisely {num_slides + 3} slides. If the slide count is incorrect, adjust the output to meet this requirement before proceeding. Do not include explanatory text or comments. After outputting the JSON, use the handoff_to_script_agent tool.
375
+ Example for 2 content slides:
376
  ```json
377
  [
378
  {{"title": "Slide 1", "content": "Content for slide 1"}},
 
382
  {{"title": "Thank You", "content": "Thank you message"}}
383
  ]
384
  ```""",
385
+ output_content_type=None,
386
+ reflect_on_tool_use=False
387
+ )
388
+ script_agent = AssistantAgent(
389
+ name="script_agent",
390
+ model_client=model_client,
391
+ handoffs=["feynman_agent"],
392
+ system_message=f"""
393
  You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
394
  Example for 1 content slide:
395
  ```json
 
400
  "Thanks for, um, attending today!"
401
  ]
402
  ```""",
403
+ output_content_type=None,
404
+ reflect_on_tool_use=False
405
+ )
406
+ feynman_agent = AssistantAgent(
407
+ name="feynman_agent",
408
+ model_client=model_client,
409
+ handoffs=[],
410
+ system_message=f"""
411
  You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
412
  Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
413
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
 
415
+ swarm = Swarm(
416
+ participants=[research_agent, slide_agent, script_agent, feynman_agent],
417
+ termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
418
+ )
419
 
420
+ progress = 0
421
+ label = "Research: in progress..."
422
+ yield html_with_progress(label, progress)
423
+ await asyncio.sleep(0.1)
424
+
425
+ initial_message = f"""
426
+ Lecture Title: {title}
427
+ Topic: {topic}
428
+ Additional Instructions: {instructions}
429
+ Audience: {lecture_type}
430
+ Number of Content Slides: {num_slides}
431
+ Please start by researching the topic.
432
+ """
433
+ logger.info("Starting lecture generation for topic: %s", topic)
434
+
435
+ slides = None
436
+ scripts = None
437
+ max_slide_retries = 2
438
+ slide_retry_count = 0
439
+
440
+ while slide_retry_count <= max_slide_retries:
441
+ try:
442
+ logger.info("Research Agent starting (Slide attempt %d/%d)", slide_retry_count + 1, max_slide_retries)
443
+ task_result = await Console(swarm.run_stream(task=initial_message))
444
+ logger.info("Swarm execution completed")
445
+
446
+ script_retry_count = 0
447
+ max_script_retries = 2
448
+
449
+ for message in task_result.messages:
450
+ source = getattr(message, 'source', getattr(message, 'sender', None))
451
+ logger.debug("Processing message from %s, type: %s, content: %s", source, type(message), message.to_text() if hasattr(message, 'to_text') else str(message))
452
+
453
+ if isinstance(message, HandoffMessage):
454
+ logger.info("Handoff from %s to %s, Context: %s", source, message.target, message.context)
455
+ if source == "research_agent" and message.target == "slide_agent":
456
+ progress = 25
457
+ label = "Slides: generating..."
458
+ yield html_with_progress(label, progress)
459
+ await asyncio.sleep(0.1)
460
+ elif source == "slide_agent" and message.target == "script_agent":
461
+ if slides is None:
462
+ logger.warning("Slide Agent handoff without slides JSON")
463
+ extracted_json = extract_json_from_message(message)
464
+ if extracted_json:
465
+ slides = extracted_json
466
+ logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
467
+ if slides is None:
468
+ label = "Slides: failed to generate..."
469
+ yield html_with_progress(label, progress)
470
+ await asyncio.sleep(0.1)
471
+ progress = 50
472
+ label = "Scripts: generating..."
473
+ yield html_with_progress(label, progress)
474
+ await asyncio.sleep(0.1)
475
+ elif source == "script_agent" and message.target == "feynman_agent":
476
+ if scripts is None:
477
+ logger.warning("Script Agent handoff without scripts JSON")
478
+ extracted_json = extract_json_from_message(message)
479
+ if extracted_json:
480
+ scripts = extracted_json
481
+ logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
482
+ progress = 75
483
+ label = "Review: in progress..."
484
+ yield html_with_progress(label, progress)
485
+ await asyncio.sleep(0.1)
486
+
487
+ elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
488
+ logger.info("Research Agent completed research")
489
+ progress = 25
490
+ label = "Slides: generating..."
491
+ yield html_with_progress(label, progress)
492
+ await asyncio.sleep(0.1)
493
+
494
+ elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
495
+ logger.debug("Slide Agent message received: %s", message.to_text())
496
  extracted_json = extract_json_from_message(message)
497
  if extracted_json:
498
  slides = extracted_json
499
+ logger.info("Slide Agent generated %d slides: %s", len(slides), slides)
500
+ expected_slide_count = num_slides + 3
501
+ if len(slides) != expected_slide_count:
502
+ logger.warning("Generated %d slides, expected %d. Retrying...", len(slides), expected_slide_count)
503
+ slide_retry_count += 1
504
+ if slide_retry_count <= max_slide_retries:
505
+ # Re-prompt slide agent
506
+ retry_message = TextMessage(
507
+ content=f"Please generate EXACTLY {num_slides} content slides plus 1 quiz, 1 assignment, and 1 thank-you slide (total {num_slides + 3}).",
508
+ source="user",
509
+ recipient="slide_agent"
510
+ )
511
+ task_result.messages.append(retry_message)
512
+ slides = None
513
+ continue
514
+ else:
515
+ yield html_with_progress(f"Failed to generate correct number of slides after {max_slide_retries} retries. Expected {expected_slide_count}, got {len(slides)}.", progress)
516
+ return
517
+ # Save slide content to individual files
518
+ for i, slide in enumerate(slides):
519
+ content_file = os.path.join(temp_dir, f"slide_{i+1}_content.txt")
520
+ try:
521
+ with open(content_file, "w", encoding="utf-8") as f:
522
+ f.write(slide["content"])
523
+ logger.info("Saved slide content to %s: %s", content_file, slide["content"])
524
+ except Exception as e:
525
+ logger.error("Error saving slide content to %s: %s", content_file, str(e))
526
+ progress = 50
527
+ label = "Scripts: generating..."
528
+ yield html_with_progress(label, progress)
529
+ await asyncio.sleep(0.1)
530
+ else:
531
+ logger.warning("No JSON extracted from slide_agent message: %s", message.to_text())
532
+
533
+ elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
534
+ logger.debug("Script Agent message received: %s", message.to_text())
535
  extracted_json = extract_json_from_message(message)
536
  if extracted_json:
537
  scripts = extracted_json
538
+ logger.info("Script Agent generated scripts for %d slides: %s", len(scripts), scripts)
539
+ # Save raw scripts to individual files
540
+ for i, script in enumerate(scripts):
541
+ script_file = os.path.join(temp_dir, f"slide_{i+1}_raw_script.txt")
542
+ try:
543
+ with open(script_file, "w", encoding="utf-8") as f:
544
+ f.write(script)
545
+ logger.info("Saved raw script to %s: %s", script_file, script)
546
+ except Exception as e:
547
+ logger.error("Error saving raw script to %s: %s", script_file, str(e))
548
+ progress = 75
549
+ label = "Scripts generated and saved. Reviewing..."
550
+ yield html_with_progress(label, progress)
551
+ await asyncio.sleep(0.1)
552
+ else:
553
+ logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
554
+ if script_retry_count < max_script_retries:
555
+ script_retry_count += 1
556
+ logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
557
+ # Re-prompt script agent
558
+ retry_message = TextMessage(
559
+ content="Please generate scripts for the slides as per your instructions.",
560
+ source="user",
561
+ recipient="script_agent"
562
+ )
563
+ task_result.messages.append(retry_message)
564
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
566
+ elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
567
+ logger.info("Feynman Agent completed lecture review: %s", message.content)
568
+ progress = 90
569
+ label = "Lecture materials ready. Generating audio..."
570
+ yield html_with_progress(label, progress)
571
+ await asyncio.sleep(0.1)
572
+
573
+ logger.info("Slides state: %s", "Generated" if slides else "None")
574
+ logger.info("Scripts state: %s", "Generated" if scripts else "None")
575
+ if not slides or not scripts:
576
+ error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
577
+ error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
578
+ logger.error("%s", error_message)
579
+ yield html_with_progress(error_message, progress)
580
+ return
581
+
582
+ expected_slide_count = num_slides + 3
583
+ if len(slides) != expected_slide_count:
584
+ logger.error("Final validation failed: Expected %d slides, received %d", expected_slide_count, len(slides))
585
+ yield html_with_progress(f"Incorrect number of slides. Expected {expected_slide_count}, got {len(slides)}.", progress)
586
+ return
587
+
588
+ if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
589
+ logger.error("Scripts are not a list of strings: %s", scripts)
590
+ yield html_with_progress("Invalid script format. Scripts must be a list of strings.", progress)
591
+ return
592
+
593
+ if len(scripts) != expected_slide_count:
594
+ logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
595
+ yield html_with_progress(f"Mismatch in slides and scripts. Generated {len(slides)} slides but {len(scripts)} scripts.", progress)
596
+ return
597
+
598
+ # Generate PDF from slides
599
+ pdf_file = generate_slides_pdf(slides, temp_dir)
600
+
601
+ audio_files = []
602
+ speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
603
+ validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio, temp_dir)
604
+ if not validated_speaker_wav:
605
+ logger.error("Invalid speaker audio after conversion, skipping TTS")
606
+ yield html_with_progress("Invalid speaker audio. Please upload a valid MP3 or WAV file.", progress)
607
+ return
608
+
609
+ # Process audio generation sequentially with retries
610
+ for i, script in enumerate(scripts):
611
+ cleaned_script = clean_script_text(script)
612
+ audio_file = os.path.join(temp_dir, f"slide_{i+1}.wav")
613
+ script_file = os.path.join(temp_dir, f"slide_{i+1}_script.txt")
614
+
615
+ # Save cleaned script
616
+ try:
617
+ with open(script_file, "w", encoding="utf-8") as f:
618
+ f.write(cleaned_script or "")
619
+ logger.info("Saved cleaned script to %s: %s", script_file, cleaned_script)
620
+ except Exception as e:
621
+ logger.error("Error saving cleaned script to %s: %s", script_file, str(e))
622
 
623
+ if not cleaned_script:
624
+ logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
 
 
 
 
 
 
 
 
 
625
  audio_files.append(None)
626
+ progress = 90 + ((i + 1) / len(scripts)) * 10
627
  label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
628
  yield html_with_progress(label, progress)
629
  await asyncio.sleep(0.1)
630
+ continue
631
+
632
+ max_retries = 2
633
+ for attempt in range(max_retries + 1):
634
+ try:
635
+ current_text = cleaned_script
636
+ if attempt > 0:
637
+ sentences = re.split(r"[.!?]+", cleaned_script)
638
+ sentences = [s.strip() for s in sentences if s.strip()][:2]
639
+ current_text = ". ".join(sentences) + "."
640
+ logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
641
+
642
+ success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
643
+ if not success:
644
+ raise RuntimeError("TTS generation failed")
645
+
646
+ logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
647
+ audio_files.append(audio_file)
648
+ progress = 90 + ((i + 1) / len(scripts)) * 10
649
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
650
+ yield html_with_progress(label, progress)
651
+ await asyncio.sleep(0.1)
652
+ break
653
+ except Exception as e:
654
+ logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
655
+ if attempt == max_retries:
656
+ logger.error("Max retries reached for slide %d, skipping", i + 1)
657
+ audio_files.append(None)
658
+ progress = 90 + ((i + 1) / len(scripts)) * 10
659
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
660
+ yield html_with_progress(label, progress)
661
+ await asyncio.sleep(0.1)
662
+ break
663
+
664
+ # Create ZIP file of all outputs
665
+ zip_path = create_outputs_zip(temp_dir, slides, audio_files, scripts)
666
+
667
+ # Prepare UI output
668
+ slides_info = json.dumps({"slides": [
669
+ {"title": slide["title"], "content": slide["content"]}
670
+ for slide in slides
671
+ ], "audioFiles": audio_files})
672
+
673
+ html_output = f"""
674
+ <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
675
+ <div id="slide-content" style="flex: 1; overflow: auto;">
676
+ <div id="pdf-viewer"></div>
677
+ </div>
678
+ <div style="padding: 20px;">
679
+ <div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
680
+ <div id="progress-fill" style="width: {(1/len(slides)*100)}%; height: 100%; background-color: #4CAF50; border-radius: 2px;"></div>
681
+ </div>
682
+ <div style="display: flex; justify-content: center; margin-bottom: 10px;">
683
+ <button onclick="prevSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
684
+ <button onclick="togglePlay()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
685
+ <button onclick="nextSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
686
+ </div>
687
+ <p id="slide-counter" style="text-align: center;">Slide 1 of {len(slides)}</p>
688
+ </div>
689
  </div>
690
+ <script>
691
+ const lectureData = {slides_info};
692
+ let currentSlide = 0;
693
+ const totalSlides = lectureData.slides.length;
694
+ const slideCounter = document.getElementById('slide-counter');
695
+ const progressFill = document.getElementById('progress-fill');
696
+ let audioElements = [];
697
+ let currentAudio = null;
 
 
 
 
698
 
699
+ for (let i = 0; i < totalSlides; i++) {{
700
+ if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
701
+ const audio = new Audio('file://' + lectureData.audioFiles[i]);
702
+ audioElements.push(audio);
703
+ }} else {{
704
+ audioElements.push(null);
705
+ }}
706
+ }}
707
 
708
+ function updateSlide() {{
709
+ slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
710
+ progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
 
711
 
712
+ if (currentAudio) {{
713
+ currentAudio.pause();
714
+ currentAudio.currentTime = 0;
715
+ }}
716
 
717
+ if (audioElements[currentSlide]) {{
718
+ currentAudio = audioElements[currentSlide];
719
+ currentAudio.play().catch(e => console.error('Audio play failed:', e));
720
+ }} else {{
721
+ currentAudio = null;
722
+ }}
723
+ }}
724
 
725
+ function prevSlide() {{
726
+ if (currentSlide > 0) {{
727
+ currentSlide--;
728
+ updateSlide();
729
+ }}
730
+ }}
731
 
732
+ function nextSlide() {{
733
+ if (currentSlide < totalSlides - 1) {{
734
+ currentSlide++;
735
+ updateSlide();
736
+ }}
737
+ }}
738
 
739
+ function togglePlay() {{
740
+ if (!audioElements[currentSlide]) return;
741
+ if (currentAudio.paused) {{
742
+ currentAudio.play().catch(e => console.error('Audio play failed:', e));
743
+ }} else {{
744
+ currentAudio.pause();
745
+ }}
746
+ }}
747
 
748
+ audioElements.forEach((audio, index) => {{
749
+ if (audio) {{
750
+ audio.addEventListener('ended', () => {{
751
+ if (index < totalSlides - 1) {{
752
+ nextSlide();
753
+ }}
754
+ }});
755
  }}
756
  }});
757
+ </script>
758
+ """
759
+ yield {
760
+ "pdf": pdf_file,
761
+ "html": html_output,
762
+ "zip": zip_path
763
+ }
764
+ return
765
+
766
+ except Exception as e:
767
+ logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
768
+ yield html_with_progress(f"Error during lecture generation: {str(e)}", progress)
769
+ return
 
 
 
 
770
 
771
  # Gradio interface
772
  with gr.Blocks(title="Agent Feynman") as demo:
 
801
  </div>
802
  """
803
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
804
+ pdf_display = gr.PDF(label="Lecture Slides PDF")
805
+ outputs_zip = gr.File(label="Download Outputs (PDF, Audio, Scripts)")
806
 
807
  speaker_audio.change(
808
  fn=update_audio_preview,
 
813
  generate_btn.click(
814
  fn=on_generate,
815
  inputs=[api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides],
816
+ outputs=[slide_display, pdf_display, outputs_zip]
817
  )
818
 
819
  if __name__ == "__main__":