Spaces:

MostlyK
/

Manimator

Running

MostlyKIGuess committed 1 day ago

Commit

2e31ab2

1 Parent(s): f10bf2f

Refactor Gemini API integration and enhance video generation workflow

- Updated the `gemini.py` file to include a structured output model
using Pydantic for better validation and error handling.
- Improved the wording of the system prompt to enforce stricter timing
(credit to the Claude prompting guide).
- Use structured output for the primary Gemini call, and a Google-Search-enhanced
fallback with unstructured output, because the Gemini API does not allow
tool calling together with structured output.
- add a new subtitle_service.py for generating ASS subtitle files from
timestamps.
- app.py now handles the subtitle service!!!
- add subtitle support to manim service
- change tts_service.py to generate synchronized audio and subtitles
- Add src/tests/test_fallback.py to test the fallback path.

Files changed (7) hide show

src/api/fallback_gemini.py +179 -65
src/api/gemini.py +147 -123
src/app.py +109 -80
src/services/manim_service.py +128 -65
src/services/subtitle_service.py +67 -0
src/services/tts_service.py +72 -41
src/tests/test_fallback.py +20 -0

src/api/fallback_gemini.py CHANGED Viewed

@@ -3,11 +3,42 @@ import re
 from google import genai
 from google.genai import types as genai_types
 import logging
-from .gemini import SYSTEM_PROMPT, base_prompt_instructions
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 def fix_manim_code(faulty_code: str, error_message: str, original_context: str):
     api_key = os.getenv("GEMINI_API_KEY")
     if not api_key:
         logging.error("GEMINI_API_KEY not found in environment variables for fallback.")
@@ -15,99 +46,182 @@ def fix_manim_code(faulty_code: str, error_message: str, original_context: str):
     client = genai.Client(api_key=api_key)
-    fix_prompt_text = (
-        f"The following Manim code, intended to '{original_context}', failed with an error.\n\n"
-        "### FAULTY CODE:\n"
-        f"```python\n{faulty_code}\n```\n\n"
-        "### ERROR MESSAGE:\n"
-        f"```\n{error_message}\n```\n\n"
-        "### INSTRUCTIONS:\n"
-        "1. Analyze the error message and the faulty code.\n"
-        "2. Correct the code to fix the specific error reported.\n"
-        "3. Ensure the corrected code still fulfills the original request and adheres strictly to *all* the requirements listed below.\n"
-        "4. Pay close attention to vector dimensions, matrix operations, allowed Manim methods, and total duration (30 seconds).\n"
-        "5. If the code logic changes significantly, update the narration accordingly.\n"
-        "6. Return *only* the corrected code and narration using the '### MANIM CODE:' and '### NARRATION:' delimiters, just like the original request.\n\n"
-        "### REQUIREMENTS (Apply these to the corrected code):\n"
-        f"{base_prompt_instructions}"
-    )
-    contents = [fix_prompt_text]
     logging.info("Attempting to fix Manim code via fallback...")
     try:
         generation_config = genai_types.GenerateContentConfig(
-            system_instruction=SYSTEM_PROMPT
         )
         response = client.models.generate_content(
-                model="gemini-2.0-flash",
-                contents=contents,
-                config=generation_config
-                )
         if response:
             try:
                 content = response.text
                 logging.info("Received response from fallback attempt.")
-                if "### NARRATION:" in content:
-                    manim_code, narration = content.split("### NARRATION:", 1)
-                    manim_code = re.sub(r"```python", "", manim_code).replace("```", "").strip()
                     narration = narration.strip()
                     if "from manim import *" not in manim_code:
-                         logging.warning("Adding missing 'from manim import *' (fallback fix).")
-                         manim_code = "from manim import *\nimport numpy as np\n" + manim_code
                     elif "import numpy as np" not in manim_code:
-                         logging.warning("Adding missing 'import numpy as np' (fallback fix).")
-                         lines = manim_code.splitlines()
-                         for i, line in enumerate(lines):
-                             if "from manim import *" in line:
-                                 lines.insert(i + 1, "import numpy as np")
-                                 manim_code = "\n".join(lines)
-                                 break
-                    logging.info("Successfully parsed fixed code and narration from fallback.")
-                    return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
                 else:
-                    logging.warning("Delimiter '### NARRATION:' not found in fallback response. Attempting fallback extraction.")
-                    code_match = re.search(r'```python(.*?)```', content, re.DOTALL)
                     if code_match:
                         manim_code = code_match.group(1).strip()
-                        narration_part = content.split('```', 2)[-1].strip()
                         narration = narration_part if len(narration_part) > 20 else ""
                         if not narration:
-                            logging.warning("Fallback narration extraction resulted in empty or very short text (fallback fix).")
                         else:
-                            logging.info("Successfully parsed code and narration using fallback regex (fallback fix).")
                         if "from manim import *" not in manim_code:
-                             logging.warning("Adding missing 'from manim import *' (fallback fix, regex path).")
-                             manim_code = "from manim import *\nimport numpy as np\n" + manim_code
                         elif "import numpy as np" not in manim_code:
-                             logging.warning("Adding missing 'import numpy as np' (fallback fix, regex path).")
-                             lines = manim_code.splitlines()
-                             for i, line in enumerate(lines):
-                                 if "from manim import *" in line:
-                                     lines.insert(i + 1, "import numpy as np")
-                                     manim_code = "\n".join(lines)
-                                     break
-                        logging.info("Successfully parsed fixed code using fallback extraction.")
-                        return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
                     else:
-                         logging.error("Fallback extraction failed: No Python code block found in fallback response.")
-                         logging.debug(f"Fallback content without code block:\n{content}")
-                         return None, None
             except ValueError:
                 logging.error("Could not extract text from the fallback response.")
                 if response.prompt_feedback and response.prompt_feedback.block_reason:
-                     logging.error(f"Fallback content generation blocked. Reason: {response.prompt_feedback.block_reason.name}")
                 return None, None
             except Exception as e:
-                 logging.exception(f"Error processing fallback response: {e}")
-                 return None, None
         else:
             logging.error("No response received from Gemini during fallback attempt.")
             return None, None

 from google import genai
 from google.genai import types as genai_types
 import logging
+from .gemini import base_prompt_instructions
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+FALLBACK_SYSTEM_PROMPT = """You are an expert Manim programmer specializing in fixing broken Manim code and creating visually striking 60-second animations, strictly following Manim Community v0.19.0 standards.
+CRITICAL TIMING REQUIREMENTS:
+- **Total Duration:** Exactly 60 seconds (1 minute)
+- **Narration:** Exactly 150-160 words (average speaking pace: 2.5 words per second)
+- **Animation Structure:** Use this timing framework:
+  * Introduction: 8-10 seconds
+  * Main content: 40-45 seconds (3-4 major segments)
+  * Conclusion/summary: 7-10 seconds
+- **Synchronization:** Each narration sentence should correspond to 3-5 seconds of animation
+Core Requirements:
+- **API Version:** Use only Manim Community v0.19.0 API
+- **Vectors & Math:** Use 3D vectors (np.array([x, y, 0])) and ensure correct math operations
+- **Matrix Visualization:** Use MathTex for matrices: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'
+- **Star Usage:** Use Star(n=5, ...) not n_points
+- **Error Prevention:** Always validate Scene class exists; avoid 3D scenes
+- **Visual Style:** Create vibrant, dynamic animations with smooth transitions
+IMPORTANT: Your response must be formatted with clear delimiters:
+- Start Manim code with: ### MANIM CODE:
+- Start narration with: ### NARRATION:
+- End response after narration (no additional text)
+"""
 def fix_manim_code(faulty_code: str, error_message: str, original_context: str):
+    """
+    Enhanced fallback function with Google Search integration.
+    """
     api_key = os.getenv("GEMINI_API_KEY")
     if not api_key:
         logging.error("GEMINI_API_KEY not found in environment variables for fallback.")
     client = genai.Client(api_key=api_key)
+    # Enhanced fallback prompt with better structure and error analysis
+    fix_prompt_text = f"""
+TASK: Fix the broken Manim code that failed with a specific error.
+### ORIGINAL REQUEST:
+{original_context}
+### BROKEN MANIM CODE:
+```python
+{faulty_code}
+```
+### ERROR ENCOUNTERED:
+```
+{error_message}
+```
+### ANALYSIS INSTRUCTIONS:
+1. **Error Analysis**: Examine the error message carefully. Common issues include:
+   - Import errors (missing 'from manim import *' or 'import numpy as np')
+   - Scene class not found (class must inherit from Scene)
+   - Invalid Manim methods or syntax
+   - Vector dimension mismatches (use np.array([x, y, 0]))
+   - Animation object validation errors
+   - Timing issues (ensure total duration = 60 seconds)
+2. **Google Search**: Use Google Search to find:
+   - Recent Manim Community v0.19.0 API changes
+   - Specific error message solutions
+   - Updated method signatures or deprecated features
+   - Working examples of similar animations
+3. **Code Fixing Strategy**:
+   - Keep the original animation concept intact
+   - Fix only what's necessary to resolve the error
+   - Maintain 60-second duration and 120-150 word narration
+   - Ensure all imports are present
+   - Validate Scene class exists and is properly named
+   - Use only verified Manim methods from the allowed list
+4. **Quality Checks**:
+   - Verify vector operations use 3D format: np.array([x, y, 0])
+   - Check all self.play() calls have valid animation objects
+   - Ensure run_time and self.wait() sum to exactly 60 seconds
+   - Count narration words (must be 120-150)
+### OUTPUT FORMAT:
+Provide your response in exactly this format:
+### MANIM CODE:
+[Insert the complete, fixed Manim code here - include all imports and Scene class]
+### NARRATION:
+[Insert the narration script here - exactly 120-150 words, synchronized with animations]
+### REQUIREMENTS TO FOLLOW:
+{base_prompt_instructions}
+"""
+    contents = [fix_prompt_text]
     logging.info("Attempting to fix Manim code via fallback...")
     try:
+        grounding_tool = genai_types.Tool(google_search=genai_types.GoogleSearch())
         generation_config = genai_types.GenerateContentConfig(
+            tools=[grounding_tool],
+            temperature=0.4,  # lower coz grounding
+            system_instruction=FALLBACK_SYSTEM_PROMPT,
         )
         response = client.models.generate_content(
+            model="gemini-2.5-flash",
+            contents=contents,  # type: ignore
+            config=generation_config,
+        )
         if response:
+            # print(response)
             try:
                 content = response.text
                 logging.info("Received response from fallback attempt.")
+                if "### NARRATION:" in content:  # type: ignore
+                    manim_code, narration = content.split("### NARRATION:", 1)  # type: ignore
+                    manim_code = (
+                        re.sub(r"```python", "", manim_code).replace("```", "").strip()
+                    )
                     narration = narration.strip()
                     if "from manim import *" not in manim_code:
+                        logging.warning(
+                            "Adding missing 'from manim import *' (fallback fix)."
+                        )
+                        manim_code = (
+                            "from manim import *\nimport numpy as np\n" + manim_code
+                        )
                     elif "import numpy as np" not in manim_code:
+                        logging.warning(
+                            "Adding missing 'import numpy as np' (fallback fix)."
+                        )
+                        lines = manim_code.splitlines()
+                        for i, line in enumerate(lines):
+                            if "from manim import *" in line:
+                                lines.insert(i + 1, "import numpy as np")
+                                manim_code = "\n".join(lines)
+                                break
+                    logging.info(
+                        "Successfully parsed fixed code and narration from fallback."
+                    )
+                    return {
+                        "manim_code": manim_code,
+                        "output_file": "output.mp4",
+                    }, narration
                 else:
+                    logging.warning(
+                        "Delimiter '### NARRATION:' not found in fallback response. Attempting fallback extraction."
+                    )
+                    code_match = re.search(r"```python(.*?)```", content, re.DOTALL)  # type: ignore
                     if code_match:
                         manim_code = code_match.group(1).strip()
+                        narration_part = content.split("```", 2)[-1].strip()
                         narration = narration_part if len(narration_part) > 20 else ""
                         if not narration:
+                            logging.warning(
+                                "Fallback narration extraction resulted in empty or very short text (fallback fix)."
+                            )
                         else:
+                            logging.info(
+                                "Successfully parsed code and narration using fallback regex (fallback fix)."
+                            )
                         if "from manim import *" not in manim_code:
+                            logging.warning(
+                                "Adding missing 'from manim import *' (fallback fix, regex path)."
+                            )
+                            manim_code = (
+                                "from manim import *\nimport numpy as np\n" + manim_code
+                            )
                         elif "import numpy as np" not in manim_code:
+                            logging.warning(
+                                "Adding missing 'import numpy as np' (fallback fix, regex path)."
+                            )
+                            lines = manim_code.splitlines()
+                            for i, line in enumerate(lines):
+                                if "from manim import *" in line:
+                                    lines.insert(i + 1, "import numpy as np")
+                                    manim_code = "\n".join(lines)
+                                    break
+                        logging.info(
+                            "Successfully parsed fixed code using fallback extraction."
+                        )
+                        return {
+                            "manim_code": manim_code,
+                            "output_file": "output.mp4",
+                        }, narration
                     else:
+                        logging.error(
+                            "Fallback extraction failed: No Python code block found in fallback response."
+                        )
+                        logging.debug(
+                            f"Fallback content without code block:\n{content}"
+                        )
+                        return None, None
             except ValueError:
                 logging.error("Could not extract text from the fallback response.")
                 if response.prompt_feedback and response.prompt_feedback.block_reason:
+                    logging.error(
+                        f"Fallback content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
+                    )
                 return None, None
             except Exception as e:
+                logging.exception(f"Error processing fallback response: {e}")
+                return None, None
         else:
             logging.error("No response received from Gemini during fallback attempt.")
             return None, None

src/api/gemini.py CHANGED Viewed

@@ -5,69 +5,108 @@ from dotenv import load_dotenv
 import os
 import pathlib
 import logging
 load_dotenv()
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# --- Global System Prompt ---
-SYSTEM_PROMPT = """You are an expert Manim programmer specializing in creating crazy, cutting-edge, and visually striking animations based on user prompts or documents, strictly following Manim Community v0.19.0 standards.
 Core Requirements:
-- **API Version:** Use only Manim Community v0.19.0 API.
-- **Vectors & Math:** Use 3D vectors (`np.array([x, y, 0])`) and ensure correct math operations.
-- **Allowed Methods:** Strictly use the verified list of Manim methods provided in the detailed instructions. No external images.
-- **        "\n   - self.play(), self.wait(), Create(), Write(), Transform(), FadeIn(), FadeOut(), Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup(), Mobject.animate, self.camera.frame.animate"
-- **Matrix Visualization:** Use `MathTex` for displaying matrices in the format `r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'`.
-- **Duration:** The total animation duration MUST be exactly 30 seconds.
--**Error handling**:"An unexpected error occurred during video creation: No Scene class found in generated code, This error SHOULD NEVER occur. Make sure to validate the code before returning it. If this error occurs, please log the error and return None for both manim_code and narration.Make sure you don't do 3Dscene coz that gives this error"
-- **Engagement:** Create visually stunning and crazy animations that push creative boundaries. Use vibrant colors, dynamic movements, and unexpected transformations.
-- **Text Handling:** Fade out text and other elements as soon as they are no longer needed, ensuring a smooth transition.
-- **Synchronization:** Align animation pacing (`run_time`, `wait`) roughly with the narration segments.
-- **Output Format:** Return *only* the Python code and narration script, separated by '### MANIM CODE:' and '### NARRATION:' delimiters. Adhere strictly to this format.
-- **Code Quality:** Generate error-free, runnable code with necessary imports (`from manim import *`, `import numpy as np`) and exactly one Scene class. Validate objects and animation calls.
 """
-# --- Detailed Instructions ---
 base_prompt_instructions = (
-        "\nFollow these requirements strictly:"
-        "\n1. Use only Manim Community v0.19.0 API"
-        "\n2. Vector operations:"
-        "\n   - All vectors must be 3D: np.array([x, y, 0])"
-        "\n   - Matrix multiplication: result = np.dot(matrix, vector[:2])"
-        "\n   - Append 0 for Z: np.append(result, 0)"
-        "\n3. Matrix visualization:"
-        "\n   - Use MathTex for display"
-        "\n   - Format: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'"
-        "\n4. Use only verified Manim methods:"
-        "\n   - self.play(), self.wait(), Create(), Write(), Transform(), FadeIn(), FadeOut(), Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup(), Mobject.animate, self.camera.frame.animate"
-        "\n5. DO NOT USE IMAGES IMPORTS."
-        "\n6. Make the video crazy and innovative by:"
-        "\n   - Fading out text and other elements gracefully once they are no longer needed"
-        "\n   - Adding creative interactive elements like arrows, labels, and transitions"
-        "\n   - Incorporating graphs/plots (Axes, Plot, LineGraph, BarChart) where appropriate"
-        "\n   - Leveraging smooth transitions and varied pacing to keep the viewer engaged."
-        "\n7. Ensure the video is error-free by:"
-        "\n   - Validating all objects before animations"
-        "\n   - Handling exceptions gracefully (in generated code if applicable)"
-        "\n   - Ensuring operands for vector operations match in shape to avoid broadcasting errors"
-        "\n8. Validate that every arrow creation ensures its start and end points are distinct to prevent normalization errors."
-        "\n9. Use longer scenes (e.g., 5-6 seconds per major step) for complex transformations and shorter scenes for simple animations, with a total duration of exactly 30 seconds."
-        "\n10. Align the narration script with the animation pace for seamless storytelling."
-        "\n11. Ensure all objects in self.play() are valid animations (e.g., `Create(obj)`, `obj.animate.shift(UP)`)."
-        "\n12. Use Mobject.animate for animations involving Mobject methods."
-        "\n13. CRITICAL: DO NOT USE BARCHATS, LINEGRAPHS, OR PLOTTING WITHOUT EXPLICIT INSTRUCTIONS."
-        "\n14. Provide creative and sometimes crazy Manim video scripts that push the conventional boundaries."
-        "\n15. **Synchronization:** Structure the narration and Manim code for better synchronization:"
-        "\n    - Keep narration segments concise and directly tied to the visual elements."
-        "\n    - Use `self.wait(duration)` in the Manim code to match natural pauses in narration."
-        "\n    - Adjust `run_time` in `self.play()` calls to match the speaking duration of the associated narration."
-        "\n    - Ensure the animation and narration sum to exactly 30 seconds."
-        "\n### MANIM CODE:\n"
-        "Provide only valid Python code using Manim Community v0.19.0 to generate the video animation.\n\n"
-        "### NARRATION:\n"
-        "Provide a concise narration script for the video that aligns with the Manim code's pacing and visuals.DO NOT give timestamps.\n\n"
-    )
 def load_manim_examples():
@@ -75,7 +114,6 @@ def load_manim_examples():
     if not guide_path.exists():
         logging.warning(f"Manim examples guide not found at {guide_path}")
         return ""
     logging.info(f"Loading Manim examples from {guide_path}")
     return guide_path.read_text(encoding="utf-8")
@@ -85,7 +123,6 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
     if not api_key:
         logging.error("GEMINI_API_KEY not found in environment variables")
         raise Exception("GEMINI_API_KEY not found in environment variables")
     if not idea and not pdf_path:
         raise ValueError("Either an idea or a pdf_path must be provided.")
     if idea and pdf_path:
@@ -97,7 +134,10 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
     manim_examples = load_manim_examples()
     if manim_examples:
-        examples_prompt = "Below are examples of Manim code that demonstrate proper usage patterns. Use these as reference when generating your animation:\n\n" + manim_examples
         contents.append(examples_prompt)
         logging.info("Added Manim examples from guide.md to prime the model")
     else:
@@ -108,12 +148,14 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
     if pdf_path:
         pdf_file_path = pathlib.Path(pdf_path)
         if not pdf_file_path.exists():
-             logging.error(f"PDF file not found at: {pdf_path}")
-             raise FileNotFoundError(f"PDF file not found at: {pdf_path}")
         logging.info(f"Reading PDF: {pdf_path}")
         pdf_data = pdf_file_path.read_bytes()
-        pdf_part = genai_types.Part.from_bytes(data=pdf_data, mime_type='application/pdf')
         contents.append(pdf_part)
         user_prompt_text = f"Create a 30-second Manim video script summarizing the key points or illustrating a core concept from the provided PDF document. {base_prompt_instructions}"
@@ -127,81 +169,63 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
     logging.info("Sending request to Gemini API...")
     try:
         generation_config = genai_types.GenerateContentConfig(
-        system_instruction=SYSTEM_PROMPT
-    )
         response = client.models.generate_content(
-            model="gemini-2.0-flash",
-            contents=contents,
-            config=generation_config
-            )
     except Exception as e:
         logging.exception(f"Error calling Gemini API: {e}")
         raise Exception(f"Error calling Gemini API: {e}")
     if response:
         try:
-            content = response.text
-            logging.info("Received response from Gemini.")
-        except ValueError:
-             logging.warning("Could not extract text from the response. Response details:")
-             logging.warning(response)
-             if response.prompt_feedback and response.prompt_feedback.block_reason:
-                 logging.error(f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}")
-                 raise Exception(f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}")
-             else:
-                 logging.error("Failed to generate content. The response was empty or malformed.")
-                 raise Exception("Failed to generate content. The response was empty or malformed.")
-        if "### NARRATION:" in content:
-            manim_code, narration = content.split("### NARRATION:", 1)
-            manim_code = re.sub(r"```python", "", manim_code).replace("```", "").strip()
-            narration = narration.strip()
-            logging.info("Successfully parsed code and narration using delimiter.")
             if "from manim import *" not in manim_code:
-                 logging.warning("Adding missing 'from manim import *'.")
-                 manim_code = "from manim import *\nimport numpy as np\n" + manim_code
             elif "import numpy as np" not in manim_code:
-                 logging.warning("Adding missing 'import numpy as np'.")
-                 lines = manim_code.splitlines()
-                 for i, line in enumerate(lines):
-                     if "from manim import *" in line:
-                         lines.insert(i + 1, "import numpy as np")
-                         manim_code = "\n".join(lines)
-                         break
             return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
-        else:
-            logging.warning("Delimiter '### NARRATION:' not found. Attempting fallback extraction.")
-            code_match = re.search(r'```python(.*?)```', content, re.DOTALL)
-            if code_match:
-                manim_code = code_match.group(1).strip()
-                narration_part = content.split('```', 2)[-1].strip()
-                narration = narration_part if len(narration_part) > 20 else ""
-                if not narration:
-                    logging.warning("Fallback narration extraction resulted in empty or very short text.")
-                else:
-                    logging.info("Successfully parsed code and narration using fallback regex.")
-                if "from manim import *" not in manim_code:
-                     logging.warning("Adding missing 'from manim import *' (fallback).")
-                     manim_code = "from manim import *\nimport numpy as np\n" + manim_code
-                elif "import numpy as np" not in manim_code:
-                     logging.warning("Adding missing 'import numpy as np' (fallback).")
-                     lines = manim_code.splitlines()
-                     for i, line in enumerate(lines):
-                         if "from manim import *" in line:
-                             lines.insert(i + 1, "import numpy as np")
-                             manim_code = "\n".join(lines)
-                             break
-                return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
             else:
-                 logging.error("Fallback extraction failed: No Python code block found in response.")
-                 logging.debug(f"Content without code block:\n{content}")
-                 raise Exception("The response does not contain the expected '### NARRATION:' delimiter or a valid Python code block.")
     else:
-        logging.error("Error generating video content. No response received from Gemini.")
         raise Exception("Error generating video content. No response received.")

 import os
 import pathlib
 import logging
+from pydantic import BaseModel
 load_dotenv()
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+class ManimOutput(BaseModel):  # Schema handed to Gemini as response_schema; response.parsed is checked against it
+    manim_code: str  # Manim source code produced by the model
+    narration: str  # Narration script produced alongside the code
+SYSTEM_PROMPT = """You are an expert Manim programmer specializing in creating visually striking 60-second animations based on user prompts or documents, strictly following Manim Community v0.19.0 standards. Your output MUST be a JSON object conforming to the provided schema.
+CRITICAL TIMING REQUIREMENTS:
+- **Total Duration:** Exactly 60 seconds (1 minute)
+- **Narration:** Exactly 150-160 words (average speaking pace: 2.5 words per second)
+- **Animation Structure:** Use this timing framework:
+  * Introduction: 8-10 seconds
+  * Main content: 40-45 seconds (3-4 major segments)
+  * Conclusion/summary: 7-10 seconds
+- **Synchronization:** Each narration sentence should correspond to 3-5 seconds of animation
 Core Requirements:
+- **API Version:** Use only Manim Community v0.19.0 API
+- **Vectors & Math:** Use 3D vectors (np.array([x, y, 0])) and ensure correct math operations
+- **Matrix Visualization:** Use MathTex for matrices: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'
+- **Star Usage:** Use Star(n=5, ...) not n_points
+- **Error Prevention:** Always validate Scene class exists; avoid 3D scenes
+- **Visual Style:** Create vibrant, dynamic animations with smooth transitions
+- **Output Format:** JSON with "manim_code" and "narration" keys
 """  # Passed as system_instruction in the Gemini GenerateContentConfig
+# Detailed Instructions
 base_prompt_instructions = (
+    "\nSTRICT TIMING REQUIREMENTS:"
+    "\n1. **Video Duration:** Exactly 60 seconds total"
+    "\n2. **Narration Constraints:**"
+    "\n   - Exactly 150-160 words (no more, no less)"
+    "\n   - Speaking pace: 2.5 words per second"
+    "\n   - Use short, clear sentences (8-12 words each)"
+    "\n   - Include natural pauses between major concepts"
+    "\n3. **Animation Timing Structure:**"
+    "\n   - Use self.wait() to match narration pauses"
+    "\n   - run_time in self.play() should match sentence duration"
+    "\n   - Fade out elements after 3-5 seconds to avoid clutter"
+    "\n   - Example timing: self.play(Create(obj), run_time=3), self.wait(1)"
+    "\nTECHNICAL REQUIREMENTS:"
+    "\n4. Use only Manim Community v0.19.0 API"
+    "\n5. Vector operations (3D vectors): np.array([x, y, 0])"
+    "\n6. Matrix display: MathTex(r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}')"
+    "\n7. Verified methods only: Create(), Write(), Transform(), FadeIn(), FadeOut(), "
+    "\n   Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), "
+    "\n   FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), "
+    "\n   BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup()"
+    "\n8. Star shapes: Star(n=5, ...) not n_points"
+    "\n9. NO image imports or 3D scenes"
+    "\n10. There is no .to_center() method so please don't use that"
+    "\nVISUAL & CONTENT GUIDELINES:"
+    "\n10. Create 4-5 distinct visual segments matching narration flow"
+    "\n11. Use vibrant colors and smooth transitions"
+    "\n12. Fade out text/objects when no longer needed"
+    "\n13. Include interactive elements: arrows, labels, highlights"
+    "\n14. Validate all objects before animation calls"
+    "\n15. Use longer run_times (4-6s) for complex animations, shorter (2-3s) for simple ones"
+    "\nCODE STRUCTURE TEMPLATE:"
+    "\n16. Always follow this timing pattern:"
+    "\n    ```python"
+    "\n    class VideoScene(Scene):"
+    "\n        def construct(self):"
+    "\n            # Intro (8-10s): Title + brief setup"
+    "\n            title = Text('Title')"
+    "\n            self.play(Write(title), run_time=3)"
+    "\n            self.wait(2)  # Pause for narration"
+    "\n            self.play(FadeOut(title), run_time=2)"
+    "\n            "
+    "\n            # Main content (40-45s): 3-4 segments"
+    "\n            # Segment 1 (10-12s)"
+    "\n            # Segment 2 (10-12s)  "
+    "\n            # Segment 3 (10-12s)"
+    "\n            # Segment 4 (8-10s)"
+    "\n            "
+    "\n            # Conclusion (7-10s): Summary + fade out"
+    "\n    ```"
+    "\nNARRATION STRUCTURE:"
+    "\n17. Follow this word count breakdown:"
+    "\n    - Introduction: 15-25 words (8-10 seconds)"
+    "\n    - Main content: 70-85 words (36-40 seconds)"
+    "\n    - Conclusion: 20-25 words (8-10 seconds)"
+    "\n    - Natural pauses: 3-5 seconds total"
+    "\n18. Use active voice, present tense"
+    "\n19. Include transition phrases: 'Now let's see...', 'Next, we'll explore...'"
+    "\n20. End with a strong concluding statement"
+    "\nQUALITY ASSURANCE:"
+    "\n21. Count words in narration before finalizing (must be 120-150)"
+    "\n22. Calculate total animation time (self.play + self.wait = 60s)"
+    "\n23. Ensure Scene class exists and imports are correct"
+    "\n24. Test that all animation objects are valid before use"
+    "\n25. No broadcasting errors in vector operations"
+    "\n26. Distinct start/end points for arrows to prevent normalization errors"
+)
 def load_manim_examples():
     if not guide_path.exists():
         logging.warning(f"Manim examples guide not found at {guide_path}")
         return ""
     logging.info(f"Loading Manim examples from {guide_path}")
     return guide_path.read_text(encoding="utf-8")
     if not api_key:
         logging.error("GEMINI_API_KEY not found in environment variables")
         raise Exception("GEMINI_API_KEY not found in environment variables")
     if not idea and not pdf_path:
         raise ValueError("Either an idea or a pdf_path must be provided.")
     if idea and pdf_path:
     manim_examples = load_manim_examples()
     if manim_examples:
+        examples_prompt = (
+            "Below are examples of Manim code that demonstrate proper usage patterns. Use these as reference when generating your animation:\n\n"
+            + manim_examples
+        )
         contents.append(examples_prompt)
         logging.info("Added Manim examples from guide.md to prime the model")
     else:
     if pdf_path:
         pdf_file_path = pathlib.Path(pdf_path)
         if not pdf_file_path.exists():
+            logging.error(f"PDF file not found at: {pdf_path}")
+            raise FileNotFoundError(f"PDF file not found at: {pdf_path}")
         logging.info(f"Reading PDF: {pdf_path}")
         pdf_data = pdf_file_path.read_bytes()
+        pdf_part = genai_types.Part.from_bytes(
+            data=pdf_data, mime_type="application/pdf"
+        )
         contents.append(pdf_part)
         user_prompt_text = f"Create a 30-second Manim video script summarizing the key points or illustrating a core concept from the provided PDF document. {base_prompt_instructions}"
     logging.info("Sending request to Gemini API...")
     try:
         generation_config = genai_types.GenerateContentConfig(
+            response_mime_type="application/json",
+            response_schema=ManimOutput,
+            system_instruction=SYSTEM_PROMPT,
+        )
         response = client.models.generate_content(
+            model="gemini-2.5-flash", contents=contents, config=generation_config
+        )
     except Exception as e:
         logging.exception(f"Error calling Gemini API: {e}")
         raise Exception(f"Error calling Gemini API: {e}")
     if response:
         try:
+            parsed_output = response.parsed
+            if not parsed_output or not isinstance(parsed_output, ManimOutput):
+                logging.error("Failed to parse structured output from Gemini.")
+                raise Exception("Failed to parse structured output from Gemini.")
+            manim_code = parsed_output.manim_code
+            narration = parsed_output.narration
+            logging.info("Successfully parsed structured output from Gemini.")
             if "from manim import *" not in manim_code:
+                logging.warning("Adding missing 'from manim import *'.")
+                manim_code = "from manim import *\nimport numpy as np\n" + manim_code
             elif "import numpy as np" not in manim_code:
+                logging.warning("Adding missing 'import numpy as np'.")
+                lines = manim_code.splitlines()
+                for i, line in enumerate(lines):
+                    if "from manim import *" in line:
+                        lines.insert(i + 1, "import numpy as np")
+                        manim_code = "\n".join(lines)
+                        break
             return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
+        except (ValueError, AttributeError) as e:
+            logging.warning(
+                f"Could not parse the response. Error: {e}. Response details:"
+            )
+            logging.warning(response)
+            if response.prompt_feedback and response.prompt_feedback.block_reason:
+                logging.error(
+                    f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
+                )
+                raise Exception(
+                    f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
+                )
             else:
+                logging.error(
+                    "Failed to generate content. The response was empty or malformed."
+                )
+                raise Exception(
+                    "Failed to generate content. The response was empty or malformed."
+                )
     else:
+        logging.error(
+            "Error generating video content. No response received from Gemini."
+        )
         raise Exception("Error generating video content. No response received.")

src/app.py CHANGED Viewed

@@ -9,19 +9,22 @@ from api.fallback_gemini import fix_manim_code
 from services.manim_service import create_manim_video
 from services.tts_service import generate_audio
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 def main():
     st.title("Manimator")
-    st.write("Generate videos from text ideas or PDF files, You can also just paste arxiv links ;p")
     input_type = st.radio("Choose input type:", ("Text Idea", "Upload PDF"))
     idea = None
     uploaded_file = None
-    pdf_path = None
     original_context = ""
-    audio_file = None
-    current_audio_file = None
     if input_type == "Text Idea":
         idea = st.text_area("Enter your idea:")
         if idea:
@@ -32,156 +35,182 @@ def main():
             original_context = f"Summary/concept from PDF: {uploaded_file.name}"
     if st.button("Generate Video"):
-        temp_pdf_file = None
         video_data = None
         script = None
-        audio_file = None
         final_video = None
-        max_retries = 1
         try:
             if input_type == "Text Idea" and idea:
                 with st.spinner("Generating initial script and code from idea..."):
-                    logging.info(f"Generating video from idea: {idea[:50]}...")
                     video_data, script = generate_video(idea=idea)
             elif input_type == "Upload PDF" and uploaded_file is not None:
                 with st.spinner("Generating initial script and code from PDF..."):
-                    logging.info(f"Generating video from PDF: {uploaded_file.name}")
-                    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                         temp_pdf.write(uploaded_file.getvalue())
                         pdf_path = temp_pdf.name
-                        temp_pdf_file = pdf_path
                     video_data, script = generate_video(pdf_path=pdf_path)
             else:
                 st.error("Please provide an idea or upload a PDF.")
                 return
             if not video_data or not script:
-                 st.error("Failed to generate initial script/code from Gemini.")
-                 return
-            with st.spinner("Generating audio..."):
-                 logging.info("Generating audio for the script.")
-                 try:
-                     audio_file = generate_audio(script)
-                 except ValueError as e:
-                     st.warning(f"Could not generate audio: {e}. Proceeding without audio.")
-                     audio_file = None
             current_manim_code = video_data["manim_code"]
             current_script = script
             current_audio_file = audio_file
             for attempt in range(max_retries + 1):
                 try:
                     with st.spinner(f"Attempt {attempt + 1}: Creating Manim video..."):
                         logging.info(f"Attempt {attempt + 1} to create Manim video.")
                         final_video = create_manim_video(
-                            {"manim_code": current_manim_code, "output_file": "output.mp4"},
                             current_manim_code,
-                            audio_file=current_audio_file
                         )
                     logging.info("Manim video creation successful.")
-                    break
-                except subprocess.CalledProcessError as e:
                     logging.error(f"Manim execution failed on attempt {attempt + 1}.")
-                    st.warning(f"Attempt {attempt + 1} failed. Manim error:\n```\n{e.stderr.decode() if e.stderr else 'No stderr captured.'}\n```")
                     if attempt < max_retries:
                         st.info("Attempting to fix the code using fallback...")
                         logging.info("Calling fallback Gemini to fix code.")
-                        error_message = e.stderr.decode() if e.stderr else "Manim execution failed without specific error output."
                         fixed_video_data, fixed_script = fix_manim_code(
                             faulty_code=current_manim_code,
-                            error_message=error_message,
-                            original_context=original_context
                         )
                         if fixed_video_data and fixed_script is not None:
-                            st.success("Fallback successful! Retrying video generation with fixed code.")
                             logging.info("Fallback successful. Received fixed code.")
                             current_manim_code = fixed_video_data["manim_code"]
                             if fixed_script != current_script and fixed_script:
-                                st.info("Narration script was updated by the fallback. Regenerating audio...")
-                                logging.info("Regenerating audio for updated script.")
                                 current_script = fixed_script
                                 try:
-                                     current_audio_file = generate_audio(current_script)
-                                except ValueError as e:
-                                     st.warning(f"Could not generate audio for fixed script: {e}. Proceeding without audio.")
-                                     current_audio_file = None
-                            elif not fixed_script:
-                                 st.warning("Fallback provided code but no narration. Using original audio (if any).")
-                                 logging.warning("Fallback provided empty narration.")
-                                 current_script = ""
-                                 current_audio_file = None
                             else:
                                 logging.info("Fallback kept the original narration.")
                         else:
                             st.error("Fallback failed to fix the code. Stopping.")
-                            logging.error("Fallback failed to return valid code/script.")
                             final_video = None
                             break
                     else:
-                        st.error(f"Manim failed after {max_retries + 1} attempts. Could not generate video.")
-                        logging.error(f"Manim failed after {max_retries + 1} attempts.")
                         final_video = None
                 except Exception as e:
-                    st.error(f"An unexpected error occurred during video creation: {str(e)}")
-                    logging.exception("Unexpected error during create_manim_video call.")
                     final_video = None
                     break
             if final_video and os.path.exists(final_video):
                 st.success("Video generated successfully!")
                 st.video(final_video)
                 st.write("Generated Narration:")
-                st.text_area("Narration", current_script if current_script is not None else "Narration could not be generated.", height=150)
             elif not final_video:
-                 pass
             else:
                 st.error("Error: Generated video file not found after processing.")
                 logging.error(f"Final video file '{final_video}' not found.")
-        except FileNotFoundError as e:
-             st.error(f"Error: A required file was not found. {str(e)}")
-             logging.exception("FileNotFoundError during generation process.")
-        except ValueError as e:
-             st.error(f"Input Error: {str(e)}")
-             logging.exception("ValueError during generation process.")
         except Exception as e:
-            st.error(f"An unexpected error occurred: {str(e)}")
             logging.exception("Unhandled exception in main generation block.")
         finally:
-            if temp_pdf_file and os.path.exists(temp_pdf_file):
-                try:
-                    os.remove(temp_pdf_file)
-                    logging.info(f"Removed temporary file: {temp_pdf_file}")
-                except OSError as e:
-                    logging.error(f"Error removing temporary file {temp_pdf_file}: {e}")
-            if audio_file and os.path.exists(audio_file) and audio_file != current_audio_file:
-                 try:
-                     os.remove(audio_file)
-                     logging.info(f"Removed temporary audio file: {audio_file}")
-                 except OSError as e:
-                     logging.error(f"Error removing temporary audio file {audio_file}: {e}")
-            if current_audio_file and os.path.exists(current_audio_file):
-                 try:
-                     os.remove(current_audio_file)
-                     logging.info(f"Removed potentially updated temporary audio file: {current_audio_file}")
-                 except OSError as e:
-                     logging.error(f"Error removing potentially updated temporary audio file {current_audio_file}: {e}")
-    st.markdown("<br><br>", unsafe_allow_html=True)
     st.markdown("---")
-    st.markdown("""
         ### Want to help improve this app?
         - Give good Manim Examples and make PRs in guide.md, find it in repo [GitHub](https://github.com/mostlykiguess/Manimator)
         - Report issues on [GitHub Issues](https://github.com/mostlykiguess/Manimator/issues)
         - Email problematic prompts to me
-        """)
 if __name__ == "__main__":
     main()

 from services.manim_service import create_manim_video
 from services.tts_service import generate_audio
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 def main():
     st.title("Manimator")
+    st.write(
+        "Generate videos from text ideas or PDF files, You can also just paste arxiv links ;p"
+    )
     input_type = st.radio("Choose input type:", ("Text Idea", "Upload PDF"))
     idea = None
     uploaded_file = None
     original_context = ""
     if input_type == "Text Idea":
         idea = st.text_area("Enter your idea:")
         if idea:
             original_context = f"Summary/concept from PDF: {uploaded_file.name}"
     if st.button("Generate Video"):
+        files_to_cleanup = set()
         video_data = None
         script = None
         final_video = None
+        max_retries = 2  # retries for fallback
         try:
+            # Step 1: Generate initial script and code from Gemini
             if input_type == "Text Idea" and idea:
                 with st.spinner("Generating initial script and code from idea..."):
                     video_data, script = generate_video(idea=idea)
             elif input_type == "Upload PDF" and uploaded_file is not None:
                 with st.spinner("Generating initial script and code from PDF..."):
+                    with tempfile.NamedTemporaryFile(
+                        delete=False, suffix=".pdf"
+                    ) as temp_pdf:
                         temp_pdf.write(uploaded_file.getvalue())
                         pdf_path = temp_pdf.name
+                        files_to_cleanup.add(pdf_path)
                     video_data, script = generate_video(pdf_path=pdf_path)
             else:
                 st.error("Please provide an idea or upload a PDF.")
                 return
             if not video_data or not script:
+                st.error("Failed to generate initial script/code from Gemini.")
+                return
+            # Step 2: Generate audio and subtitles from the script
+            with st.spinner("Generating audio and subtitles..."):
+                logging.info("Generating audio and subtitles for the script.")
+                try:
+                    # Unpack both audio and subtitle file paths
+                    audio_file, subtitle_file = generate_audio(script)
+                    if audio_file:
+                        files_to_cleanup.add(audio_file)
+                    if subtitle_file:
+                        files_to_cleanup.add(subtitle_file)
+                except ValueError as e:
+                    st.warning(
+                        f"Could not generate audio: {e}. Proceeding without audio/subtitles."
+                    )
+                    audio_file, subtitle_file = None, None
             current_manim_code = video_data["manim_code"]
             current_script = script
             current_audio_file = audio_file
+            current_subtitle_file = subtitle_file
+            # Step 3: Attempt to render the video, with fallback retries
             for attempt in range(max_retries + 1):
                 try:
                     with st.spinner(f"Attempt {attempt + 1}: Creating Manim video..."):
                         logging.info(f"Attempt {attempt + 1} to create Manim video.")
                         final_video = create_manim_video(
+                            video_data,
                             current_manim_code,
+                            audio_file=current_audio_file,
+                            subtitle_file=current_subtitle_file,
                         )
                     logging.info("Manim video creation successful.")
+                    break  # Exit the loop on success
+                except (subprocess.CalledProcessError, FileNotFoundError) as e:
+                    error_output = e.stderr if hasattr(e, "stderr") else str(e)
                     logging.error(f"Manim execution failed on attempt {attempt + 1}.")
+                    st.warning(
+                        f"Attempt {attempt + 1} failed. Manim error:\n```\n{error_output}\n```"
+                    )
                     if attempt < max_retries:
                         st.info("Attempting to fix the code using fallback...")
                         logging.info("Calling fallback Gemini to fix code.")
                         fixed_video_data, fixed_script = fix_manim_code(
                             faulty_code=current_manim_code,
+                            error_message=error_output,
+                            original_context=original_context,
                         )
                         if fixed_video_data and fixed_script is not None:
+                            st.success(
+                                "Fallback successful! Retrying video generation with fixed code."
+                            )
                             logging.info("Fallback successful. Received fixed code.")
                             current_manim_code = fixed_video_data["manim_code"]
+                            # If narration changed, regenerate audio and subtitles
                             if fixed_script != current_script and fixed_script:
+                                st.info(
+                                    "Narration script was updated. Regenerating audio and subtitles..."
+                                )
                                 current_script = fixed_script
                                 try:
+                                    new_audio, new_subtitle = generate_audio(
+                                        current_script
+                                    )
+                                    if new_audio:
+                                        files_to_cleanup.add(new_audio)
+                                    if new_subtitle:
+                                        files_to_cleanup.add(new_subtitle)
+                                    current_audio_file = new_audio
+                                    current_subtitle_file = new_subtitle
+                                except ValueError as audio_e:
+                                    st.warning(
+                                        f"Could not generate new audio: {audio_e}."
+                                    )
+                                    current_audio_file, current_subtitle_file = (
+                                        None,
+                                        None,
+                                    )
                             else:
                                 logging.info("Fallback kept the original narration.")
                         else:
                             st.error("Fallback failed to fix the code. Stopping.")
                             final_video = None
                             break
                     else:
+                        st.error(
+                            f"Manim failed after {max_retries + 1} attempts. Could not generate video."
+                        )
                         final_video = None
                 except Exception as e:
+                    st.error(
+                        f"An unexpected error occurred during video creation: {str(e)}"
+                    )
+                    logging.exception(
+                        "Unexpected error during create_manim_video call."
+                    )
                     final_video = None
                     break
+            # Step 4: Display the final result
             if final_video and os.path.exists(final_video):
                 st.success("Video generated successfully!")
                 st.video(final_video)
                 st.write("Generated Narration:")
+                st.text_area(
+                    "Narration",
+                    current_script if current_script else "No narration was generated.",
+                    height=150,
+                )
+            elif not final_video and attempt >= max_retries:
+                # This message is shown if all retries failed
+                st.error("Could not generate the video after multiple attempts.")
             elif not final_video:
+                # A general failure message
+                st.error("Video generation was unsuccessful.")
             else:
                 st.error("Error: Generated video file not found after processing.")
                 logging.error(f"Final video file '{final_video}' not found.")
         except Exception as e:
+            st.error(f"An unexpected and critical error occurred: {str(e)}")
             logging.exception("Unhandled exception in main generation block.")
         finally:
+            # Step 5: Clean up all generated temporary files
+            logging.info(f"Cleaning up {len(files_to_cleanup)} temporary files.")
+            for f_path in files_to_cleanup:
+                if f_path and os.path.exists(f_path):
+                    try:
+                        os.remove(f_path)
+                        logging.info(f"Removed temporary file: {f_path}")
+                    except OSError as e:
+                        logging.error(f"Error removing temporary file {f_path}: {e}")
+    st.markdown("<br><br>", unsafe_allow_html=True)
     st.markdown("---")
+    st.markdown(
+        """
         ### Want to help improve this app?
         - Give good Manim Examples and make PRs in guide.md, find it in repo [GitHub](https://github.com/mostlykiguess/Manimator)
         - Report issues on [GitHub Issues](https://github.com/mostlykiguess/Manimator/issues)
         - Email problematic prompts to me
+        """
+    )
 if __name__ == "__main__":
     main()

src/services/manim_service.py CHANGED Viewed

@@ -3,90 +3,153 @@ import subprocess
 import os
 import glob
 import logging
 def get_scene_name(manim_code):
-    match = re.search(r'class\s+(\w+)\s*\(\s*Scene\s*\)', manim_code)
     if match:
         return match.group(1)
     raise ValueError("No Scene class found in generated code")
-def create_manim_video(video_data, manim_code, audio_file=None):
     logging.info("Starting to create Manim video")
-    with open("generated_video.py", "w") as f:
-        manim_code_clean = re.sub(r"```python", "", manim_code)
-        manim_code_clean = manim_code_clean.replace("```", "").strip()
-        f.write(manim_code_clean)
-    scene_name = get_scene_name(manim_code_clean)
     logging.info(f"Identified scene name: {scene_name}")
     command = ["manim", "-qh", "generated_video.py", scene_name]
     logging.info(f"Running Manim with command: {' '.join(command)}")
-    subprocess.run(command, check=True)
-    search_pattern = os.path.join("media", "videos", "generated_video", "1080p60", f"{scene_name}.mp4")
-    if not os.path.exists(search_pattern):
-        logging.error(f"No rendered video found at: {search_pattern}")
-        raise Exception(f"No rendered video found for scene {scene_name}")
-    output_video = search_pattern
     final_output = "final_output.mp4"
     if audio_file and os.path.exists(audio_file):
-        logging.info(f"Merging video with audio file: {audio_file}")
-        video_duration_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
-                             "-of", "default=noprint_wrappers=1:nokey=1", output_video]
-        audio_duration_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
-                             "-of", "default=noprint_wrappers=1:nokey=1", audio_file]
-        video_duration = float(subprocess.check_output(video_duration_cmd).decode('utf-8').strip())
-        audio_duration = float(subprocess.check_output(audio_duration_cmd).decode('utf-8').strip())
-        logging.info(f"Video duration: {video_duration}s, Audio duration: {audio_duration}s")
         if audio_duration > video_duration:
-            logging.info("Audio is longer than video, extending video duration")
-            extended_video = "extended_video.mp4"
-            padding_time = audio_duration - video_duration
             extend_cmd = [
-                "ffmpeg", "-y",
-                "-i", output_video,
-                "-f", "lavfi", "-i", "color=black:s=1920x1080:r=60",
-                "-filter_complex", f"[0:v][1:v]concat=n=2:v=1:a=0[outv]",
-                "-map", "[outv]",
-                "-c:v", "libx264",
-                "-t", str(audio_duration),
-                extended_video
             ]
             logging.info(f"Extending video with command: {' '.join(extend_cmd)}")
-            subprocess.run(extend_cmd, check=True)
-            output_video = extended_video
-        merge_cmd = [
-            "ffmpeg", "-y",
-            "-i", output_video,
-            "-i", audio_file,
-            "-c:v", "copy",
-            "-c:a", "aac",
-            "-map", "0:v:0",
-            "-map", "1:a:0",
-            final_output
-        ]
-        logging.info(f"Merging with command: {' '.join(merge_cmd)}")
-        subprocess.run(merge_cmd, check=True)
-        output_video = final_output
-        if os.path.exists("extended_video.mp4"):
-            os.remove("extended_video.mp4")
-            logging.info("Removed temporary extended video file")
     if os.path.exists("generated_video.py"):
         os.remove("generated_video.py")
         logging.info("Removed generated_video.py")
-    logging.info(f"Final video created at: {output_video}")
-    return output_video

 import os
 import glob
 import logging
+import platform
 def get_scene_name(manim_code):
+    """Return the name of the first scene class declared in ``manim_code``.
+    Any single base class whose name ends in ``Scene`` (or ``scene``) is accepted,
+    for example ``Scene``, ``ThreeDScene``, ``MovingCameraScene`` or ``ZoomedScene``.
+    Args:
+        manim_code (str): Source text of the generated Manim script.
+    Returns:
+        str: The class name captured from the first matching ``class`` statement.
+    Raises:
+        ValueError: If no matching class definition is found.
+    """
+    # The optional word-character prefix admits every *Scene base class, not only
+    # the ThreeD/Multi prefixes, so camera and zoom scenes are found as well.
+    match = re.search(r"class\s+(\w+)\s*\(\s*\w*[Ss]cene\s*\)", manim_code)
     if match:
         return match.group(1)
     raise ValueError("No Scene class found in generated code")
+def sanitize_path_for_ffmpeg(path: str) -> str:
+    """Escape ``path`` so it can be embedded in an ffmpeg filter argument.
+    On Windows, backslashes are doubled and colons are backslash-escaped.
+    On every other platform, each single quote is replaced by a close-quote,
+    escaped-quote, open-quote sequence, and the characters ``:``, ``,``, ``[``
+    and ``]`` are backslash-escaped.
+    Args:
+        path (str): The file path to escape.
+    Returns:
+        str: The escaped path.
+    """
+    if platform.system() == "Windows":
+        substitutions = (("\\", "\\\\"), (":", "\\:"))
+    else:
+        # Order matters: the quote rewrite runs first, exactly as in a chained replace.
+        substitutions = (
+            ("'", "'\\''"),
+            (":", "\\:"),
+            (",", "\\,"),
+            ("[", "\\["),
+            ("]", "\\]"),
+        )
+    escaped = path
+    for needle, replacement in substitutions:
+        escaped = escaped.replace(needle, replacement)
+    return escaped
+def _run_checked(command, tool_name):
+    """Run ``command``; on a non-zero exit, log its stderr and raise CalledProcessError."""
+    process = subprocess.run(command, check=False, capture_output=True, text=True)
+    if process.returncode != 0:
+        logging.error(f"{tool_name} failed with stderr:\n{process.stderr}")
+        raise subprocess.CalledProcessError(
+            process.returncode, command, output=process.stdout, stderr=process.stderr
+        )
+    return process
+def _probe_duration(media_path):
+    """Return the duration of ``media_path`` in seconds as reported by ffprobe."""
+    probe_cmd = [
+        "ffprobe",
+        "-v",
+        "error",
+        "-show_entries",
+        "format=duration",
+        "-of",
+        "default=noprint_wrappers=1:nokey=1",
+        media_path,
+    ]
+    return float(subprocess.check_output(probe_cmd).decode("utf-8").strip())
+def create_manim_video(video_data, manim_code, audio_file=None, subtitle_file=None):
+    """Render ``manim_code`` with Manim and mux it with optional audio and subtitles.
+    The code is written to ``generated_video.py`` and rendered with ``manim -qh``.
+    If the audio is longer than the rendered clip, the clip is padded by cloning
+    its last frame. The result is re-encoded to ``final_output.mp4``. The script
+    file and the padded intermediate are removed whether or not rendering succeeds.
+    Args:
+        video_data: Not used by this function; kept so existing callers keep working.
+        manim_code (str): Complete Manim script containing a scene class.
+        audio_file (str | None): Audio to mux in; ignored if the path does not exist.
+        subtitle_file (str | None): ASS file to burn in; ignored if the path does not exist.
+    Returns:
+        str: Path of the final video (``final_output.mp4``).
+    Raises:
+        ValueError: If no scene class is found in ``manim_code``.
+        subprocess.CalledProcessError: If manim, ffprobe or ffmpeg exits non-zero.
+        FileNotFoundError: If Manim exits cleanly but the expected video is missing.
+    """
+    logging.info("Starting to create Manim video")
+    script_path = "generated_video.py"
+    final_output = "final_output.mp4"
+    extended_video_temp = "extended_video.mp4"
+    has_audio = bool(audio_file) and os.path.exists(audio_file)
+    try:
+        with open(script_path, "w", encoding="utf-8") as f:
+            f.write(manim_code)
+        scene_name = get_scene_name(manim_code)
+        logging.info(f"Identified scene name: {scene_name}")
+        command = ["manim", "-qh", script_path, scene_name]
+        logging.info(f"Running Manim with command: {' '.join(command)}")
+        # The helper runs with check=False so Manim's stderr is logged before the
+        # CalledProcessError is raised; check=True would raise before any logging.
+        _run_checked(command, "Manim")
+        video_path = os.path.join(
+            "media", "videos", "generated_video", "1080p60", f"{scene_name}.mp4"
+        )
+        if not os.path.exists(video_path):
+            logging.error(f"No rendered video found at: {video_path}")
+            raise FileNotFoundError(f"No rendered video found for scene {scene_name}")
+        input_video = video_path
+        if has_audio:
+            logging.info(f"Audio file found: {audio_file}")
+            video_duration = _probe_duration(input_video)
+            audio_duration = _probe_duration(audio_file)
+            logging.info(
+                f"Video duration: {video_duration}s, Audio duration: {audio_duration}s"
+            )
+            # If audio is longer, extend the video with a freeze frame of the last frame
+            if audio_duration > video_duration:
+                logging.info(
+                    "Audio is longer than video, extending video with freeze frame."
+                )
+                extend_cmd = [
+                    "ffmpeg",
+                    "-y",
+                    "-i",
+                    input_video,
+                    "-vf",
+                    f"tpad=stop_mode=clone:stop_duration={audio_duration - video_duration}",
+                    "-c:v",
+                    "libx264",
+                    extended_video_temp,
+                ]
+                logging.info(f"Extending video with command: {' '.join(extend_cmd)}")
+                _run_checked(extend_cmd, "ffmpeg")
+                input_video = extended_video_temp  # The extended video is now our input
+        # Build the final mux/encode command: inputs, optional filter, stream maps, codecs.
+        merge_cmd = ["ffmpeg", "-y", "-i", input_video]
+        maps = ["-map", "0:v:0"]
+        if has_audio:
+            merge_cmd.extend(["-i", audio_file])
+            maps.extend(["-map", "1:a:0"])
+        video_filters = []
+        if subtitle_file and os.path.exists(subtitle_file):
+            sanitized_path = sanitize_path_for_ffmpeg(os.path.abspath(subtitle_file))
+            video_filters.append(f"ass='{sanitized_path}'")
+        if video_filters:
+            merge_cmd.extend(["-vf", ",".join(video_filters)])
+        merge_cmd.extend(maps)
+        merge_cmd.extend(["-c:v", "libx264", "-c:a", "aac", "-shortest", final_output])
+        logging.info(f"Merging with final command: {' '.join(merge_cmd)}")
+        _run_checked(merge_cmd, "ffmpeg")
+    finally:
+        # Runs on success and on failure so a failed render does not leave stale files.
+        if os.path.exists(extended_video_temp):
+            os.remove(extended_video_temp)
+            logging.info("Removed temporary extended video file.")
+        if os.path.exists(script_path):
+            os.remove(script_path)
+            logging.info("Removed generated_video.py")
+    logging.info(f"Final video created at: {final_output}")
+    return final_output

src/services/subtitle_service.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import os
+from typing import List, Dict
+def _ass_time(t: float) -> str:
+    """Format a time in seconds as an ASS timestamp (``H:MM:SS.cc``).
+    Negative values are clamped to zero. The whole value is rounded to the nearest
+    centisecond before it is split into fields, so floating-point noise (``0.29 * 100``
+    evaluates to ``28.999...``) cannot drop a centisecond, and a fraction that rounds
+    up to 100 carries into the seconds field instead of overflowing two digits.
+    Args:
+        t (float): Time offset in seconds.
+    Returns:
+        str: The formatted timestamp, e.g. ``0:01:15.25``.
+    """
+    total_cs = int(round(max(t, 0) * 100))
+    total_seconds, cs = divmod(total_cs, 100)
+    total_minutes, s = divmod(total_seconds, 60)
+    h, m = divmod(total_minutes, 60)
+    return f"{h}:{m:02}:{s:02}.{cs:02}"
+def generate_subtitle_file(
+    tokens_with_timestamps: List[Dict], output_audio_path: str
+) -> str:
+    """
+    Generates an ASS subtitle file from tokens with absolute timestamps.
+    One ``Dialogue`` event is written per token. Tokens with a missing timestamp,
+    missing or blank text, or an end time that is not after the start time are skipped.
+    Args:
+        tokens_with_timestamps (list): A list of token dictionaries with 'text', 'start', and 'end' keys.
+        output_audio_path (str): The path to the audio file, used to name the subtitle file.
+    Returns:
+        str: The path to the generated subtitle file.
+    """
+    subtitle_file_path = os.path.splitext(output_audio_path)[0] + ".ass"
+    with open(subtitle_file_path, "w", encoding="utf-8") as f:
+        # Write standard ASS header
+        f.write("[Script Info]\n")
+        f.write("Title: Generated by Manimator\n")
+        f.write("ScriptType: v4.00+\n\n")
+        f.write("[V4+ Styles]\n")
+        f.write(
+            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
+        )
+        f.write(
+            "Style: Default,Arial,24,&H00FFFFFF,&H000000FF,&H003C3C3C,&H00000000,0,0,0,0,100,100,0,0,1,1.5,1,2,10,10,15,1\n\n"
+        )
+        f.write("[Events]\n")
+        f.write(
+            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
+        )
+        # Write dialogue entries word-by-word
+        for token in tokens_with_timestamps:
+            start_time = token.get("start")
+            end_time = token.get("end")
+            # ``or ""`` also covers tokens whose 'text' is present but None.
+            text = (token.get("text") or "").strip()
+            if start_time is None or end_time is None or not text:
+                continue
+            if not (end_time > start_time):
+                continue
+            # Text is the last field of a Dialogue line, so commas need no escaping;
+            # "\," is not an ASS escape and would put a stray backslash in the text.
+            # A raw line break would split the event, so it is written as "\N" instead.
+            text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "\\N")
+            start_formatted = _ass_time(start_time)
+            end_formatted = _ass_time(end_time)
+            f.write(
+                f"Dialogue: 0,{start_formatted},{end_formatted},Default,,0,0,0,,{text}\n"
+            )
+    return subtitle_file_path

src/services/tts_service.py CHANGED Viewed

@@ -1,44 +1,75 @@
 from kokoro import KPipeline
 import soundfile as sf
 import os
-from typing import Optional
-class TTSService:
-    def __init__(self, lang_code: str = 'a'):
-        """Initialize the TTS service with Kokoro"""
-        self.pipeline = KPipeline(lang_code=lang_code)
-        self.voice_presets = {
-            'en-us': 'af_heart',  # American English
-            'en-uk': 'bf_heart',  # British English
-            'es': 'es_heart',     # Spanish
-            'fr': 'fr_heart',     # French
-            'hi': 'hi_heart',     # Hindi
-            'it': 'it_heart',     # Italian
-            'pt-br': 'pt_heart',  # Brazilian Portuguese
-            'ja': 'ja_heart',     # Japanese
-            'zh': 'zh_heart',     # Mandarin Chinese
-        }
-    def generate(self, text: str, voice: str = 'en-us', output_path: Optional[str] = None) -> str:
-        if not text:
-            raise ValueError("Text cannot be empty")
-        if voice not in self.voice_presets:
-            raise ValueError(f"Unsupported voice: {voice}. Available voices: {list(self.voice_presets.keys())}")
-        if output_path is None:
-            output_path = f'output_{voice}.wav'
-        generator = self.pipeline(text, voice=self.voice_presets[voice], speed=1, split_pattern=r'\n+')
-        audio_data = []
-        for _, _, audio in generator:
-            audio_data.extend(audio)
-        sf.write(output_path, audio_data, 24000)
-        return output_path
-def generate_audio(text: str, voice: str = 'en-us') -> str:
-    """Generate audio from text using Kokoro TTS"""
-    service = TTSService()
-    return service.generate(text, voice)

 from kokoro import KPipeline
 import soundfile as sf
 import os
+import numpy as np
+from typing import Optional, Tuple, List, Dict
+from .subtitle_service import generate_subtitle_file
+def generate_audio(
+    text: str, voice_lang: str = "a", output_filename: str = "output_audio.wav"
+) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Generate audio from text using Kokoro TTS and create a synchronized subtitle file.
+    Args:
+        text (str): The text to synthesize.
+        voice_lang (str): The language code for the voice (e.g., 'a' for American English).
+        output_filename (str): The desired output filename for the audio.
+    Returns:
+        A tuple containing the path to the audio file and the subtitle file, or (None, None) on failure.
+    Raises:
+        ValueError: If ``text`` is empty or contains only whitespace.
+    """
+    # Imported here because this module's import block does not import ``logging``;
+    # without it the error handler below fails with NameError and hides the real error.
+    import logging
+    if not text.strip():
+        raise ValueError("Text for TTS cannot be empty.")
+    try:
+        pipeline = KPipeline(lang_code=voice_lang)
+        voice_preset = "af_heart"
+        audio_segments = []
+        all_tokens: List[Dict] = []
+        current_time_offset = 0.0
+        rate = 24000
+        for result in pipeline(
+            text, voice=voice_preset, speed=1.0, split_pattern=r"\n+"
+        ):
+            audio_segments.append(result.audio)
+            chunk_duration = len(result.audio) / rate
+            # Defensive: a ``tokens`` attribute that is missing or None means "no timing data".
+            for token in getattr(result, "tokens", None) or []:
+                # Token timestamps are relative to their chunk; a missing bound falls
+                # back to the chunk's start or end.
+                start_ts = token.start_ts if token.start_ts is not None else 0
+                end_ts = token.end_ts if token.end_ts is not None else chunk_duration
+                all_tokens.append(
+                    {
+                        "text": token.text.strip(),
+                        "start": current_time_offset + start_ts,
+                        "end": current_time_offset + end_ts,
+                    }
+                )
+            # Shift later chunks by the audio emitted so far to get absolute times.
+            current_time_offset += chunk_duration
+        if not audio_segments:
+            return None, None
+        final_audio = np.concatenate(audio_segments)
+        sf.write(output_filename, final_audio, rate)
+        subtitle_file_path = generate_subtitle_file(all_tokens, output_filename)
+        return output_filename, subtitle_file_path
+    except Exception as e:
+        logging.error(
+            f"An error occurred during TTS or subtitle generation: {e}", exc_info=True
+        )
+        # Do not leave a partial or orphaned audio file behind after a failure.
+        if os.path.exists(output_filename):
+            os.remove(output_filename)
+        return None, None

src/tests/test_fallback.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import unittest
+from src.api.fallback_gemini import fix_manim_code
+class TestFallbackOnly(unittest.TestCase):
+    """Smoke test that calls ``fix_manim_code`` directly with deliberately broken Manim code."""
+    # NOTE(review): fix_manim_code reads GEMINI_API_KEY and builds a Gemini client, so
+    # this presumably needs network access and a valid key to do real work - confirm.
+    def test_fallback_with_broken_code(self):
+        """Pass a scene with an unclosed parenthesis to the fallback and print what comes back."""
+        # The final self.play(...) call is missing its closing parenthesis on purpose.
+        broken_code = "from manim import *\nclass Broken(Scene):\n    def construct(self):\n        self.play(Write(Text('Oops!'))"
+        error_message = "SyntaxError: unexpected EOF while parsing"
+        original_context = "Test fallback with broken code"
+        fixed_video_data, fixed_script = fix_manim_code(
+            faulty_code=broken_code,
+            error_message=error_message,
+            original_context=original_context
+        )
+        print("Fixed video data:", fixed_video_data)
+        print("Fixed script:", fixed_script)
+        # NOTE(review): this also passes when both values are None; it only fails when a
+        # script is returned without video data - confirm that is the intended contract.
+        self.assertTrue(fixed_video_data is not None or fixed_script is None)
+if __name__ == "__main__":
+    unittest.main()