MostlyKIGuess committed on
Commit
2e31ab2
·
1 Parent(s): f10bf2f

Refactor Gemini API integration and enhance video generation workflow

Browse files

- Updated the `gemini.py` file to include a structured output model
using Pydantic for better validation and error handling.
- Improved the system prompt to enforce stricter timing through more
precise wording (credit to the Claude prompting guide).
- Use structured output for the first Gemini call, and a Google-Search-enhanced
fallback with non-structured output, because the Gemini API does not allow
tool calling together with structured output.
- add a new subtitle_service.py for generating ASS subtitle files from
timestamps.
- app.py now handles the subtitle service.
- add subtitle support to manim service
- change tts_service.py to generate synchronized audio and subtitles
- Test the fallback path using the test fallback.

src/api/fallback_gemini.py CHANGED
@@ -3,11 +3,42 @@ import re
3
  from google import genai
4
  from google.genai import types as genai_types
5
  import logging
6
- from .gemini import SYSTEM_PROMPT, base_prompt_instructions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
 
10
  def fix_manim_code(faulty_code: str, error_message: str, original_context: str):
 
 
 
11
  api_key = os.getenv("GEMINI_API_KEY")
12
  if not api_key:
13
  logging.error("GEMINI_API_KEY not found in environment variables for fallback.")
@@ -15,99 +46,182 @@ def fix_manim_code(faulty_code: str, error_message: str, original_context: str):
15
 
16
  client = genai.Client(api_key=api_key)
17
 
18
- fix_prompt_text = (
19
- f"The following Manim code, intended to '{original_context}', failed with an error.\n\n"
20
- "### FAULTY CODE:\n"
21
- f"```python\n{faulty_code}\n```\n\n"
22
- "### ERROR MESSAGE:\n"
23
- f"```\n{error_message}\n```\n\n"
24
- "### INSTRUCTIONS:\n"
25
- "1. Analyze the error message and the faulty code.\n"
26
- "2. Correct the code to fix the specific error reported.\n"
27
- "3. Ensure the corrected code still fulfills the original request and adheres strictly to *all* the requirements listed below.\n"
28
- "4. Pay close attention to vector dimensions, matrix operations, allowed Manim methods, and total duration (30 seconds).\n"
29
- "5. If the code logic changes significantly, update the narration accordingly.\n"
30
- "6. Return *only* the corrected code and narration using the '### MANIM CODE:' and '### NARRATION:' delimiters, just like the original request.\n\n"
31
- "### REQUIREMENTS (Apply these to the corrected code):\n"
32
- f"{base_prompt_instructions}"
33
- )
34
-
35
- contents = [fix_prompt_text]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  logging.info("Attempting to fix Manim code via fallback...")
38
  try:
 
 
39
  generation_config = genai_types.GenerateContentConfig(
40
- system_instruction=SYSTEM_PROMPT
 
 
41
  )
42
-
43
  response = client.models.generate_content(
44
- model="gemini-2.0-flash",
45
- contents=contents,
46
- config=generation_config
47
- )
48
  if response:
 
49
  try:
50
  content = response.text
51
  logging.info("Received response from fallback attempt.")
52
-
53
- if "### NARRATION:" in content:
54
- manim_code, narration = content.split("### NARRATION:", 1)
55
- manim_code = re.sub(r"```python", "", manim_code).replace("```", "").strip()
 
 
56
  narration = narration.strip()
57
 
58
  if "from manim import *" not in manim_code:
59
- logging.warning("Adding missing 'from manim import *' (fallback fix).")
60
- manim_code = "from manim import *\nimport numpy as np\n" + manim_code
 
 
 
 
61
  elif "import numpy as np" not in manim_code:
62
- logging.warning("Adding missing 'import numpy as np' (fallback fix).")
63
- lines = manim_code.splitlines()
64
- for i, line in enumerate(lines):
65
- if "from manim import *" in line:
66
- lines.insert(i + 1, "import numpy as np")
67
- manim_code = "\n".join(lines)
68
- break
69
-
70
- logging.info("Successfully parsed fixed code and narration from fallback.")
71
- return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
 
 
 
 
 
 
 
72
  else:
73
- logging.warning("Delimiter '### NARRATION:' not found in fallback response. Attempting fallback extraction.")
74
- code_match = re.search(r'```python(.*?)```', content, re.DOTALL)
 
 
75
  if code_match:
76
  manim_code = code_match.group(1).strip()
77
- narration_part = content.split('```', 2)[-1].strip()
78
  narration = narration_part if len(narration_part) > 20 else ""
79
  if not narration:
80
- logging.warning("Fallback narration extraction resulted in empty or very short text (fallback fix).")
 
 
81
  else:
82
- logging.info("Successfully parsed code and narration using fallback regex (fallback fix).")
 
 
83
 
84
  if "from manim import *" not in manim_code:
85
- logging.warning("Adding missing 'from manim import *' (fallback fix, regex path).")
86
- manim_code = "from manim import *\nimport numpy as np\n" + manim_code
 
 
 
 
87
  elif "import numpy as np" not in manim_code:
88
- logging.warning("Adding missing 'import numpy as np' (fallback fix, regex path).")
89
- lines = manim_code.splitlines()
90
- for i, line in enumerate(lines):
91
- if "from manim import *" in line:
92
- lines.insert(i + 1, "import numpy as np")
93
- manim_code = "\n".join(lines)
94
- break
95
-
96
- logging.info("Successfully parsed fixed code using fallback extraction.")
97
- return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
 
 
 
 
 
 
 
98
  else:
99
- logging.error("Fallback extraction failed: No Python code block found in fallback response.")
100
- logging.debug(f"Fallback content without code block:\n{content}")
101
- return None, None
 
 
 
 
102
 
103
  except ValueError:
104
  logging.error("Could not extract text from the fallback response.")
105
  if response.prompt_feedback and response.prompt_feedback.block_reason:
106
- logging.error(f"Fallback content generation blocked. Reason: {response.prompt_feedback.block_reason.name}")
 
 
107
  return None, None
108
  except Exception as e:
109
- logging.exception(f"Error processing fallback response: {e}")
110
- return None, None
111
  else:
112
  logging.error("No response received from Gemini during fallback attempt.")
113
  return None, None
 
3
  from google import genai
4
  from google.genai import types as genai_types
5
  import logging
6
+ from .gemini import base_prompt_instructions
7
+
8
+ logging.basicConfig(
9
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
10
+ )
11
+
12
+ FALLBACK_SYSTEM_PROMPT = """You are an expert Manim programmer specializing in fixing broken Manim code and creating visually striking 60-second animations, strictly following Manim Community v0.19.0 standards.
13
+
14
+ CRITICAL TIMING REQUIREMENTS:
15
+ - **Total Duration:** Exactly 60 seconds (1 minute)
16
+ - **Narration:** Exactly 150-160 words (average speaking pace: 2.5 words per second)
17
+ - **Animation Structure:** Use this timing framework:
18
+ * Introduction: 8-10 seconds
19
+ * Main content: 40-45 seconds (3-4 major segments)
20
+ * Conclusion/summary: 7-10 seconds
21
+ - **Synchronization:** Each narration sentence should correspond to 3-5 seconds of animation
22
+
23
+ Core Requirements:
24
+ - **API Version:** Use only Manim Community v0.19.0 API
25
+ - **Vectors & Math:** Use 3D vectors (np.array([x, y, 0])) and ensure correct math operations
26
+ - **Matrix Visualization:** Use MathTex for matrices: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'
27
+ - **Star Usage:** Use Star(n=5, ...) not n_points
28
+ - **Error Prevention:** Always validate Scene class exists; avoid 3D scenes
29
+ - **Visual Style:** Create vibrant, dynamic animations with smooth transitions
30
+
31
+ IMPORTANT: Your response must be formatted with clear delimiters:
32
+ - Start Manim code with: ### MANIM CODE:
33
+ - Start narration with: ### NARRATION:
34
+ - End response after narration (no additional text)
35
+ """
36
 
 
37
 
38
  def fix_manim_code(faulty_code: str, error_message: str, original_context: str):
39
+ """
40
+ Enhanced fallback function with Google Search integration.
41
+ """
42
  api_key = os.getenv("GEMINI_API_KEY")
43
  if not api_key:
44
  logging.error("GEMINI_API_KEY not found in environment variables for fallback.")
 
46
 
47
  client = genai.Client(api_key=api_key)
48
 
49
+ # Enhanced fallback prompt with better structure and error analysis
50
+ fix_prompt_text = f"""
51
+ TASK: Fix the broken Manim code that failed with a specific error.
52
+
53
+ ### ORIGINAL REQUEST:
54
+ {original_context}
55
+
56
+ ### BROKEN MANIM CODE:
57
+ ```python
58
+ {faulty_code}
59
+ ```
60
+
61
+ ### ERROR ENCOUNTERED:
62
+ ```
63
+ {error_message}
64
+ ```
65
+
66
+ ### ANALYSIS INSTRUCTIONS:
67
+ 1. **Error Analysis**: Examine the error message carefully. Common issues include:
68
+ - Import errors (missing 'from manim import *' or 'import numpy as np')
69
+ - Scene class not found (class must inherit from Scene)
70
+ - Invalid Manim methods or syntax
71
+ - Vector dimension mismatches (use np.array([x, y, 0]))
72
+ - Animation object validation errors
73
+ - Timing issues (ensure total duration = 60 seconds)
74
+
75
+ 2. **Google Search**: Use Google Search to find:
76
+ - Recent Manim Community v0.19.0 API changes
77
+ - Specific error message solutions
78
+ - Updated method signatures or deprecated features
79
+ - Working examples of similar animations
80
+
81
+ 3. **Code Fixing Strategy**:
82
+ - Keep the original animation concept intact
83
+ - Fix only what's necessary to resolve the error
84
+ - Maintain 60-second duration and 120-150 word narration
85
+ - Ensure all imports are present
86
+ - Validate Scene class exists and is properly named
87
+ - Use only verified Manim methods from the allowed list
88
+
89
+ 4. **Quality Checks**:
90
+ - Verify vector operations use 3D format: np.array([x, y, 0])
91
+ - Check all self.play() calls have valid animation objects
92
+ - Ensure run_time and self.wait() sum to exactly 60 seconds
93
+ - Count narration words (must be 120-150)
94
+
95
+ ### OUTPUT FORMAT:
96
+ Provide your response in exactly this format:
97
+
98
+ ### MANIM CODE:
99
+ [Insert the complete, fixed Manim code here - include all imports and Scene class]
100
+
101
+ ### NARRATION:
102
+ [Insert the narration script here - exactly 120-150 words, synchronized with animations]
103
+
104
+ ### REQUIREMENTS TO FOLLOW:
105
+ {base_prompt_instructions}
106
+ """
107
+
108
+ contents = [fix_prompt_text]
109
 
110
  logging.info("Attempting to fix Manim code via fallback...")
111
  try:
112
+ grounding_tool = genai_types.Tool(google_search=genai_types.GoogleSearch())
113
+
114
  generation_config = genai_types.GenerateContentConfig(
115
+ tools=[grounding_tool],
116
+ temperature=0.4, # lower coz grounding
117
+ system_instruction=FALLBACK_SYSTEM_PROMPT,
118
  )
119
+
120
  response = client.models.generate_content(
121
+ model="gemini-2.5-flash",
122
+ contents=contents, # type: ignore
123
+ config=generation_config,
124
+ )
125
  if response:
126
+ # print(response)
127
  try:
128
  content = response.text
129
  logging.info("Received response from fallback attempt.")
130
+
131
+ if "### NARRATION:" in content: # type: ignore
132
+ manim_code, narration = content.split("### NARRATION:", 1) # type: ignore
133
+ manim_code = (
134
+ re.sub(r"```python", "", manim_code).replace("```", "").strip()
135
+ )
136
  narration = narration.strip()
137
 
138
  if "from manim import *" not in manim_code:
139
+ logging.warning(
140
+ "Adding missing 'from manim import *' (fallback fix)."
141
+ )
142
+ manim_code = (
143
+ "from manim import *\nimport numpy as np\n" + manim_code
144
+ )
145
  elif "import numpy as np" not in manim_code:
146
+ logging.warning(
147
+ "Adding missing 'import numpy as np' (fallback fix)."
148
+ )
149
+ lines = manim_code.splitlines()
150
+ for i, line in enumerate(lines):
151
+ if "from manim import *" in line:
152
+ lines.insert(i + 1, "import numpy as np")
153
+ manim_code = "\n".join(lines)
154
+ break
155
+
156
+ logging.info(
157
+ "Successfully parsed fixed code and narration from fallback."
158
+ )
159
+ return {
160
+ "manim_code": manim_code,
161
+ "output_file": "output.mp4",
162
+ }, narration
163
  else:
164
+ logging.warning(
165
+ "Delimiter '### NARRATION:' not found in fallback response. Attempting fallback extraction."
166
+ )
167
+ code_match = re.search(r"```python(.*?)```", content, re.DOTALL) # type: ignore
168
  if code_match:
169
  manim_code = code_match.group(1).strip()
170
+ narration_part = content.split("```", 2)[-1].strip()
171
  narration = narration_part if len(narration_part) > 20 else ""
172
  if not narration:
173
+ logging.warning(
174
+ "Fallback narration extraction resulted in empty or very short text (fallback fix)."
175
+ )
176
  else:
177
+ logging.info(
178
+ "Successfully parsed code and narration using fallback regex (fallback fix)."
179
+ )
180
 
181
  if "from manim import *" not in manim_code:
182
+ logging.warning(
183
+ "Adding missing 'from manim import *' (fallback fix, regex path)."
184
+ )
185
+ manim_code = (
186
+ "from manim import *\nimport numpy as np\n" + manim_code
187
+ )
188
  elif "import numpy as np" not in manim_code:
189
+ logging.warning(
190
+ "Adding missing 'import numpy as np' (fallback fix, regex path)."
191
+ )
192
+ lines = manim_code.splitlines()
193
+ for i, line in enumerate(lines):
194
+ if "from manim import *" in line:
195
+ lines.insert(i + 1, "import numpy as np")
196
+ manim_code = "\n".join(lines)
197
+ break
198
+
199
+ logging.info(
200
+ "Successfully parsed fixed code using fallback extraction."
201
+ )
202
+ return {
203
+ "manim_code": manim_code,
204
+ "output_file": "output.mp4",
205
+ }, narration
206
  else:
207
+ logging.error(
208
+ "Fallback extraction failed: No Python code block found in fallback response."
209
+ )
210
+ logging.debug(
211
+ f"Fallback content without code block:\n{content}"
212
+ )
213
+ return None, None
214
 
215
  except ValueError:
216
  logging.error("Could not extract text from the fallback response.")
217
  if response.prompt_feedback and response.prompt_feedback.block_reason:
218
+ logging.error(
219
+ f"Fallback content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
220
+ )
221
  return None, None
222
  except Exception as e:
223
+ logging.exception(f"Error processing fallback response: {e}")
224
+ return None, None
225
  else:
226
  logging.error("No response received from Gemini during fallback attempt.")
227
  return None, None
src/api/gemini.py CHANGED
@@ -5,69 +5,108 @@ from dotenv import load_dotenv
5
  import os
6
  import pathlib
7
  import logging
 
8
 
9
  load_dotenv()
10
 
11
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
12
 
13
- # --- Global System Prompt ---
14
- SYSTEM_PROMPT = """You are an expert Manim programmer specializing in creating crazy, cutting-edge, and visually striking animations based on user prompts or documents, strictly following Manim Community v0.19.0 standards.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  Core Requirements:
17
- - **API Version:** Use only Manim Community v0.19.0 API.
18
- - **Vectors & Math:** Use 3D vectors (`np.array([x, y, 0])`) and ensure correct math operations.
19
- - **Allowed Methods:** Strictly use the verified list of Manim methods provided in the detailed instructions. No external images.
20
- - ** "\n - self.play(), self.wait(), Create(), Write(), Transform(), FadeIn(), FadeOut(), Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup(), Mobject.animate, self.camera.frame.animate"
21
- - **Matrix Visualization:** Use `MathTex` for displaying matrices in the format `r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'`.
22
- - **Duration:** The total animation duration MUST be exactly 30 seconds.
23
- -**Error handling**:"An unexpected error occurred during video creation: No Scene class found in generated code, This error SHOULD NEVER occur. Make sure to validate the code before returning it. If this error occurs, please log the error and return None for both manim_code and narration.Make sure you don't do 3Dscene coz that gives this error"
24
- - **Engagement:** Create visually stunning and crazy animations that push creative boundaries. Use vibrant colors, dynamic movements, and unexpected transformations.
25
- - **Text Handling:** Fade out text and other elements as soon as they are no longer needed, ensuring a smooth transition.
26
- - **Synchronization:** Align animation pacing (`run_time`, `wait`) roughly with the narration segments.
27
- - **Output Format:** Return *only* the Python code and narration script, separated by '### MANIM CODE:' and '### NARRATION:' delimiters. Adhere strictly to this format.
28
- - **Code Quality:** Generate error-free, runnable code with necessary imports (`from manim import *`, `import numpy as np`) and exactly one Scene class. Validate objects and animation calls.
29
  """
30
-
31
- # --- Detailed Instructions ---
32
  base_prompt_instructions = (
33
- "\nFollow these requirements strictly:"
34
- "\n1. Use only Manim Community v0.19.0 API"
35
- "\n2. Vector operations:"
36
- "\n - All vectors must be 3D: np.array([x, y, 0])"
37
- "\n - Matrix multiplication: result = np.dot(matrix, vector[:2])"
38
- "\n - Append 0 for Z: np.append(result, 0)"
39
- "\n3. Matrix visualization:"
40
- "\n - Use MathTex for display"
41
- "\n - Format: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'"
42
- "\n4. Use only verified Manim methods:"
43
- "\n - self.play(), self.wait(), Create(), Write(), Transform(), FadeIn(), FadeOut(), Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup(), Mobject.animate, self.camera.frame.animate"
44
- "\n5. DO NOT USE IMAGES IMPORTS."
45
- "\n6. Make the video crazy and innovative by:"
46
- "\n - Fading out text and other elements gracefully once they are no longer needed"
47
- "\n - Adding creative interactive elements like arrows, labels, and transitions"
48
- "\n - Incorporating graphs/plots (Axes, Plot, LineGraph, BarChart) where appropriate"
49
- "\n - Leveraging smooth transitions and varied pacing to keep the viewer engaged."
50
- "\n7. Ensure the video is error-free by:"
51
- "\n - Validating all objects before animations"
52
- "\n - Handling exceptions gracefully (in generated code if applicable)"
53
- "\n - Ensuring operands for vector operations match in shape to avoid broadcasting errors"
54
- "\n8. Validate that every arrow creation ensures its start and end points are distinct to prevent normalization errors."
55
- "\n9. Use longer scenes (e.g., 5-6 seconds per major step) for complex transformations and shorter scenes for simple animations, with a total duration of exactly 30 seconds."
56
- "\n10. Align the narration script with the animation pace for seamless storytelling."
57
- "\n11. Ensure all objects in self.play() are valid animations (e.g., `Create(obj)`, `obj.animate.shift(UP)`)."
58
- "\n12. Use Mobject.animate for animations involving Mobject methods."
59
- "\n13. CRITICAL: DO NOT USE BARCHATS, LINEGRAPHS, OR PLOTTING WITHOUT EXPLICIT INSTRUCTIONS."
60
- "\n14. Provide creative and sometimes crazy Manim video scripts that push the conventional boundaries."
61
- "\n15. **Synchronization:** Structure the narration and Manim code for better synchronization:"
62
- "\n - Keep narration segments concise and directly tied to the visual elements."
63
- "\n - Use `self.wait(duration)` in the Manim code to match natural pauses in narration."
64
- "\n - Adjust `run_time` in `self.play()` calls to match the speaking duration of the associated narration."
65
- "\n - Ensure the animation and narration sum to exactly 30 seconds."
66
- "\n### MANIM CODE:\n"
67
- "Provide only valid Python code using Manim Community v0.19.0 to generate the video animation.\n\n"
68
- "### NARRATION:\n"
69
- "Provide a concise narration script for the video that aligns with the Manim code's pacing and visuals.DO NOT give timestamps.\n\n"
70
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  def load_manim_examples():
@@ -75,7 +114,6 @@ def load_manim_examples():
75
  if not guide_path.exists():
76
  logging.warning(f"Manim examples guide not found at {guide_path}")
77
  return ""
78
-
79
  logging.info(f"Loading Manim examples from {guide_path}")
80
  return guide_path.read_text(encoding="utf-8")
81
 
@@ -85,7 +123,6 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
85
  if not api_key:
86
  logging.error("GEMINI_API_KEY not found in environment variables")
87
  raise Exception("GEMINI_API_KEY not found in environment variables")
88
-
89
  if not idea and not pdf_path:
90
  raise ValueError("Either an idea or a pdf_path must be provided.")
91
  if idea and pdf_path:
@@ -97,7 +134,10 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
97
 
98
  manim_examples = load_manim_examples()
99
  if manim_examples:
100
- examples_prompt = "Below are examples of Manim code that demonstrate proper usage patterns. Use these as reference when generating your animation:\n\n" + manim_examples
 
 
 
101
  contents.append(examples_prompt)
102
  logging.info("Added Manim examples from guide.md to prime the model")
103
  else:
@@ -108,12 +148,14 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
108
  if pdf_path:
109
  pdf_file_path = pathlib.Path(pdf_path)
110
  if not pdf_file_path.exists():
111
- logging.error(f"PDF file not found at: {pdf_path}")
112
- raise FileNotFoundError(f"PDF file not found at: {pdf_path}")
113
 
114
  logging.info(f"Reading PDF: {pdf_path}")
115
  pdf_data = pdf_file_path.read_bytes()
116
- pdf_part = genai_types.Part.from_bytes(data=pdf_data, mime_type='application/pdf')
 
 
117
  contents.append(pdf_part)
118
 
119
  user_prompt_text = f"Create a 30-second Manim video script summarizing the key points or illustrating a core concept from the provided PDF document. {base_prompt_instructions}"
@@ -127,81 +169,63 @@ def generate_video(idea: str | None = None, pdf_path: str | None = None):
127
  logging.info("Sending request to Gemini API...")
128
  try:
129
  generation_config = genai_types.GenerateContentConfig(
130
- system_instruction=SYSTEM_PROMPT
131
- )
 
 
132
 
133
  response = client.models.generate_content(
134
- model="gemini-2.0-flash",
135
- contents=contents,
136
- config=generation_config
137
- )
138
  except Exception as e:
139
  logging.exception(f"Error calling Gemini API: {e}")
140
  raise Exception(f"Error calling Gemini API: {e}")
141
 
142
  if response:
143
  try:
144
- content = response.text
145
- logging.info("Received response from Gemini.")
146
- except ValueError:
147
- logging.warning("Could not extract text from the response. Response details:")
148
- logging.warning(response)
149
- if response.prompt_feedback and response.prompt_feedback.block_reason:
150
- logging.error(f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}")
151
- raise Exception(f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}")
152
- else:
153
- logging.error("Failed to generate content. The response was empty or malformed.")
154
- raise Exception("Failed to generate content. The response was empty or malformed.")
155
-
156
- if "### NARRATION:" in content:
157
- manim_code, narration = content.split("### NARRATION:", 1)
158
- manim_code = re.sub(r"```python", "", manim_code).replace("```", "").strip()
159
- narration = narration.strip()
160
- logging.info("Successfully parsed code and narration using delimiter.")
161
 
162
  if "from manim import *" not in manim_code:
163
- logging.warning("Adding missing 'from manim import *'.")
164
- manim_code = "from manim import *\nimport numpy as np\n" + manim_code
165
  elif "import numpy as np" not in manim_code:
166
- logging.warning("Adding missing 'import numpy as np'.")
167
- lines = manim_code.splitlines()
168
- for i, line in enumerate(lines):
169
- if "from manim import *" in line:
170
- lines.insert(i + 1, "import numpy as np")
171
- manim_code = "\n".join(lines)
172
- break
173
 
174
  return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
175
- else:
176
- logging.warning("Delimiter '### NARRATION:' not found. Attempting fallback extraction.")
177
- code_match = re.search(r'```python(.*?)```', content, re.DOTALL)
178
- if code_match:
179
- manim_code = code_match.group(1).strip()
180
- narration_part = content.split('```', 2)[-1].strip()
181
- narration = narration_part if len(narration_part) > 20 else ""
182
- if not narration:
183
- logging.warning("Fallback narration extraction resulted in empty or very short text.")
184
- else:
185
- logging.info("Successfully parsed code and narration using fallback regex.")
186
-
187
- if "from manim import *" not in manim_code:
188
- logging.warning("Adding missing 'from manim import *' (fallback).")
189
- manim_code = "from manim import *\nimport numpy as np\n" + manim_code
190
- elif "import numpy as np" not in manim_code:
191
- logging.warning("Adding missing 'import numpy as np' (fallback).")
192
- lines = manim_code.splitlines()
193
- for i, line in enumerate(lines):
194
- if "from manim import *" in line:
195
- lines.insert(i + 1, "import numpy as np")
196
- manim_code = "\n".join(lines)
197
- break
198
-
199
- return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
200
  else:
201
- logging.error("Fallback extraction failed: No Python code block found in response.")
202
- logging.debug(f"Content without code block:\n{content}")
203
- raise Exception("The response does not contain the expected '### NARRATION:' delimiter or a valid Python code block.")
204
-
 
 
205
  else:
206
- logging.error("Error generating video content. No response received from Gemini.")
 
 
207
  raise Exception("Error generating video content. No response received.")
 
5
  import os
6
  import pathlib
7
  import logging
8
+ from pydantic import BaseModel
9
 
10
  load_dotenv()
11
 
12
+ logging.basicConfig(
13
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
14
+ )
15
 
16
+
17
+ class ManimOutput(BaseModel):
18
+ manim_code: str
19
+ narration: str
20
+
21
+
22
+ SYSTEM_PROMPT = """You are an expert Manim programmer specializing in creating visually striking 60-second animations based on user prompts or documents, strictly following Manim Community v0.19.0 standards. Your output MUST be a JSON object conforming to the provided schema.
23
+
24
+ CRITICAL TIMING REQUIREMENTS:
25
+ - **Total Duration:** Exactly 60 seconds (1 minute)
26
+ - **Narration:** Exactly 150-160 words (average speaking pace: 2.5 words per second)
27
+ - **Animation Structure:** Use this timing framework:
28
+ * Introduction: 8-10 seconds
29
+ * Main content: 40-45 seconds (3-4 major segments)
30
+ * Conclusion/summary: 7-10 seconds
31
+ - **Synchronization:** Each narration sentence should correspond to 3-5 seconds of animation
32
 
33
  Core Requirements:
34
+ - **API Version:** Use only Manim Community v0.19.0 API
35
+ - **Vectors & Math:** Use 3D vectors (np.array([x, y, 0])) and ensure correct math operations
36
+ - **Matrix Visualization:** Use MathTex for matrices: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'
37
+ - **Star Usage:** Use Star(n=5, ...) not n_points
38
+ - **Error Prevention:** Always validate Scene class exists; avoid 3D scenes
39
+ - **Visual Style:** Create vibrant, dynamic animations with smooth transitions
40
+ - **Output Format:** JSON with "manim_code" and "narration" keys
 
 
 
 
 
41
  """
42
+ # Detailed Instructions
 
43
  base_prompt_instructions = (
44
+ "\nSTRICT TIMING REQUIREMENTS:"
45
+ "\n1. **Video Duration:** Exactly 60 seconds total"
46
+ "\n2. **Narration Constraints:**"
47
+ "\n - Exactly 150-160 words (no more, no less)"
48
+ "\n - Speaking pace: 2.5 words per second"
49
+ "\n - Use short, clear sentences (8-12 words each)"
50
+ "\n - Include natural pauses between major concepts"
51
+ "\n3. **Animation Timing Structure:**"
52
+ "\n - Use self.wait() to match narration pauses"
53
+ "\n - run_time in self.play() should match sentence duration"
54
+ "\n - Fade out elements after 3-5 seconds to avoid clutter"
55
+ "\n - Example timing: self.play(Create(obj), run_time=3), self.wait(1)"
56
+ "\nTECHNICAL REQUIREMENTS:"
57
+ "\n4. Use only Manim Community v0.19.0 API"
58
+ "\n5. Vector operations (3D vectors): np.array([x, y, 0])"
59
+ "\n6. Matrix display: MathTex(r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}')"
60
+ "\n7. Verified methods only: Create(), Write(), Transform(), FadeIn(), FadeOut(), "
61
+ "\n Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), "
62
+ "\n FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), "
63
+ "\n BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup()"
64
+ "\n8. Star shapes: Star(n=5, ...) not n_points"
65
+ "\n9. NO image imports or 3D scenes"
66
+ "\n10. There is no .to_center() method so please don't use that"
67
+ "\nVISUAL & CONTENT GUIDELINES:"
68
+ "\n10. Create 4-5 distinct visual segments matching narration flow"
69
+ "\n11. Use vibrant colors and smooth transitions"
70
+ "\n12. Fade out text/objects when no longer needed"
71
+ "\n13. Include interactive elements: arrows, labels, highlights"
72
+ "\n14. Validate all objects before animation calls"
73
+ "\n15. Use longer run_times (4-6s) for complex animations, shorter (2-3s) for simple ones"
74
+ "\nCODE STRUCTURE TEMPLATE:"
75
+ "\n16. Always follow this timing pattern:"
76
+ "\n ```python"
77
+ "\n class VideoScene(Scene):"
78
+ "\n def construct(self):"
79
+ "\n # Intro (8-10s): Title + brief setup"
80
+ "\n title = Text('Title')"
81
+ "\n self.play(Write(title), run_time=3)"
82
+ "\n self.wait(2) # Pause for narration"
83
+ "\n self.play(FadeOut(title), run_time=2)"
84
+ "\n "
85
+ "\n # Main content (40-45s): 3-4 segments"
86
+ "\n # Segment 1 (10-12s)"
87
+ "\n # Segment 2 (10-12s) "
88
+ "\n # Segment 3 (10-12s)"
89
+ "\n # Segment 4 (8-10s)"
90
+ "\n "
91
+ "\n # Conclusion (7-10s): Summary + fade out"
92
+ "\n ```"
93
+ "\nNARRATION STRUCTURE:"
94
+ "\n17. Follow this word count breakdown:"
95
+ "\n - Introduction: 15-25 words (8-10 seconds)"
96
+ "\n - Main content: 70-85 words (36-40 seconds)"
97
+ "\n - Conclusion: 20-25 words (8-10 seconds)"
98
+ "\n - Natural pauses: 3-5 seconds total"
99
+ "\n18. Use active voice, present tense"
100
+ "\n19. Include transition phrases: 'Now let's see...', 'Next, we'll explore...'"
101
+ "\n20. End with a strong concluding statement"
102
+ "\nQUALITY ASSURANCE:"
103
+ "\n21. Count words in narration before finalizing (must be 120-150)"
104
+ "\n22. Calculate total animation time (self.play + self.wait = 60s)"
105
+ "\n23. Ensure Scene class exists and imports are correct"
106
+ "\n24. Test that all animation objects are valid before use"
107
+ "\n25. No broadcasting errors in vector operations"
108
+ "\n26. Distinct start/end points for arrows to prevent normalization errors"
109
+ )
110
 
111
 
112
  def load_manim_examples():
 
114
  if not guide_path.exists():
115
  logging.warning(f"Manim examples guide not found at {guide_path}")
116
  return ""
 
117
  logging.info(f"Loading Manim examples from {guide_path}")
118
  return guide_path.read_text(encoding="utf-8")
119
 
 
123
  if not api_key:
124
  logging.error("GEMINI_API_KEY not found in environment variables")
125
  raise Exception("GEMINI_API_KEY not found in environment variables")
 
126
  if not idea and not pdf_path:
127
  raise ValueError("Either an idea or a pdf_path must be provided.")
128
  if idea and pdf_path:
 
134
 
135
  manim_examples = load_manim_examples()
136
  if manim_examples:
137
+ examples_prompt = (
138
+ "Below are examples of Manim code that demonstrate proper usage patterns. Use these as reference when generating your animation:\n\n"
139
+ + manim_examples
140
+ )
141
  contents.append(examples_prompt)
142
  logging.info("Added Manim examples from guide.md to prime the model")
143
  else:
 
148
  if pdf_path:
149
  pdf_file_path = pathlib.Path(pdf_path)
150
  if not pdf_file_path.exists():
151
+ logging.error(f"PDF file not found at: {pdf_path}")
152
+ raise FileNotFoundError(f"PDF file not found at: {pdf_path}")
153
 
154
  logging.info(f"Reading PDF: {pdf_path}")
155
  pdf_data = pdf_file_path.read_bytes()
156
+ pdf_part = genai_types.Part.from_bytes(
157
+ data=pdf_data, mime_type="application/pdf"
158
+ )
159
  contents.append(pdf_part)
160
 
161
  user_prompt_text = f"Create a 30-second Manim video script summarizing the key points or illustrating a core concept from the provided PDF document. {base_prompt_instructions}"
 
169
  logging.info("Sending request to Gemini API...")
170
  try:
171
  generation_config = genai_types.GenerateContentConfig(
172
+ response_mime_type="application/json",
173
+ response_schema=ManimOutput,
174
+ system_instruction=SYSTEM_PROMPT,
175
+ )
176
 
177
  response = client.models.generate_content(
178
+ model="gemini-2.5-flash", contents=contents, config=generation_config
179
+ )
 
 
180
  except Exception as e:
181
  logging.exception(f"Error calling Gemini API: {e}")
182
  raise Exception(f"Error calling Gemini API: {e}")
183
 
184
  if response:
185
  try:
186
+ parsed_output = response.parsed
187
+ if not parsed_output or not isinstance(parsed_output, ManimOutput):
188
+ logging.error("Failed to parse structured output from Gemini.")
189
+ raise Exception("Failed to parse structured output from Gemini.")
190
+
191
+ manim_code = parsed_output.manim_code
192
+ narration = parsed_output.narration
193
+ logging.info("Successfully parsed structured output from Gemini.")
 
 
 
 
 
 
 
 
 
194
 
195
  if "from manim import *" not in manim_code:
196
+ logging.warning("Adding missing 'from manim import *'.")
197
+ manim_code = "from manim import *\nimport numpy as np\n" + manim_code
198
  elif "import numpy as np" not in manim_code:
199
+ logging.warning("Adding missing 'import numpy as np'.")
200
+ lines = manim_code.splitlines()
201
+ for i, line in enumerate(lines):
202
+ if "from manim import *" in line:
203
+ lines.insert(i + 1, "import numpy as np")
204
+ manim_code = "\n".join(lines)
205
+ break
206
 
207
  return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
208
+ except (ValueError, AttributeError) as e:
209
+ logging.warning(
210
+ f"Could not parse the response. Error: {e}. Response details:"
211
+ )
212
+ logging.warning(response)
213
+ if response.prompt_feedback and response.prompt_feedback.block_reason:
214
+ logging.error(
215
+ f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
216
+ )
217
+ raise Exception(
218
+ f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
219
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  else:
221
+ logging.error(
222
+ "Failed to generate content. The response was empty or malformed."
223
+ )
224
+ raise Exception(
225
+ "Failed to generate content. The response was empty or malformed."
226
+ )
227
  else:
228
+ logging.error(
229
+ "Error generating video content. No response received from Gemini."
230
+ )
231
  raise Exception("Error generating video content. No response received.")
src/app.py CHANGED
@@ -9,19 +9,22 @@ from api.fallback_gemini import fix_manim_code
9
  from services.manim_service import create_manim_video
10
  from services.tts_service import generate_audio
11
 
12
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
13
 
14
  def main():
15
  st.title("Manimator")
16
- st.write("Generate videos from text ideas or PDF files, You can also just paste arxiv links ;p")
 
 
17
  input_type = st.radio("Choose input type:", ("Text Idea", "Upload PDF"))
18
 
19
  idea = None
20
  uploaded_file = None
21
- pdf_path = None
22
  original_context = ""
23
- audio_file = None
24
- current_audio_file = None
25
  if input_type == "Text Idea":
26
  idea = st.text_area("Enter your idea:")
27
  if idea:
@@ -32,156 +35,182 @@ def main():
32
  original_context = f"Summary/concept from PDF: {uploaded_file.name}"
33
 
34
  if st.button("Generate Video"):
35
- temp_pdf_file = None
36
  video_data = None
37
  script = None
38
- audio_file = None
39
  final_video = None
40
- max_retries = 1
41
-
42
  try:
 
43
  if input_type == "Text Idea" and idea:
44
  with st.spinner("Generating initial script and code from idea..."):
45
- logging.info(f"Generating video from idea: {idea[:50]}...")
46
  video_data, script = generate_video(idea=idea)
47
  elif input_type == "Upload PDF" and uploaded_file is not None:
48
  with st.spinner("Generating initial script and code from PDF..."):
49
- logging.info(f"Generating video from PDF: {uploaded_file.name}")
50
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
 
51
  temp_pdf.write(uploaded_file.getvalue())
52
  pdf_path = temp_pdf.name
53
- temp_pdf_file = pdf_path
54
  video_data, script = generate_video(pdf_path=pdf_path)
55
  else:
56
  st.error("Please provide an idea or upload a PDF.")
57
  return
58
 
59
  if not video_data or not script:
60
- st.error("Failed to generate initial script/code from Gemini.")
61
- return
62
 
63
- with st.spinner("Generating audio..."):
64
- logging.info("Generating audio for the script.")
65
- try:
66
- audio_file = generate_audio(script)
67
- except ValueError as e:
68
- st.warning(f"Could not generate audio: {e}. Proceeding without audio.")
69
- audio_file = None
 
 
 
 
 
 
 
 
70
 
71
  current_manim_code = video_data["manim_code"]
72
  current_script = script
73
  current_audio_file = audio_file
 
74
 
 
75
  for attempt in range(max_retries + 1):
76
  try:
77
  with st.spinner(f"Attempt {attempt + 1}: Creating Manim video..."):
78
  logging.info(f"Attempt {attempt + 1} to create Manim video.")
79
  final_video = create_manim_video(
80
- {"manim_code": current_manim_code, "output_file": "output.mp4"},
81
  current_manim_code,
82
- audio_file=current_audio_file
 
83
  )
84
  logging.info("Manim video creation successful.")
85
- break
86
- except subprocess.CalledProcessError as e:
 
87
  logging.error(f"Manim execution failed on attempt {attempt + 1}.")
88
- st.warning(f"Attempt {attempt + 1} failed. Manim error:\n```\n{e.stderr.decode() if e.stderr else 'No stderr captured.'}\n```")
 
 
 
89
  if attempt < max_retries:
90
  st.info("Attempting to fix the code using fallback...")
91
  logging.info("Calling fallback Gemini to fix code.")
92
- error_message = e.stderr.decode() if e.stderr else "Manim execution failed without specific error output."
93
 
94
  fixed_video_data, fixed_script = fix_manim_code(
95
  faulty_code=current_manim_code,
96
- error_message=error_message,
97
- original_context=original_context
98
  )
99
 
100
  if fixed_video_data and fixed_script is not None:
101
- st.success("Fallback successful! Retrying video generation with fixed code.")
 
 
102
  logging.info("Fallback successful. Received fixed code.")
103
  current_manim_code = fixed_video_data["manim_code"]
 
 
104
  if fixed_script != current_script and fixed_script:
105
- st.info("Narration script was updated by the fallback. Regenerating audio...")
106
- logging.info("Regenerating audio for updated script.")
 
107
  current_script = fixed_script
108
  try:
109
- current_audio_file = generate_audio(current_script)
110
- except ValueError as e:
111
- st.warning(f"Could not generate audio for fixed script: {e}. Proceeding without audio.")
112
- current_audio_file = None
113
- elif not fixed_script:
114
- st.warning("Fallback provided code but no narration. Using original audio (if any).")
115
- logging.warning("Fallback provided empty narration.")
116
- current_script = ""
117
- current_audio_file = None
 
 
 
 
 
 
 
 
118
  else:
119
  logging.info("Fallback kept the original narration.")
120
  else:
121
  st.error("Fallback failed to fix the code. Stopping.")
122
- logging.error("Fallback failed to return valid code/script.")
123
  final_video = None
124
  break
125
  else:
126
- st.error(f"Manim failed after {max_retries + 1} attempts. Could not generate video.")
127
- logging.error(f"Manim failed after {max_retries + 1} attempts.")
 
128
  final_video = None
129
  except Exception as e:
130
- st.error(f"An unexpected error occurred during video creation: {str(e)}")
131
- logging.exception("Unexpected error during create_manim_video call.")
 
 
 
 
132
  final_video = None
133
  break
134
 
 
135
  if final_video and os.path.exists(final_video):
136
  st.success("Video generated successfully!")
137
  st.video(final_video)
138
  st.write("Generated Narration:")
139
- st.text_area("Narration", current_script if current_script is not None else "Narration could not be generated.", height=150)
 
 
 
 
 
 
 
140
  elif not final_video:
141
- pass
 
142
  else:
143
  st.error("Error: Generated video file not found after processing.")
144
  logging.error(f"Final video file '{final_video}' not found.")
145
 
146
- except FileNotFoundError as e:
147
- st.error(f"Error: A required file was not found. {str(e)}")
148
- logging.exception("FileNotFoundError during generation process.")
149
- except ValueError as e:
150
- st.error(f"Input Error: {str(e)}")
151
- logging.exception("ValueError during generation process.")
152
  except Exception as e:
153
- st.error(f"An unexpected error occurred: {str(e)}")
154
  logging.exception("Unhandled exception in main generation block.")
155
  finally:
156
- if temp_pdf_file and os.path.exists(temp_pdf_file):
157
- try:
158
- os.remove(temp_pdf_file)
159
- logging.info(f"Removed temporary file: {temp_pdf_file}")
160
- except OSError as e:
161
- logging.error(f"Error removing temporary file {temp_pdf_file}: {e}")
162
- if audio_file and os.path.exists(audio_file) and audio_file != current_audio_file:
163
- try:
164
- os.remove(audio_file)
165
- logging.info(f"Removed temporary audio file: {audio_file}")
166
- except OSError as e:
167
- logging.error(f"Error removing temporary audio file {audio_file}: {e}")
168
- if current_audio_file and os.path.exists(current_audio_file):
169
- try:
170
- os.remove(current_audio_file)
171
- logging.info(f"Removed potentially updated temporary audio file: {current_audio_file}")
172
- except OSError as e:
173
- logging.error(f"Error removing potentially updated temporary audio file {current_audio_file}: {e}")
174
- st.markdown("<br><br>", unsafe_allow_html=True)
175
  st.markdown("---")
176
-
177
-
178
- st.markdown("""
179
  ### Want to help improve this app?
180
  - Give good Manim Examples and make PRs in guide.md, find it in repo [GitHub](https://github.com/mostlykiguess/Manimator)
181
  - Report issues on [GitHub Issues](https://github.com/mostlykiguess/Manimator/issues)
182
  - Email problematic prompts to me
183
- """)
184
-
 
185
 
186
  if __name__ == "__main__":
187
  main()
 
9
  from services.manim_service import create_manim_video
10
  from services.tts_service import generate_audio
11
 
12
+ logging.basicConfig(
13
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
14
+ )
15
+
16
 
17
  def main():
18
  st.title("Manimator")
19
+ st.write(
20
+ "Generate videos from text ideas or PDF files, You can also just paste arxiv links ;p"
21
+ )
22
  input_type = st.radio("Choose input type:", ("Text Idea", "Upload PDF"))
23
 
24
  idea = None
25
  uploaded_file = None
 
26
  original_context = ""
27
+
 
28
  if input_type == "Text Idea":
29
  idea = st.text_area("Enter your idea:")
30
  if idea:
 
35
  original_context = f"Summary/concept from PDF: {uploaded_file.name}"
36
 
37
  if st.button("Generate Video"):
38
+ files_to_cleanup = set()
39
  video_data = None
40
  script = None
 
41
  final_video = None
42
+ max_retries = 2 # retries for fallback
 
43
  try:
44
+ # Step 1: Generate initial script and code from Gemini
45
  if input_type == "Text Idea" and idea:
46
  with st.spinner("Generating initial script and code from idea..."):
 
47
  video_data, script = generate_video(idea=idea)
48
  elif input_type == "Upload PDF" and uploaded_file is not None:
49
  with st.spinner("Generating initial script and code from PDF..."):
50
+ with tempfile.NamedTemporaryFile(
51
+ delete=False, suffix=".pdf"
52
+ ) as temp_pdf:
53
  temp_pdf.write(uploaded_file.getvalue())
54
  pdf_path = temp_pdf.name
55
+ files_to_cleanup.add(pdf_path)
56
  video_data, script = generate_video(pdf_path=pdf_path)
57
  else:
58
  st.error("Please provide an idea or upload a PDF.")
59
  return
60
 
61
  if not video_data or not script:
62
+ st.error("Failed to generate initial script/code from Gemini.")
63
+ return
64
 
65
+ # Step 2: Generate audio and subtitles from the script
66
+ with st.spinner("Generating audio and subtitles..."):
67
+ logging.info("Generating audio and subtitles for the script.")
68
+ try:
69
+ # Unpack both audio and subtitle file paths
70
+ audio_file, subtitle_file = generate_audio(script)
71
+ if audio_file:
72
+ files_to_cleanup.add(audio_file)
73
+ if subtitle_file:
74
+ files_to_cleanup.add(subtitle_file)
75
+ except ValueError as e:
76
+ st.warning(
77
+ f"Could not generate audio: {e}. Proceeding without audio/subtitles."
78
+ )
79
+ audio_file, subtitle_file = None, None
80
 
81
  current_manim_code = video_data["manim_code"]
82
  current_script = script
83
  current_audio_file = audio_file
84
+ current_subtitle_file = subtitle_file
85
 
86
+ # Step 3: Attempt to render the video, with fallback retries
87
  for attempt in range(max_retries + 1):
88
  try:
89
  with st.spinner(f"Attempt {attempt + 1}: Creating Manim video..."):
90
  logging.info(f"Attempt {attempt + 1} to create Manim video.")
91
  final_video = create_manim_video(
92
+ video_data,
93
  current_manim_code,
94
+ audio_file=current_audio_file,
95
+ subtitle_file=current_subtitle_file,
96
  )
97
  logging.info("Manim video creation successful.")
98
+ break # Exit the loop on success
99
+ except (subprocess.CalledProcessError, FileNotFoundError) as e:
100
+ error_output = e.stderr if hasattr(e, "stderr") else str(e)
101
  logging.error(f"Manim execution failed on attempt {attempt + 1}.")
102
+ st.warning(
103
+ f"Attempt {attempt + 1} failed. Manim error:\n```\n{error_output}\n```"
104
+ )
105
+
106
  if attempt < max_retries:
107
  st.info("Attempting to fix the code using fallback...")
108
  logging.info("Calling fallback Gemini to fix code.")
 
109
 
110
  fixed_video_data, fixed_script = fix_manim_code(
111
  faulty_code=current_manim_code,
112
+ error_message=error_output,
113
+ original_context=original_context,
114
  )
115
 
116
  if fixed_video_data and fixed_script is not None:
117
+ st.success(
118
+ "Fallback successful! Retrying video generation with fixed code."
119
+ )
120
  logging.info("Fallback successful. Received fixed code.")
121
  current_manim_code = fixed_video_data["manim_code"]
122
+
123
+ # If narration changed, regenerate audio and subtitles
124
  if fixed_script != current_script and fixed_script:
125
+ st.info(
126
+ "Narration script was updated. Regenerating audio and subtitles..."
127
+ )
128
  current_script = fixed_script
129
  try:
130
+ new_audio, new_subtitle = generate_audio(
131
+ current_script
132
+ )
133
+ if new_audio:
134
+ files_to_cleanup.add(new_audio)
135
+ if new_subtitle:
136
+ files_to_cleanup.add(new_subtitle)
137
+ current_audio_file = new_audio
138
+ current_subtitle_file = new_subtitle
139
+ except ValueError as audio_e:
140
+ st.warning(
141
+ f"Could not generate new audio: {audio_e}."
142
+ )
143
+ current_audio_file, current_subtitle_file = (
144
+ None,
145
+ None,
146
+ )
147
  else:
148
  logging.info("Fallback kept the original narration.")
149
  else:
150
  st.error("Fallback failed to fix the code. Stopping.")
 
151
  final_video = None
152
  break
153
  else:
154
+ st.error(
155
+ f"Manim failed after {max_retries + 1} attempts. Could not generate video."
156
+ )
157
  final_video = None
158
  except Exception as e:
159
+ st.error(
160
+ f"An unexpected error occurred during video creation: {str(e)}"
161
+ )
162
+ logging.exception(
163
+ "Unexpected error during create_manim_video call."
164
+ )
165
  final_video = None
166
  break
167
 
168
+ # Step 4: Display the final result
169
  if final_video and os.path.exists(final_video):
170
  st.success("Video generated successfully!")
171
  st.video(final_video)
172
  st.write("Generated Narration:")
173
+ st.text_area(
174
+ "Narration",
175
+ current_script if current_script else "No narration was generated.",
176
+ height=150,
177
+ )
178
+ elif not final_video and attempt >= max_retries:
179
+ # This message is shown if all retries failed
180
+ st.error("Could not generate the video after multiple attempts.")
181
  elif not final_video:
182
+ # A general failure message
183
+ st.error("Video generation was unsuccessful.")
184
  else:
185
  st.error("Error: Generated video file not found after processing.")
186
  logging.error(f"Final video file '{final_video}' not found.")
187
 
 
 
 
 
 
 
188
  except Exception as e:
189
+ st.error(f"An unexpected and critical error occurred: {str(e)}")
190
  logging.exception("Unhandled exception in main generation block.")
191
  finally:
192
+ # Step 5: Clean up all generated temporary files
193
+ logging.info(f"Cleaning up {len(files_to_cleanup)} temporary files.")
194
+ for f_path in files_to_cleanup:
195
+ if f_path and os.path.exists(f_path):
196
+ try:
197
+ os.remove(f_path)
198
+ logging.info(f"Removed temporary file: {f_path}")
199
+ except OSError as e:
200
+ logging.error(f"Error removing temporary file {f_path}: {e}")
201
+
202
+ st.markdown("<br><br>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
203
  st.markdown("---")
204
+
205
+ st.markdown(
206
+ """
207
  ### Want to help improve this app?
208
  - Give good Manim Examples and make PRs in guide.md, find it in repo [GitHub](https://github.com/mostlykiguess/Manimator)
209
  - Report issues on [GitHub Issues](https://github.com/mostlykiguess/Manimator/issues)
210
  - Email problematic prompts to me
211
+ """
212
+ )
213
+
214
 
215
  if __name__ == "__main__":
216
  main()
src/services/manim_service.py CHANGED
@@ -3,90 +3,153 @@ import subprocess
3
  import os
4
  import glob
5
  import logging
 
 
6
 
7
  def get_scene_name(manim_code):
8
- match = re.search(r'class\s+(\w+)\s*\(\s*Scene\s*\)', manim_code)
 
 
 
 
9
  if match:
10
  return match.group(1)
11
  raise ValueError("No Scene class found in generated code")
12
 
13
- def create_manim_video(video_data, manim_code, audio_file=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  logging.info("Starting to create Manim video")
15
- with open("generated_video.py", "w") as f:
16
- manim_code_clean = re.sub(r"```python", "", manim_code)
17
- manim_code_clean = manim_code_clean.replace("```", "").strip()
18
- f.write(manim_code_clean)
19
-
20
- scene_name = get_scene_name(manim_code_clean)
21
  logging.info(f"Identified scene name: {scene_name}")
22
-
23
  command = ["manim", "-qh", "generated_video.py", scene_name]
24
  logging.info(f"Running Manim with command: {' '.join(command)}")
25
- subprocess.run(command, check=True)
26
-
27
- search_pattern = os.path.join("media", "videos", "generated_video", "1080p60", f"{scene_name}.mp4")
28
- if not os.path.exists(search_pattern):
29
- logging.error(f"No rendered video found at: {search_pattern}")
30
- raise Exception(f"No rendered video found for scene {scene_name}")
31
-
32
- output_video = search_pattern
 
 
 
 
 
 
 
 
 
33
  final_output = "final_output.mp4"
 
34
 
35
  if audio_file and os.path.exists(audio_file):
36
- logging.info(f"Merging video with audio file: {audio_file}")
37
-
38
- video_duration_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
39
- "-of", "default=noprint_wrappers=1:nokey=1", output_video]
40
- audio_duration_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
41
- "-of", "default=noprint_wrappers=1:nokey=1", audio_file]
42
-
43
- video_duration = float(subprocess.check_output(video_duration_cmd).decode('utf-8').strip())
44
- audio_duration = float(subprocess.check_output(audio_duration_cmd).decode('utf-8').strip())
45
-
46
- logging.info(f"Video duration: {video_duration}s, Audio duration: {audio_duration}s")
47
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  if audio_duration > video_duration:
49
- logging.info("Audio is longer than video, extending video duration")
50
- extended_video = "extended_video.mp4"
51
- padding_time = audio_duration - video_duration
52
-
53
  extend_cmd = [
54
- "ffmpeg", "-y",
55
- "-i", output_video,
56
- "-f", "lavfi", "-i", "color=black:s=1920x1080:r=60",
57
- "-filter_complex", f"[0:v][1:v]concat=n=2:v=1:a=0[outv]",
58
- "-map", "[outv]",
59
- "-c:v", "libx264",
60
- "-t", str(audio_duration),
61
- extended_video
 
62
  ]
63
-
64
  logging.info(f"Extending video with command: {' '.join(extend_cmd)}")
65
- subprocess.run(extend_cmd, check=True)
66
- output_video = extended_video
67
-
68
- merge_cmd = [
69
- "ffmpeg", "-y",
70
- "-i", output_video,
71
- "-i", audio_file,
72
- "-c:v", "copy",
73
- "-c:a", "aac",
74
- "-map", "0:v:0",
75
- "-map", "1:a:0",
76
- final_output
77
- ]
78
-
79
- logging.info(f"Merging with command: {' '.join(merge_cmd)}")
80
- subprocess.run(merge_cmd, check=True)
81
- output_video = final_output
82
-
83
- if os.path.exists("extended_video.mp4"):
84
- os.remove("extended_video.mp4")
85
- logging.info("Removed temporary extended video file")
 
 
 
 
 
 
86
 
 
 
 
87
  if os.path.exists("generated_video.py"):
88
  os.remove("generated_video.py")
89
  logging.info("Removed generated_video.py")
90
 
91
- logging.info(f"Final video created at: {output_video}")
92
- return output_video
 
3
  import os
4
  import glob
5
  import logging
6
+ import platform
7
+
8
 
9
def get_scene_name(manim_code):
    """Return the name of the first Manim scene class defined in *manim_code*.

    A scene class is recognised as ``class Name(<Base>):`` where ``<Base>`` is a
    single identifier ending in ``Scene`` (or ``scene``), e.g. ``Scene``,
    ``ThreeDScene`` or ``MovingCameraScene``.

    Args:
        manim_code: Source text of the generated Manim script.

    Returns:
        The class name captured from the first matching ``class`` statement.

    Raises:
        ValueError: If no matching class definition is found.
    """
    # Accept any single base class whose name ends in "Scene" so that scene
    # subclasses other than Scene/ThreeDScene are not rejected.
    match = re.search(r"class\s+(\w+)\s*\(\s*\w*[Ss]cene\s*\)", manim_code)
    if match:
        return match.group(1)
    raise ValueError("No Scene class found in generated code")
18
 
19
+
20
def sanitize_path_for_ffmpeg(path: str) -> str:
    """Escape characters in *path* before it is embedded in an ffmpeg filter string.

    The escaping applied depends on ``platform.system()``.

    Args:
        path: File path to escape.

    Returns:
        A copy of *path* with the platform-specific replacements applied.
    """
    if platform.system() == "Windows":
        # For Windows: double every backslash, then backslash-escape colons.
        return path.replace("\\", "\\\\").replace(":", "\\:")
    else:
        # For Linux/macOS: rewrite each single quote as '\'' and
        # backslash-escape ':', ',', '[' and ']'.
        # NOTE(review): backslashes already present in the path are not
        # escaped on this branch - confirm that is acceptable for the
        # filter syntax this value is used in.
        return (
            path.replace("'", "'\\''")
            .replace(":", "\\:")
            .replace(",", "\\,")
            .replace("[", "\\[")
            .replace("]", "\\]")
        )
33
+
34
+
35
def _probe_duration(media_path):
    """Return the duration in seconds that ffprobe reports for *media_path*."""
    probe_cmd = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        media_path,
    ]
    return float(subprocess.check_output(probe_cmd).decode("utf-8").strip())


def create_manim_video(video_data, manim_code, audio_file=None, subtitle_file=None):
    """Render *manim_code* with Manim and mux optional audio and subtitles.

    The code is written to ``generated_video.py``, rendered with ``manim -qh``,
    and the rendered clip is re-encoded by ffmpeg into ``final_output.mp4``.
    When an audio file is given and is longer than the render, the video is
    first padded with a clone of its last frame. When a subtitle file is given
    it is applied through ffmpeg's ``ass`` video filter.

    Args:
        video_data: Accepted for interface compatibility; not read here.
        manim_code: Python source containing the scene to render.
        audio_file: Optional path of an audio track to add.
        subtitle_file: Optional path of an ASS subtitle file to burn in.

    Returns:
        The path of the final video, ``"final_output.mp4"``.

    Raises:
        ValueError: If no scene class is found in *manim_code*.
        subprocess.CalledProcessError: If manim, ffprobe or ffmpeg exits
            with a non-zero status.
        FileNotFoundError: If Manim succeeds but the expected output file
            does not exist.
    """
    logging.info("Starting to create Manim video")
    script_path = "generated_video.py"
    final_output = "final_output.mp4"
    extended_video_temp = "extended_video.mp4"

    try:
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(manim_code)

        scene_name = get_scene_name(manim_code)
        logging.info(f"Identified scene name: {scene_name}")

        command = ["manim", "-qh", script_path, scene_name]
        logging.info(f"Running Manim with command: {' '.join(command)}")

        # check=True already raises on a non-zero exit status, and with
        # capture_output=True the exception carries stderr; log it here and
        # let the caller handle the exception.
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as exc:
            logging.error(f"Manim failed with stderr:\n{exc.stderr}")
            raise

        video_path = os.path.join(
            "media", "videos", "generated_video", "1080p60", f"{scene_name}.mp4"
        )
        if not os.path.exists(video_path):
            logging.error(f"No rendered video found at: {video_path}")
            raise FileNotFoundError(f"No rendered video found for scene {scene_name}")

        input_video = video_path
        has_audio = bool(audio_file) and os.path.exists(audio_file)

        if has_audio:
            logging.info(f"Audio file found: {audio_file}")

            video_duration = _probe_duration(input_video)
            audio_duration = _probe_duration(audio_file)
            logging.info(
                f"Video duration: {video_duration}s, Audio duration: {audio_duration}s"
            )

            # If audio is longer, extend the video with a freeze frame of the last frame
            if audio_duration > video_duration:
                logging.info(
                    "Audio is longer than video, extending video with freeze frame."
                )
                extend_cmd = [
                    "ffmpeg",
                    "-y",
                    "-i",
                    input_video,
                    "-vf",
                    f"tpad=stop_mode=clone:stop_duration={audio_duration - video_duration}",
                    "-c:v",
                    "libx264",
                    extended_video_temp,
                ]
                logging.info(f"Extending video with command: {' '.join(extend_cmd)}")
                subprocess.run(extend_cmd, check=True, capture_output=True, text=True)
                input_video = extended_video_temp  # The extended video is now our input

        # Build the final merge command: video input, optional audio input,
        # optional subtitle filter, then stream mapping and codecs.
        merge_cmd = ["ffmpeg", "-y", "-i", input_video]
        maps = ["-map", "0:v:0"]
        if has_audio:
            merge_cmd.extend(["-i", audio_file])
            maps.extend(["-map", "1:a:0"])

        video_filters = []
        if subtitle_file and os.path.exists(subtitle_file):
            sanitized_path = sanitize_path_for_ffmpeg(os.path.abspath(subtitle_file))
            video_filters.append(f"ass='{sanitized_path}'")
        if video_filters:
            merge_cmd.extend(["-vf", ",".join(video_filters)])

        merge_cmd.extend(maps)
        merge_cmd.extend(["-c:v", "libx264", "-c:a", "aac", "-shortest", final_output])

        logging.info(f"Merging with final command: {' '.join(merge_cmd)}")
        subprocess.run(merge_cmd, check=True, capture_output=True, text=True)

        logging.info(f"Final video created at: {final_output}")
        return final_output
    finally:
        # Remove intermediate files on success and on failure alike so a
        # failed attempt does not leave them behind.
        cleanup_targets = (
            (extended_video_temp, "Removed temporary extended video file."),
            (script_path, "Removed generated_video.py"),
        )
        for temp_path, removed_message in cleanup_targets:
            if os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                    logging.info(removed_message)
                except OSError as cleanup_error:
                    # Best effort: never mask the original error with a cleanup failure.
                    logging.warning(f"Could not remove {temp_path}: {cleanup_error}")
src/services/subtitle_service.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict
3
+
4
+
5
def _ass_time(t: float) -> str:
    """Format a time in seconds as an ASS timestamp (H:MM:SS.cs).

    Negative values are clamped to zero. The value is rounded to the nearest
    centisecond before being split into fields, so float representation error
    (e.g. 2.3 stored as 2.2999...) cannot drop a centisecond.
    """
    total_cs = max(0, int(round(t * 100)))
    h, remainder = divmod(total_cs, 360000)
    m, remainder = divmod(remainder, 6000)
    s, cs = divmod(remainder, 100)
    return f"{h}:{m:02}:{s:02}.{cs:02}"


def generate_subtitle_file(
    tokens_with_timestamps: List[Dict], output_audio_path: str
) -> str:
    """
    Generates an ASS subtitle file from tokens with absolute timestamps.

    One ``Dialogue`` event is written per token. Tokens are skipped when the
    start or end time is missing, the text is empty after stripping, or the
    end time is not later than the start time.

    Args:
        tokens_with_timestamps (list): A list of token dictionaries with 'text', 'start', and 'end' keys.
        output_audio_path (str): The path to the audio file, used to name the subtitle file.

    Returns:
        str: The path to the generated subtitle file (audio path with its
        extension replaced by ``.ass``).
    """
    subtitle_file_path = os.path.splitext(output_audio_path)[0] + ".ass"

    header_lines = (
        "[Script Info]\n",
        "Title: Generated by Manimator\n",
        "ScriptType: v4.00+\n\n",
        "[V4+ Styles]\n",
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n",
        "Style: Default,Arial,24,&H00FFFFFF,&H000000FF,&H003C3C3C,&H00000000,0,0,0,0,100,100,0,0,1,1.5,1,2,10,10,15,1\n\n",
        "[Events]\n",
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
    )

    with open(subtitle_file_path, "w", encoding="utf-8") as f:
        # Write standard ASS header
        f.writelines(header_lines)

        # Write dialogue entries word-by-word
        for token in tokens_with_timestamps:
            start_time = token.get("start")
            end_time = token.get("end")
            # `or ""` also covers a token whose 'text' key is present but None.
            text = (token.get("text") or "").strip()

            if start_time is None or end_time is None or not text:
                continue
            if end_time <= start_time:
                continue

            # Text is the last field of a Dialogue line, so commas are written
            # as-is. A raw line break would end the event line, so it is
            # replaced with the ASS hard line break "\N".
            text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "\\N")
            f.write(
                f"Dialogue: 0,{_ass_time(start_time)},{_ass_time(end_time)},Default,,0,0,0,,{text}\n"
            )

    return subtitle_file_path
src/services/tts_service.py CHANGED
@@ -1,44 +1,75 @@
1
  from kokoro import KPipeline
2
  import soundfile as sf
3
  import os
4
- from typing import Optional
5
-
6
- class TTSService:
7
- def __init__(self, lang_code: str = 'a'):
8
- """Initialize the TTS service with Kokoro"""
9
- self.pipeline = KPipeline(lang_code=lang_code)
10
- self.voice_presets = {
11
- 'en-us': 'af_heart', # American English
12
- 'en-uk': 'bf_heart', # British English
13
- 'es': 'es_heart', # Spanish
14
- 'fr': 'fr_heart', # French
15
- 'hi': 'hi_heart', # Hindi
16
- 'it': 'it_heart', # Italian
17
- 'pt-br': 'pt_heart', # Brazilian Portuguese
18
- 'ja': 'ja_heart', # Japanese
19
- 'zh': 'zh_heart', # Mandarin Chinese
20
- }
21
-
22
- def generate(self, text: str, voice: str = 'en-us', output_path: Optional[str] = None) -> str:
23
- if not text:
24
- raise ValueError("Text cannot be empty")
25
-
26
- if voice not in self.voice_presets:
27
- raise ValueError(f"Unsupported voice: {voice}. Available voices: {list(self.voice_presets.keys())}")
28
-
29
- if output_path is None:
30
- output_path = f'output_{voice}.wav'
31
-
32
- generator = self.pipeline(text, voice=self.voice_presets[voice], speed=1, split_pattern=r'\n+')
33
- audio_data = []
34
- for _, _, audio in generator:
35
- audio_data.extend(audio)
36
-
37
- sf.write(output_path, audio_data, 24000)
38
-
39
- return output_path
40
-
41
- def generate_audio(text: str, voice: str = 'en-us') -> str:
42
- """Generate audio from text using Kokoro TTS"""
43
- service = TTSService()
44
- return service.generate(text, voice)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from kokoro import KPipeline
2
  import soundfile as sf
3
  import os
4
+ import numpy as np
5
+ from typing import Optional, Tuple, List, Dict
6
+ from .subtitle_service import generate_subtitle_file
7
+
8
+
9
def generate_audio(
    text: str, voice_lang: str = "a", output_filename: str = "output_audio.wav"
) -> Tuple[Optional[str], Optional[str]]:
    """
    Generate audio from text using Kokoro TTS and create a synchronized subtitle file.

    The pipeline output is consumed chunk by chunk. Each chunk's token
    timestamps are shifted by the accumulated duration of the preceding chunks
    so the subtitle times are relative to the start of the concatenated audio.

    Args:
        text (str): The text to synthesize.
        voice_lang (str): The language code for the voice (e.g., 'a' for American English).
        output_filename (str): The desired output filename for the audio.

    Returns:
        A tuple containing the path to the audio file and the subtitle file, or (None, None) on failure.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    # Imported here because the failure handler below logs the error and the
    # module's import block does not bring `logging` into scope.
    import logging

    if not text or not text.strip():
        raise ValueError("Text for TTS cannot be empty.")

    try:
        pipeline = KPipeline(lang_code=voice_lang)
        voice_preset = "af_heart"

        audio_segments = []
        all_tokens: List[Dict] = []
        current_time_offset = 0.0
        # Sample rate used both to convert sample counts to seconds and when
        # writing the output file.
        rate = 24000

        for result in pipeline(
            text, voice=voice_preset, speed=1.0, split_pattern=r"\n+"
        ):
            audio_segments.append(result.audio)
            chunk_duration = len(result.audio) / rate

            # Treat a missing or None `tokens` attribute as "no tokens".
            for token in getattr(result, "tokens", None) or []:
                start_ts = token.start_ts if token.start_ts is not None else 0
                end_ts = token.end_ts if token.end_ts is not None else chunk_duration

                all_tokens.append(
                    {
                        "text": token.text.strip(),
                        "start": current_time_offset + start_ts,
                        "end": current_time_offset + end_ts,
                    }
                )

            current_time_offset += chunk_duration

        if not audio_segments:
            return None, None

        final_audio = np.concatenate(audio_segments)
        sf.write(output_filename, final_audio, rate)

        subtitle_file_path = generate_subtitle_file(all_tokens, output_filename)

        return output_filename, subtitle_file_path

    except Exception as e:
        # Deliberate best-effort boundary: report the failure, drop any
        # partially written audio, and signal failure through (None, None).
        logging.error(
            f"An error occurred during TTS or subtitle generation: {e}", exc_info=True
        )
        if os.path.exists(output_filename):
            os.remove(output_filename)
        return None, None
src/tests/test_fallback.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from src.api.fallback_gemini import fix_manim_code
4
+
5
class TestFallbackOnly(unittest.TestCase):
    """Exercises ``fix_manim_code`` on its own with a deliberately broken script."""

    def test_fallback_with_broken_code(self):
        """Call the fallback with unbalanced-parenthesis code and inspect the result."""
        # The final call is missing a closing parenthesis.
        broken_code = "from manim import *\nclass Broken(Scene):\n def construct(self):\n self.play(Write(Text('Oops!'))"
        error_message = "SyntaxError: unexpected EOF while parsing"
        original_context = "Test fallback with broken code"
        fixed_video_data, fixed_script = fix_manim_code(
            faulty_code=broken_code,
            error_message=error_message,
            original_context=original_context
        )
        # Printed so the returned values can be inspected in the test output.
        print("Fixed video data:", fixed_video_data)
        print("Fixed script:", fixed_script)
        # NOTE(review): this also passes when both values are None, so it
        # only fails when a script is returned without video data - confirm
        # that this is the intended check.
        self.assertTrue(fixed_video_data is not None or fixed_script is None)
18
+
19
+ if __name__ == "__main__":
20
+ unittest.main()