# Source listing header (file-viewer residue): File size: 10,473 Bytes.
# The commit-hash and line-number gutters from the viewer carried no content.
import re
from google import genai
from google.genai import types as genai_types
from dotenv import load_dotenv
import os
import pathlib
import logging
from pydantic import BaseModel
# Load variables from a .env file into the process environment at import time
# (presumably so GEMINI_API_KEY, read via os.getenv in generate_video, can be
# supplied locally - verify against deployment setup).
load_dotenv()
# Configure the root logger once for the whole module: INFO level, with
# timestamp, level name and message in every record.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Structured-output schema for the model reply; generate_video() passes this
# class as ``response_schema`` and checks the parsed reply against it.
# NOTE(review): documented with comments on purpose - a class docstring would
# likely be exported into the JSON schema sent to the model; confirm before
# converting these comments into a docstring.
class ManimOutput(BaseModel):
    # Python source code of the generated Manim scene.
    manim_code: str
    # Voice-over text that accompanies the animation.
    narration: str
# System instruction sent with every generation request (passed as
# ``system_instruction`` in generate_video). It fixes the output contract
# (JSON with "manim_code" and "narration"), the 60-second / 150-160 word
# timing budget and the Manim Community v0.19.0 API constraints.
# The literal is not a raw string, so each ``\\`` below reaches the model as
# a single backslash (and ``\\\\`` as the LaTeX row separator ``\\``).
SYSTEM_PROMPT = """You are an expert Manim programmer specializing in creating visually striking 60-second animations based on user prompts or documents, strictly following Manim Community v0.19.0 standards. Your output MUST be a JSON object conforming to the provided schema.
CRITICAL TIMING REQUIREMENTS:
- **Total Duration:** Exactly 60 seconds (1 minute)
- **Narration:** Exactly 150-160 words (average speaking pace: 2.5 words per second)
- **Animation Structure:** Use this timing framework:
* Introduction: 8-10 seconds
* Main content: 40-45 seconds (3-4 major segments)
* Conclusion/summary: 7-10 seconds
- **Synchronization:** Each narration sentence should correspond to 3-5 seconds of animation
Core Requirements:
- **API Version:** Use only Manim Community v0.19.0 API
- **Vectors & Math:** Use 3D vectors (np.array([x, y, 0])) and ensure correct math operations
- **Matrix Visualization:** Use MathTex for matrices: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'
- **Star Usage:** Use Star(n=5, ...) not n_points
- **Error Prevention:** Always validate Scene class exists; avoid 3D scenes
- **Visual Style:** Create vibrant, dynamic animations with smooth transitions
- **Output Format:** JSON with "manim_code" and "narration" keys
"""
# Detailed Instructions
# Per-request instruction block appended to the user prompt in
# generate_video(). It must agree with the system prompt: a 60-second video
# narrated in 150-160 words at 2.5 words per second.
#
# Consistency rules this text has to keep (each was violated before):
#   * every mention of the narration length says 150-160 words;
#   * list items are numbered 1-27 with no duplicates;
#   * the per-section word budget matches the section durations at
#     2.5 words/second (8-10 s -> 20-25 words, 40-45 s -> 100-110 words);
#   * the code template is indented so the model sees valid Python.
#
# NOTE(review): item 7 lists names such as Shift(), Scale(), MoveTo(),
# NextTo(), Plot() and LineGraph() as "verified"; these look like Mobject
# methods / non-existent classes rather than Manim classes - confirm against
# the Manim Community reference before relying on that list.
base_prompt_instructions = (
    "\nSTRICT TIMING REQUIREMENTS:"
    "\n1. **Video Duration:** Exactly 60 seconds total"
    "\n2. **Narration Constraints:**"
    "\n   - Exactly 150-160 words (no more, no less)"
    "\n   - Speaking pace: 2.5 words per second"
    "\n   - Use short, clear sentences (8-12 words each)"
    "\n   - Include natural pauses between major concepts"
    "\n3. **Animation Timing Structure:**"
    "\n   - Use self.wait() to match narration pauses"
    "\n   - run_time in self.play() should match sentence duration"
    "\n   - Fade out elements after 3-5 seconds to avoid clutter"
    "\n   - Example timing: self.play(Create(obj), run_time=3), self.wait(1)"
    "\nTECHNICAL REQUIREMENTS:"
    "\n4. Use only Manim Community v0.19.0 API"
    "\n5. Vector operations (3D vectors): np.array([x, y, 0])"
    "\n6. Matrix display: MathTex(r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}')"
    "\n7. Verified methods only: Create(), Write(), Transform(), FadeIn(), FadeOut(), "
    "\n   Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), "
    "\n   FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), "
    "\n   BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup()"
    "\n8. Star shapes: Star(n=5, ...) not n_points"
    "\n9. NO image imports or 3D scenes"
    "\n10. There is no .to_center() method so please don't use that"
    "\nVISUAL & CONTENT GUIDELINES:"
    "\n11. Create 4-5 distinct visual segments matching narration flow"
    "\n12. Use vibrant colors and smooth transitions"
    "\n13. Fade out text/objects when no longer needed"
    "\n14. Include interactive elements: arrows, labels, highlights"
    "\n15. Validate all objects before animation calls"
    "\n16. Use longer run_times (4-6s) for complex animations, shorter (2-3s) for simple ones"
    "\nCODE STRUCTURE TEMPLATE:"
    "\n17. Always follow this timing pattern:"
    "\n    ```python"
    "\n    class VideoScene(Scene):"
    "\n        def construct(self):"
    "\n            # Intro (8-10s): Title + brief setup"
    "\n            title = Text('Title')"
    "\n            self.play(Write(title), run_time=3)"
    "\n            self.wait(2)  # Pause for narration"
    "\n            self.play(FadeOut(title), run_time=2)"
    "\n"
    "\n            # Main content (40-45s): 3-4 segments"
    "\n            # Segment 1 (10-12s)"
    "\n            # Segment 2 (10-12s)"
    "\n            # Segment 3 (10-12s)"
    "\n            # Segment 4 (8-10s)"
    "\n"
    "\n            # Conclusion (7-10s): Summary + fade out"
    "\n    ```"
    "\nNARRATION STRUCTURE:"
    "\n18. Follow this word count breakdown:"
    "\n   - Introduction: 20-25 words (8-10 seconds)"
    "\n   - Main content: 100-110 words (40-45 seconds)"
    "\n   - Conclusion: 20-25 words (8-10 seconds)"
    "\n   - Natural pauses: 3-5 seconds total"
    "\n19. Use active voice, present tense"
    "\n20. Include transition phrases: 'Now let's see...', 'Next, we'll explore...'"
    "\n21. End with a strong concluding statement"
    "\nQUALITY ASSURANCE:"
    "\n22. Count words in narration before finalizing (must be 150-160)"
    "\n23. Calculate total animation time (self.play + self.wait = 60s)"
    "\n24. Ensure Scene class exists and imports are correct"
    "\n25. Test that all animation objects are valid before use"
    "\n26. No broadcasting errors in vector operations"
    "\n27. Distinct start/end points for arrows to prevent normalization errors"
)
def load_manim_examples():
    """Return the text of ``guide.md`` located next to this module.

    The guide is optional reference material, so this function is
    best-effort: any failure to read the file is logged as a warning and
    reported to the caller as an empty string.

    Returns:
        The guide contents decoded as UTF-8, or ``""`` when the file is
        missing or cannot be read.
    """
    guide_path = pathlib.Path(__file__).parent / "guide.md"
    # Read first and handle failure (EAFP): avoids the gap between an
    # exists() check and the read, and also covers paths that exist but
    # are not readable files (e.g. a directory or a permission problem).
    try:
        guide_text = guide_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        logging.warning(f"Manim examples guide not found at {guide_path}")
        return ""
    except OSError as err:
        logging.warning(
            f"Manim examples guide at {guide_path} could not be read: {err}"
        )
        return ""
    logging.info(f"Loading Manim examples from {guide_path}")
    return guide_text
def generate_video(idea: str | None = None, pdf_path: str | None = None):
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
logging.error("GEMINI_API_KEY not found in environment variables")
raise Exception("GEMINI_API_KEY not found in environment variables")
if not idea and not pdf_path:
raise ValueError("Either an idea or a pdf_path must be provided.")
if idea and pdf_path:
logging.warning("Both idea and pdf_path provided. Using pdf_path.")
idea = None
client = genai.Client(api_key=api_key)
contents = []
manim_examples = load_manim_examples()
if manim_examples:
examples_prompt = (
"Below are examples of Manim code that demonstrate proper usage patterns. Use these as reference when generating your animation:\n\n"
+ manim_examples
)
contents.append(examples_prompt)
logging.info("Added Manim examples from guide.md to prime the model")
else:
logging.warning("No Manim examples were loaded from guide.md")
user_prompt_text = ""
if pdf_path:
pdf_file_path = pathlib.Path(pdf_path)
if not pdf_file_path.exists():
logging.error(f"PDF file not found at: {pdf_path}")
raise FileNotFoundError(f"PDF file not found at: {pdf_path}")
logging.info(f"Reading PDF: {pdf_path}")
pdf_data = pdf_file_path.read_bytes()
pdf_part = genai_types.Part.from_bytes(
data=pdf_data, mime_type="application/pdf"
)
contents.append(pdf_part)
user_prompt_text = f"Create a 30-second Manim video script summarizing the key points or illustrating a core concept from the provided PDF document. {base_prompt_instructions}"
contents.append(user_prompt_text)
elif idea:
logging.info(f"Generating video based on idea: {idea[:50]}...")
user_prompt_text = f"Create a 30-second Manim video script about '{idea}'. {base_prompt_instructions}"
contents.append(user_prompt_text)
logging.info("Sending request to Gemini API...")
try:
generation_config = genai_types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=ManimOutput,
system_instruction=SYSTEM_PROMPT,
)
response = client.models.generate_content(
model="gemini-2.5-pro", contents=contents, config=generation_config
)
except Exception as e:
logging.exception(f"Error calling Gemini API: {e}")
raise Exception(f"Error calling Gemini API: {e}")
if response:
try:
parsed_output = response.parsed
if not parsed_output or not isinstance(parsed_output, ManimOutput):
logging.error("Failed to parse structured output from Gemini.")
raise Exception("Failed to parse structured output from Gemini.")
manim_code = parsed_output.manim_code
narration = parsed_output.narration
logging.info("Successfully parsed structured output from Gemini.")
if "from manim import *" not in manim_code:
logging.warning("Adding missing 'from manim import *'.")
manim_code = "from manim import *\nimport numpy as np\n" + manim_code
elif "import numpy as np" not in manim_code:
logging.warning("Adding missing 'import numpy as np'.")
lines = manim_code.splitlines()
for i, line in enumerate(lines):
if "from manim import *" in line:
lines.insert(i + 1, "import numpy as np")
manim_code = "\n".join(lines)
break
return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
except (ValueError, AttributeError) as e:
logging.warning(
f"Could not parse the response. Error: {e}. Response details:"
)
logging.warning(response)
if response.prompt_feedback and response.prompt_feedback.block_reason:
logging.error(
f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
)
raise Exception(
f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
)
else:
logging.error(
"Failed to generate content. The response was empty or malformed."
)
raise Exception(
"Failed to generate content. The response was empty or malformed."
)
else:
logging.error(
"Error generating video content. No response received from Gemini."
)
raise Exception("Error generating video content. No response received.")
# End of module (trailing file-viewer residue removed).