File size: 10,473 Bytes
d970572
 
 
 
 
 
 
2e31ab2
d970572
 
 
2e31ab2
 
 
d970572
2e31ab2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d970572
 
2e31ab2
 
 
 
 
 
 
d970572
2e31ab2
d970572
2e31ab2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d970572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f10bf2f
d970572
 
2e31ab2
 
 
 
d970572
 
 
 
f10bf2f
d970572
 
 
 
 
2e31ab2
 
d970572
 
 
2e31ab2
 
 
d970572
 
 
 
 
 
 
 
 
 
 
 
 
2e31ab2
 
 
 
f10bf2f
d970572
c4a80a5
2e31ab2
d970572
 
 
 
 
 
2e31ab2
 
 
 
 
 
 
 
d970572
 
2e31ab2
 
d970572
2e31ab2
 
 
 
 
 
 
d970572
 
2e31ab2
 
 
 
 
 
 
 
 
 
 
 
d970572
2e31ab2
 
 
 
 
 
d970572
2e31ab2
 
 
d970572
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import re
from google import genai
from google.genai import types as genai_types
from dotenv import load_dotenv
import os
import pathlib
import logging
from pydantic import BaseModel

# Pull variables from a .env file into the process environment at import time;
# generate_video() later reads GEMINI_API_KEY through os.getenv.
load_dotenv()

# Configure root logging once for this module: INFO and above, with a
# "timestamp - level - message" layout.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


# Structured-output schema for the Gemini response: it is passed as
# `response_schema` in generate_video() and the parsed reply is checked
# against it with isinstance().
# NOTE: documented with comments on purpose; a class docstring would be
# copied by pydantic into the JSON schema that is sent to the model.
class ManimOutput(BaseModel):
    # Python source of the Manim scene produced by the model.
    manim_code: str
    # Voice-over text that accompanies the animation.
    narration: str


# System instruction sent with every Gemini request (see `system_instruction`
# in generate_video). It fixes the target duration at 60 seconds, the
# narration length at 150-160 words, and the JSON output keys
# ("manim_code", "narration") that ManimOutput mirrors.
# The string is not raw: "\\" in the source is a single backslash at runtime.
SYSTEM_PROMPT = """You are an expert Manim programmer specializing in creating visually striking 60-second animations based on user prompts or documents, strictly following Manim Community v0.19.0 standards. Your output MUST be a JSON object conforming to the provided schema.

CRITICAL TIMING REQUIREMENTS:
- **Total Duration:** Exactly 60 seconds (1 minute)
- **Narration:** Exactly 150-160 words (average speaking pace: 2.5 words per second)
- **Animation Structure:** Use this timing framework:
  * Introduction: 8-10 seconds
  * Main content: 40-45 seconds (3-4 major segments)
  * Conclusion/summary: 7-10 seconds
- **Synchronization:** Each narration sentence should correspond to 3-5 seconds of animation

Core Requirements:
- **API Version:** Use only Manim Community v0.19.0 API
- **Vectors & Math:** Use 3D vectors (np.array([x, y, 0])) and ensure correct math operations
- **Matrix Visualization:** Use MathTex for matrices: r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}'
- **Star Usage:** Use Star(n=5, ...) not n_points
- **Error Prevention:** Always validate Scene class exists; avoid 3D scenes
- **Visual Style:** Create vibrant, dynamic animations with smooth transitions
- **Output Format:** JSON with "manim_code" and "narration" keys
"""
# Detailed instructions appended to every user prompt built in generate_video().
# The numbers in this text must stay mutually consistent, because the model is
# told to self-check against them:
#   * 60 s total at 2.5 words/second  ->  150-160 narration words
#   * per-section word counts must be able to add up to that total
#   * list items are numbered 1-27 with no repeats
base_prompt_instructions = (
    "\nSTRICT TIMING REQUIREMENTS:"
    "\n1. **Video Duration:** Exactly 60 seconds total"
    "\n2. **Narration Constraints:**"
    "\n   - Exactly 150-160 words (no more, no less)"
    "\n   - Speaking pace: 2.5 words per second"
    "\n   - Use short, clear sentences (8-12 words each)"
    "\n   - Include natural pauses between major concepts"
    "\n3. **Animation Timing Structure:**"
    "\n   - Use self.wait() to match narration pauses"
    "\n   - run_time in self.play() should match sentence duration"
    "\n   - Fade out elements after 3-5 seconds to avoid clutter"
    "\n   - Example timing: self.play(Create(obj), run_time=3), self.wait(1)"
    "\nTECHNICAL REQUIREMENTS:"
    "\n4. Use only Manim Community v0.19.0 API"
    "\n5. Vector operations (3D vectors): np.array([x, y, 0])"
    "\n6. Matrix display: MathTex(r'\\begin{bmatrix} a & b \\\\ c & d \\end{bmatrix}')"
    "\n7. Verified methods only: Create(), Write(), Transform(), FadeIn(), FadeOut(), "
    "\n   Add(), Remove(), MoveAlongPath(), Rotating(), Circumscribe(), Indicate(), "
    "\n   FocusOn(), Shift(), Scale(), MoveTo(), NextTo(), Axes(), Plot(), LineGraph(), "
    "\n   BarChart(), Dot(), Line(), Arrow(), Text(), Tex(), MathTex(), VGroup()"
    "\n8. Star shapes: Star(n=5, ...) not n_points"
    "\n9. NO image imports or 3D scenes"
    "\n10. There is no .to_center() method so please don't use that"
    "\nVISUAL & CONTENT GUIDELINES:"
    "\n11. Create 4-5 distinct visual segments matching narration flow"
    "\n12. Use vibrant colors and smooth transitions"
    "\n13. Fade out text/objects when no longer needed"
    "\n14. Include interactive elements: arrows, labels, highlights"
    "\n15. Validate all objects before animation calls"
    "\n16. Use longer run_times (4-6s) for complex animations, shorter (2-3s) for simple ones"
    "\nCODE STRUCTURE TEMPLATE:"
    "\n17. Always follow this timing pattern:"
    "\n    ```python"
    "\n    class VideoScene(Scene):"
    "\n        def construct(self):"
    "\n            # Intro (8-10s): Title + brief setup"
    "\n            title = Text('Title')"
    "\n            self.play(Write(title), run_time=3)"
    "\n            self.wait(2)  # Pause for narration"
    "\n            self.play(FadeOut(title), run_time=2)"
    "\n            "
    "\n            # Main content (40-45s): 3-4 segments"
    "\n            # Segment 1 (10-12s)"
    "\n            # Segment 2 (10-12s)  "
    "\n            # Segment 3 (10-12s)"
    "\n            # Segment 4 (8-10s)"
    "\n            "
    "\n            # Conclusion (7-10s): Summary + fade out"
    "\n    ```"
    "\nNARRATION STRUCTURE:"
    "\n18. Follow this word count breakdown:"
    # Section sizes follow 2.5 words/second and can sum to 150-160 words.
    "\n    - Introduction: 20-25 words (8-10 seconds)"
    "\n    - Main content: 100-110 words (40-45 seconds)"
    "\n    - Conclusion: 20-25 words (8-10 seconds)"
    "\n    - Natural pauses: 3-5 seconds total"
    "\n19. Use active voice, present tense"
    "\n20. Include transition phrases: 'Now let's see...', 'Next, we'll explore...'"
    "\n21. End with a strong concluding statement"
    "\nQUALITY ASSURANCE:"
    # Same range as item 2; the self-check must not contradict the requirement.
    "\n22. Count words in narration before finalizing (must be 150-160)"
    "\n23. Calculate total animation time (self.play + self.wait = 60s)"
    "\n24. Ensure Scene class exists and imports are correct"
    "\n25. Test that all animation objects are valid before use"
    "\n26. No broadcasting errors in vector operations"
    "\n27. Distinct start/end points for arrows to prevent normalization errors"
)


def load_manim_examples():
    """Return the text of ``guide.md`` located next to this module.

    Returns:
        The file's contents decoded as UTF-8, or an empty string (after
        logging a warning) when the file does not exist.
    """
    examples_file = pathlib.Path(__file__).parent.joinpath("guide.md")
    if examples_file.exists():
        logging.info(f"Loading Manim examples from {examples_file}")
        return examples_file.read_text(encoding="utf-8")

    # Missing guide is not fatal: callers treat "" as "no examples available".
    logging.warning(f"Manim examples guide not found at {examples_file}")
    return ""


def generate_video(idea: str | None = None, pdf_path: str | None = None):
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        logging.error("GEMINI_API_KEY not found in environment variables")
        raise Exception("GEMINI_API_KEY not found in environment variables")
    if not idea and not pdf_path:
        raise ValueError("Either an idea or a pdf_path must be provided.")
    if idea and pdf_path:
        logging.warning("Both idea and pdf_path provided. Using pdf_path.")
        idea = None

    client = genai.Client(api_key=api_key)
    contents = []

    manim_examples = load_manim_examples()
    if manim_examples:
        examples_prompt = (
            "Below are examples of Manim code that demonstrate proper usage patterns. Use these as reference when generating your animation:\n\n"
            + manim_examples
        )
        contents.append(examples_prompt)
        logging.info("Added Manim examples from guide.md to prime the model")
    else:
        logging.warning("No Manim examples were loaded from guide.md")

    user_prompt_text = ""

    if pdf_path:
        pdf_file_path = pathlib.Path(pdf_path)
        if not pdf_file_path.exists():
            logging.error(f"PDF file not found at: {pdf_path}")
            raise FileNotFoundError(f"PDF file not found at: {pdf_path}")

        logging.info(f"Reading PDF: {pdf_path}")
        pdf_data = pdf_file_path.read_bytes()
        pdf_part = genai_types.Part.from_bytes(
            data=pdf_data, mime_type="application/pdf"
        )
        contents.append(pdf_part)

        user_prompt_text = f"Create a 30-second Manim video script summarizing the key points or illustrating a core concept from the provided PDF document. {base_prompt_instructions}"
        contents.append(user_prompt_text)

    elif idea:
        logging.info(f"Generating video based on idea: {idea[:50]}...")
        user_prompt_text = f"Create a 30-second Manim video script about '{idea}'. {base_prompt_instructions}"
        contents.append(user_prompt_text)

    logging.info("Sending request to Gemini API...")
    try:
        generation_config = genai_types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=ManimOutput,
            system_instruction=SYSTEM_PROMPT,
        )

        response = client.models.generate_content(
            model="gemini-2.5-pro", contents=contents, config=generation_config
        )
    except Exception as e:
        logging.exception(f"Error calling Gemini API: {e}")
        raise Exception(f"Error calling Gemini API: {e}")

    if response:
        try:
            parsed_output = response.parsed
            if not parsed_output or not isinstance(parsed_output, ManimOutput):
                logging.error("Failed to parse structured output from Gemini.")
                raise Exception("Failed to parse structured output from Gemini.")

            manim_code = parsed_output.manim_code
            narration = parsed_output.narration
            logging.info("Successfully parsed structured output from Gemini.")

            if "from manim import *" not in manim_code:
                logging.warning("Adding missing 'from manim import *'.")
                manim_code = "from manim import *\nimport numpy as np\n" + manim_code
            elif "import numpy as np" not in manim_code:
                logging.warning("Adding missing 'import numpy as np'.")
                lines = manim_code.splitlines()
                for i, line in enumerate(lines):
                    if "from manim import *" in line:
                        lines.insert(i + 1, "import numpy as np")
                        manim_code = "\n".join(lines)
                        break

            return {"manim_code": manim_code, "output_file": "output.mp4"}, narration
        except (ValueError, AttributeError) as e:
            logging.warning(
                f"Could not parse the response. Error: {e}. Response details:"
            )
            logging.warning(response)
            if response.prompt_feedback and response.prompt_feedback.block_reason:
                logging.error(
                    f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
                )
                raise Exception(
                    f"Content generation blocked. Reason: {response.prompt_feedback.block_reason.name}"
                )
            else:
                logging.error(
                    "Failed to generate content. The response was empty or malformed."
                )
                raise Exception(
                    "Failed to generate content. The response was empty or malformed."
                )
    else:
        logging.error(
            "Error generating video content. No response received from Gemini."
        )
        raise Exception("Error generating video content. No response received.")