# NOTE(review): the following lines are scrape artifacts from the hosting page
# (Spaces status banners, commit d9486d1, "File size: 22,017 Bytes", and a
# line-number gutter). Preserved here as a comment so the file parses.
import os
import re
import subprocess
import asyncio
from PIL import Image
from typing import Optional, List
import traceback
import sys
from src.core.parse_video import (
get_images_from_video,
image_with_most_non_black_space
)
from mllm_tools.vertex_ai import VertexAIWrapper
from mllm_tools.gemini import GeminiWrapper
class VideoRenderer:
"""Class for rendering and combining Manim animation videos."""
def __init__(self, output_dir="output", print_response=False, use_visual_fix_code=False):
"""Initialize the VideoRenderer.
Args:
output_dir (str, optional): Directory for output files. Defaults to "output".
print_response (bool, optional): Whether to print responses. Defaults to False.
use_visual_fix_code (bool, optional): Whether to use visual fix code. Defaults to False.
"""
self.output_dir = output_dir
self.print_response = print_response
self.use_visual_fix_code = use_visual_fix_code
async def render_scene(self, code: str, file_prefix: str, curr_scene: int, curr_version: int, code_dir: str, media_dir: str, max_retries: int = 3, use_visual_fix_code=False, visual_self_reflection_func=None, banned_reasonings=None, scene_trace_id=None, topic=None, session_id=None):
"""Render a single scene and handle error retries and visual fixes.
Args:
code (str): The Manim code to render
file_prefix (str): Prefix for output files
curr_scene (int): Current scene number
curr_version (int): Current version number
code_dir (str): Directory for code files
media_dir (str): Directory for media output
max_retries (int, optional): Maximum retry attempts. Defaults to 3.
use_visual_fix_code (bool, optional): Whether to use visual fix code. Defaults to False.
visual_self_reflection_func (callable, optional): Function for visual self-reflection. Defaults to None.
banned_reasonings (list, optional): List of banned reasoning strings. Defaults to None.
scene_trace_id (str, optional): Scene trace identifier. Defaults to None.
topic (str, optional): Topic name. Defaults to None.
session_id (str, optional): Session identifier. Defaults to None.
Returns:
tuple: (code, error_message) where error_message is None on success
"""
retries = 0
while retries < max_retries:
try:
# Execute manim in a thread to prevent blocking
file_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py")
result = await asyncio.to_thread(
subprocess.run,
["manim", "-qh", file_path, "--media_dir", media_dir, "--progress_bar", "none"],
capture_output=True,
text=True
)
# if result.returncode != 0, it means that the code is not rendered successfully
# so we need to fix the code by returning the code and the error message
if result.returncode != 0:
raise Exception(result.stderr)
if use_visual_fix_code and visual_self_reflection_func and banned_reasonings:
# Get the rendered video path
video_path = os.path.join(
media_dir,
"videos",
f"{file_prefix}_scene{curr_scene}_v{curr_version}.mp4"
)
# For Gemini/Vertex AI models, pass the video directly
if self.scene_model.model_name.startswith(('gemini/', 'vertex_ai/')):
media_input = video_path
else:
# For other models, use image snapshot
media_input = self.create_snapshot_scene(
topic, curr_scene, curr_version, return_type="path"
)
new_code, log = visual_self_reflection_func(
code,
media_input,
scene_trace_id=scene_trace_id,
topic=topic,
scene_number=curr_scene,
session_id=session_id
)
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_vfix_log.txt"), "w") as f:
f.write(log)
# Check for termination markers
if "<LGTM>" in new_code or any(word in new_code for word in banned_reasonings):
break
code = new_code
curr_version += 1
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py"), "w") as f:
f.write(code)
print(f"Code saved to scene{curr_scene}/code/{file_prefix}_scene{curr_scene}_v{curr_version}.py")
retries = 0
continue
break # Exit retry loop on success
except Exception as e:
print(f"Error: {e}")
print(f"Retrying {retries+1} of {max_retries}...")
with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_error.log"), "a") as f:
f.write(f"\nError in attempt {retries}:\n{str(e)}\n")
retries += 1
return code, str(e) # Indicate failure and return error message
print(f"Successfully rendered {file_path}")
with open(os.path.join(self.output_dir, file_prefix, f"scene{curr_scene}", "succ_rendered.txt"), "w") as f:
f.write("")
return code, None # Indicate success
def run_manim_process(self,
topic: str):
"""Run manim on all generated manim code for a specific topic.
Args:
topic (str): Topic name to process
Returns:
subprocess.CompletedProcess: Result of the final manim process
"""
file_prefix = topic.lower()
file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
search_path = os.path.join(self.output_dir, file_prefix)
# Iterate through scene folders
scene_folders = [f for f in os.listdir(search_path) if os.path.isdir(os.path.join(search_path, f))]
scene_folders.sort() # Sort to process scenes in order
for folder in scene_folders:
folder_path = os.path.join(search_path, folder)
# Get all Python files in version order
py_files = [f for f in os.listdir(folder_path) if f.endswith('.py')]
py_files.sort(key=lambda x: int(x.split('_v')[-1].split('.')[0])) # Sort by version number
for file in py_files:
file_path = os.path.join(folder_path, file)
try:
media_dir = os.path.join(self.output_dir, file_prefix, "media")
result = subprocess.run(
f"manim -qh {file_path} --media_dir {media_dir}",
shell=True,
capture_output=True,
text=True
)
if result.returncode != 0:
raise Exception(result.stderr)
print(f"Successfully rendered {file}")
break # Move to next scene folder if successful
except Exception as e:
print(f"Error rendering {file}: {e}")
error_log_path = os.path.join(folder_path, f"{file.split('.')[0]}_error.log") # drop the extra py
with open(error_log_path, "w") as f:
f.write(f"Error:\n{str(e)}\n")
print(f"Error log saved to {error_log_path}")
return result
def create_snapshot_scene(self, topic: str, scene_number: int, version_number: int, return_type: str = "image"):
"""Create a snapshot of the video for a specific topic and scene.
Args:
topic (str): Topic name
scene_number (int): Scene number
version_number (int): Version number
return_type (str, optional): Type of return value - "path" or "image". Defaults to "image".
Returns:
Union[str, PIL.Image]: Path to saved image or PIL Image object
Raises:
FileNotFoundError: If no mp4 files found in video folder
"""
file_prefix = topic.lower()
file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
search_path = os.path.join(self.output_dir, file_prefix)
video_folder_path = os.path.join(search_path, "media", "videos", f"{file_prefix}_scene{scene_number}_v{version_number}", "1080p60")
os.makedirs(video_folder_path, exist_ok=True)
snapshot_path = os.path.join(video_folder_path, "snapshot.png")
# Get the mp4 video file from the video folder path
video_files = [f for f in os.listdir(video_folder_path) if f.endswith('.mp4')]
if not video_files:
raise FileNotFoundError(f"No mp4 files found in {video_folder_path}")
video_path = os.path.join(video_folder_path, video_files[0])
saved_image = image_with_most_non_black_space(get_images_from_video(video_path), snapshot_path, return_type=return_type)
return saved_image
    def combine_videos(self, topic: str):
        """Combine all videos and subtitle files for a specific topic using ffmpeg.

        Args:
            topic (str): Topic name to combine videos for

        This function will:
        - Find all scene videos and subtitles
        - Combine videos with or without audio
        - Merge subtitle files with correct timing
        - Save combined video and subtitles to output directory
        """
        file_prefix = topic.lower()
        file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
        search_path = os.path.join(self.output_dir, file_prefix, "media", "videos")
        # Create output directory if it doesn't exist
        video_output_dir = os.path.join(self.output_dir, file_prefix)
        os.makedirs(video_output_dir, exist_ok=True)
        output_video_path = os.path.join(video_output_dir, f"{file_prefix}_combined.mp4")
        output_srt_path = os.path.join(video_output_dir, f"{file_prefix}_combined.srt")
        # Idempotency guard: skip the (expensive) combine when both artifacts exist.
        if os.path.exists(output_video_path) and os.path.exists(output_srt_path):
            print(f"Combined video and subtitles already exist at {output_video_path}, not combining again.")
            return
        # Get scene count from outline
        scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
        if not os.path.exists(scene_outline_path):
            print(f"Warning: Scene outline file not found at {scene_outline_path}. Cannot determine scene count.")
            return
        with open(scene_outline_path) as f:
            plan = f.read()
        # NOTE(review): .group(1) raises AttributeError if <SCENE_OUTLINE> tags are
        # absent from the outline file — presumably the upstream generator always
        # emits them; confirm.
        scene_outline = re.search(r'(<SCENE_OUTLINE>.*?</SCENE_OUTLINE>)', plan, re.DOTALL).group(1)
        # Count scenes by their <SCENE_N> opening tags ([^<] excludes closing tags).
        scene_count = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline))
        # Find all scene folders and videos
        scene_folders = []
        for root, dirs, files in os.walk(search_path):
            for dir in dirs:  # NOTE(review): shadows the builtin `dir` within this loop
                if dir.startswith(file_prefix + "_scene"):
                    scene_folders.append(os.path.join(root, dir))
        scene_videos = []
        scene_subtitles = []
        for scene_num in range(1, scene_count + 1):
            # Folder names look like "<prefix>_scene<N>_v<V>"; match on <N>.
            folders = [f for f in scene_folders if int(f.split("scene")[-1].split("_")[0]) == scene_num]
            if not folders:
                print(f"Warning: Missing scene {scene_num}")
                continue
            # Pick the highest version folder for this scene.
            folders.sort(key=lambda f: int(f.split("_v")[-1]))
            folder = folders[-1]
            video_found = False
            subtitles_found = False
            for filename in os.listdir(os.path.join(folder, "1080p60")):
                if filename.endswith('.mp4'):
                    scene_videos.append(os.path.join(folder, "1080p60", filename))
                    video_found = True
                elif filename.endswith('.srt'):
                    scene_subtitles.append(os.path.join(folder, "1080p60", filename))
                    subtitles_found = True
            if not video_found:
                print(f"Warning: Missing video for scene {scene_num}")
            if not subtitles_found:
                # Keep scene_subtitles aligned with scene_videos for the zip below.
                # NOTE(review): alignment assumes exactly one mp4 (and at most one
                # srt) per folder — multiple mp4s would desynchronize the pairing.
                scene_subtitles.append(None)
        if len(scene_videos) != scene_count:
            print("Not all videos/subtitles are found, aborting video combination.")
            return
        try:
            import ffmpeg  # You might need to install ffmpeg-python package: pip install ffmpeg-python
            from tqdm import tqdm
            print("Analyzing video streams...")
            # Check if videos have audio streams
            has_audio = []
            for video in tqdm(scene_videos, desc="Checking audio streams"):
                probe = ffmpeg.probe(video)
                audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']
                has_audio.append(len(audio_streams) > 0)
            print("Preparing video combination...")
            # If any video has audio, we need to ensure all videos have audio streams
            # (ffmpeg concat requires a uniform stream layout across inputs).
            if any(has_audio):
                # Create list to store video and audio streams
                streams = []
                for video, has_aud in tqdm(list(zip(scene_videos, has_audio)), desc="Processing videos"):
                    if has_aud:
                        # Video has audio, use as is
                        input_vid = ffmpeg.input(video)
                        streams.extend([input_vid['v'], input_vid['a']])
                    else:
                        # Video lacks audio, add silent audio
                        input_vid = ffmpeg.input(video)
                        # Generate silent audio for the duration of the video
                        # NOTE(review): streams[0] is assumed to be the video
                        # stream carrying 'duration' — confirm for all inputs.
                        probe = ffmpeg.probe(video)
                        duration = float(probe['streams'][0]['duration'])
                        silent_audio = ffmpeg.input(f'anullsrc=channel_layout=stereo:sample_rate=44100',
                                                    f='lavfi', t=duration)['a']
                        streams.extend([input_vid['v'], silent_audio])
                print("Combining videos with audio...")
                try:
                    # Concatenate all streams using optimized CPU encoding settings
                    concat = ffmpeg.concat(*streams, v=1, a=1, unsafe=True)
                    process = (
                        concat
                        .output(output_video_path,
                                **{'c:v': 'libx264',
                                   'c:a': 'aac',
                                   'preset': 'veryfast',  # Changed from ultrafast for better speed/quality balance
                                   'crf': '28',  # Same quality setting
                                   'threads': '0',  # Use all CPU threads
                                   'tune': 'fastdecode',  # Optimize for decoding speed
                                   'profile:v': 'baseline',  # Simpler profile for faster encoding
                                   'level': '4.0',
                                   'x264-params': 'aq-mode=0:no-deblock:no-cabac:ref=1:subme=0:trellis=0:weightp=0',  # Added aggressive speed optimizations
                                   'movflags': '+faststart',
                                   'stats': None,
                                   'progress': 'pipe:1'})
                        .overwrite_output()
                        .run_async(pipe_stdout=True, pipe_stderr=True)
                    )
                    # Stream ffmpeg's progress lines to the console as they arrive.
                    while True:
                        line = process.stdout.readline().decode('utf-8')
                        if not line:
                            break
                        if 'frame=' in line:
                            sys.stdout.write('\rProcessing: ' + line.strip())
                            sys.stdout.flush()
                    # Wait for the process to complete and capture output
                    stdout, stderr = process.communicate()
                    print("\nEncoding complete!")
                except ffmpeg.Error as e:
                    print(f"FFmpeg stdout:\n{e.stdout.decode('utf8')}")
                    print(f"FFmpeg stderr:\n{e.stderr.decode('utf8')}")
                    raise
            else:
                # No videos have audio, concatenate video streams only
                streams = []
                for video in tqdm(scene_videos, desc="Processing videos"):
                    streams.append(ffmpeg.input(video)['v'])
                print("Combining videos without audio...")
                try:
                    concat = ffmpeg.concat(*streams, v=1, unsafe=True)
                    process = (
                        concat
                        .output(output_video_path,
                                **{'c:v': 'libx264',
                                   'preset': 'medium',
                                   'crf': '23',
                                   'stats': None,  # Enable progress stats
                                   'progress': 'pipe:1'})  # Output progress to pipe
                        .overwrite_output()
                        .run_async(pipe_stdout=True, pipe_stderr=True)
                    )
                    # Stream ffmpeg's progress lines to the console as they arrive.
                    while True:
                        line = process.stdout.readline().decode('utf-8')
                        if not line:
                            break
                        if 'frame=' in line:
                            sys.stdout.write('\rProcessing: ' + line.strip())
                            sys.stdout.flush()
                    # Wait for the process to complete and capture output
                    stdout, stderr = process.communicate()
                    print("\nEncoding complete!")
                except ffmpeg.Error as e:
                    print(f"FFmpeg stdout:\n{e.stdout.decode('utf8')}")
                    print(f"FFmpeg stderr:\n{e.stderr.decode('utf8')}")
                    raise
            print(f"Successfully combined videos into {output_video_path}")
            # Handle subtitle combination (existing subtitle code remains the same)
            if scene_subtitles:
                with open(output_srt_path, 'w', encoding='utf-8') as outfile:
                    current_time_offset = 0  # seconds of video preceding the current scene
                    subtitle_index = 1  # SRT cues are renumbered globally across scenes
                    for srt_file, video_file in zip(scene_subtitles, scene_videos):
                        if srt_file is None:
                            continue
                        with open(srt_file, 'r', encoding='utf-8') as infile:
                            lines = infile.readlines()
                        i = 0
                        while i < len(lines):
                            line = lines[i].strip()
                            if line.isdigit():  # Subtitle index
                                outfile.write(f"{subtitle_index}\n")
                                subtitle_index += 1
                                i += 1
                                # Time codes line ("HH:MM:SS,mmm --> HH:MM:SS,mmm")
                                time_line = lines[i].strip()
                                start_time, end_time = time_line.split(' --> ')
                                # Convert time codes and add offset
                                # NOTE(review): redefined on every cue; could be
                                # hoisted out of the loop.
                                def adjust_time(time_str, offset):
                                    h, m, s = time_str.replace(',', '.').split(':')
                                    total_seconds = float(h) * 3600 + float(m) * 60 + float(s) + offset
                                    h = int(total_seconds // 3600)
                                    m = int((total_seconds % 3600) // 60)
                                    s = total_seconds % 60
                                    return f"{h:02d}:{m:02d}:{s:06.3f}".replace('.', ',')
                                new_start = adjust_time(start_time, current_time_offset)
                                new_end = adjust_time(end_time, current_time_offset)
                                outfile.write(f"{new_start} --> {new_end}\n")
                                i += 1
                                # Subtitle text (could be multiple lines)
                                while i < len(lines) and lines[i].strip():
                                    outfile.write(lines[i])
                                    i += 1
                                outfile.write('\n')
                            else:
                                i += 1
                        # Update time offset using ffprobe so the next scene's
                        # cues start where this scene's video ends.
                        probe = ffmpeg.probe(video_file)
                        duration = float(probe['streams'][0]['duration'])
                        current_time_offset += duration
            print(f"Successfully combined videos into {output_video_path}")
            if scene_subtitles:
                print(f"Successfully combined subtitles into {output_srt_path}")
        except Exception as e:
            # Best-effort operation: report and dump the traceback rather than
            # propagating, so a failed combine doesn't abort the whole pipeline.
            print(f"Error combining videos and subtitles: {e}")
            traceback.print_exc()