Spaces:

ManojINaik
/

TheoremExplainAgent

Runtime error

App Files Files Community

TheoremExplainAgent / src /core /video_renderer.py

dfdfdsfgs

Upload project files

d9486d1 2 months ago

raw

history blame contribute delete

22 kB

	import os
	import re
	import subprocess
	import asyncio
	from PIL import Image
	from typing import Optional, List
	import traceback
	import sys

	from src.core.parse_video import (
	get_images_from_video,
	image_with_most_non_black_space
	)
	from mllm_tools.vertex_ai import VertexAIWrapper
	from mllm_tools.gemini import GeminiWrapper

	class VideoRenderer:
	"""Class for rendering and combining Manim animation videos."""

	def __init__(self, output_dir="output", print_response=False, use_visual_fix_code=False):
	"""Initialize the VideoRenderer.

	Args:
	output_dir (str, optional): Directory for output files. Defaults to "output".
	print_response (bool, optional): Whether to print responses. Defaults to False.
	use_visual_fix_code (bool, optional): Whether to use visual fix code. Defaults to False.
	"""
	self.output_dir = output_dir
	self.print_response = print_response
	self.use_visual_fix_code = use_visual_fix_code

	async def render_scene(self, code: str, file_prefix: str, curr_scene: int, curr_version: int, code_dir: str, media_dir: str, max_retries: int = 3, use_visual_fix_code=False, visual_self_reflection_func=None, banned_reasonings=None, scene_trace_id=None, topic=None, session_id=None):
	"""Render a single scene and handle error retries and visual fixes.

	Args:
	code (str): The Manim code to render
	file_prefix (str): Prefix for output files
	curr_scene (int): Current scene number
	curr_version (int): Current version number
	code_dir (str): Directory for code files
	media_dir (str): Directory for media output
	max_retries (int, optional): Maximum retry attempts. Defaults to 3.
	use_visual_fix_code (bool, optional): Whether to use visual fix code. Defaults to False.
	visual_self_reflection_func (callable, optional): Function for visual self-reflection. Defaults to None.
	banned_reasonings (list, optional): List of banned reasoning strings. Defaults to None.
	scene_trace_id (str, optional): Scene trace identifier. Defaults to None.
	topic (str, optional): Topic name. Defaults to None.
	session_id (str, optional): Session identifier. Defaults to None.

	Returns:
	tuple: (code, error_message) where error_message is None on success
	"""
	retries = 0
	while retries < max_retries:
	try:
	# Execute manim in a thread to prevent blocking
	file_path = os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py")
	result = await asyncio.to_thread(
	subprocess.run,
	["manim", "-qh", file_path, "--media_dir", media_dir, "--progress_bar", "none"],
	capture_output=True,
	text=True
	)

	# if result.returncode != 0, it means that the code is not rendered successfully
	# so we need to fix the code by returning the code and the error message
	if result.returncode != 0:
	raise Exception(result.stderr)

	if use_visual_fix_code and visual_self_reflection_func and banned_reasonings:
	# Get the rendered video path
	video_path = os.path.join(
	media_dir,
	"videos",
	f"{file_prefix}_scene{curr_scene}_v{curr_version}.mp4"
	)

	# For Gemini/Vertex AI models, pass the video directly
	if self.scene_model.model_name.startswith(('gemini/', 'vertex_ai/')):
	media_input = video_path
	else:
	# For other models, use image snapshot
	media_input = self.create_snapshot_scene(
	topic, curr_scene, curr_version, return_type="path"
	)

	new_code, log = visual_self_reflection_func(
	code,
	media_input,
	scene_trace_id=scene_trace_id,
	topic=topic,
	scene_number=curr_scene,
	session_id=session_id
	)

	with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_vfix_log.txt"), "w") as f:
	f.write(log)

	# Check for termination markers
	if "<LGTM>" in new_code or any(word in new_code for word in banned_reasonings):
	break

	code = new_code
	curr_version += 1
	with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}.py"), "w") as f:
	f.write(code)
	print(f"Code saved to scene{curr_scene}/code/{file_prefix}_scene{curr_scene}_v{curr_version}.py")
	retries = 0
	continue

	break # Exit retry loop on success

	except Exception as e:
	print(f"Error: {e}")
	print(f"Retrying {retries+1} of {max_retries}...")

	with open(os.path.join(code_dir, f"{file_prefix}_scene{curr_scene}_v{curr_version}_error.log"), "a") as f:
	f.write(f"\nError in attempt {retries}:\n{str(e)}\n")
	retries += 1
	return code, str(e) # Indicate failure and return error message

	print(f"Successfully rendered {file_path}")
	with open(os.path.join(self.output_dir, file_prefix, f"scene{curr_scene}", "succ_rendered.txt"), "w") as f:
	f.write("")

	return code, None # Indicate success

	def run_manim_process(self,
	topic: str):
	"""Run manim on all generated manim code for a specific topic.

	Args:
	topic (str): Topic name to process

	Returns:
	subprocess.CompletedProcess: Result of the final manim process
	"""
	file_prefix = topic.lower()
	file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
	search_path = os.path.join(self.output_dir, file_prefix)
	# Iterate through scene folders
	scene_folders = [f for f in os.listdir(search_path) if os.path.isdir(os.path.join(search_path, f))]
	scene_folders.sort() # Sort to process scenes in order

	for folder in scene_folders:
	folder_path = os.path.join(search_path, folder)

	# Get all Python files in version order
	py_files = [f for f in os.listdir(folder_path) if f.endswith('.py')]
	py_files.sort(key=lambda x: int(x.split('_v')[-1].split('.')[0])) # Sort by version number

	for file in py_files:
	file_path = os.path.join(folder_path, file)
	try:
	media_dir = os.path.join(self.output_dir, file_prefix, "media")
	result = subprocess.run(
	f"manim -qh {file_path} --media_dir {media_dir}",
	shell=True,
	capture_output=True,
	text=True
	)
	if result.returncode != 0:
	raise Exception(result.stderr)
	print(f"Successfully rendered {file}")
	break # Move to next scene folder if successful
	except Exception as e:
	print(f"Error rendering {file}: {e}")
	error_log_path = os.path.join(folder_path, f"{file.split('.')[0]}_error.log") # drop the extra py
	with open(error_log_path, "w") as f:
	f.write(f"Error:\n{str(e)}\n")
	print(f"Error log saved to {error_log_path}")
	return result

	def create_snapshot_scene(self, topic: str, scene_number: int, version_number: int, return_type: str = "image"):
	"""Create a snapshot of the video for a specific topic and scene.

	Args:
	topic (str): Topic name
	scene_number (int): Scene number
	version_number (int): Version number
	return_type (str, optional): Type of return value - "path" or "image". Defaults to "image".

	Returns:
	Union[str, PIL.Image]: Path to saved image or PIL Image object

	Raises:
	FileNotFoundError: If no mp4 files found in video folder
	"""
	file_prefix = topic.lower()
	file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
	search_path = os.path.join(self.output_dir, file_prefix)
	video_folder_path = os.path.join(search_path, "media", "videos", f"{file_prefix}_scene{scene_number}_v{version_number}", "1080p60")
	os.makedirs(video_folder_path, exist_ok=True)
	snapshot_path = os.path.join(video_folder_path, "snapshot.png")
	# Get the mp4 video file from the video folder path
	video_files = [f for f in os.listdir(video_folder_path) if f.endswith('.mp4')]
	if not video_files:
	raise FileNotFoundError(f"No mp4 files found in {video_folder_path}")
	video_path = os.path.join(video_folder_path, video_files[0])
	saved_image = image_with_most_non_black_space(get_images_from_video(video_path), snapshot_path, return_type=return_type)
	return saved_image

	def combine_videos(self, topic: str):
	"""Combine all videos and subtitle files for a specific topic using ffmpeg.

	Args:
	topic (str): Topic name to combine videos for

	This function will:
	- Find all scene videos and subtitles
	- Combine videos with or without audio
	- Merge subtitle files with correct timing
	- Save combined video and subtitles to output directory
	"""
	file_prefix = topic.lower()
	file_prefix = re.sub(r'[^a-z0-9_]+', '_', file_prefix)
	search_path = os.path.join(self.output_dir, file_prefix, "media", "videos")

	# Create output directory if it doesn't exist
	video_output_dir = os.path.join(self.output_dir, file_prefix)
	os.makedirs(video_output_dir, exist_ok=True)

	output_video_path = os.path.join(video_output_dir, f"{file_prefix}_combined.mp4")
	output_srt_path = os.path.join(video_output_dir, f"{file_prefix}_combined.srt")

	if os.path.exists(output_video_path) and os.path.exists(output_srt_path):
	print(f"Combined video and subtitles already exist at {output_video_path}, not combining again.")
	return

	# Get scene count from outline
	scene_outline_path = os.path.join(self.output_dir, file_prefix, f"{file_prefix}_scene_outline.txt")
	if not os.path.exists(scene_outline_path):
	print(f"Warning: Scene outline file not found at {scene_outline_path}. Cannot determine scene count.")
	return
	with open(scene_outline_path) as f:
	plan = f.read()
	scene_outline = re.search(r'(<SCENE_OUTLINE>.*?</SCENE_OUTLINE>)', plan, re.DOTALL).group(1)
	scene_count = len(re.findall(r'<SCENE_(\d+)>[^<]', scene_outline))

	# Find all scene folders and videos
	scene_folders = []
	for root, dirs, files in os.walk(search_path):
	for dir in dirs:
	if dir.startswith(file_prefix + "_scene"):
	scene_folders.append(os.path.join(root, dir))

	scene_videos = []
	scene_subtitles = []

	for scene_num in range(1, scene_count + 1):
	folders = [f for f in scene_folders if int(f.split("scene")[-1].split("_")[0]) == scene_num]
	if not folders:
	print(f"Warning: Missing scene {scene_num}")
	continue

	folders.sort(key=lambda f: int(f.split("_v")[-1]))
	folder = folders[-1]

	video_found = False
	subtitles_found = False
	for filename in os.listdir(os.path.join(folder, "1080p60")):
	if filename.endswith('.mp4'):
	scene_videos.append(os.path.join(folder, "1080p60", filename))
	video_found = True
	elif filename.endswith('.srt'):
	scene_subtitles.append(os.path.join(folder, "1080p60", filename))
	subtitles_found = True

	if not video_found:
	print(f"Warning: Missing video for scene {scene_num}")
	if not subtitles_found:
	scene_subtitles.append(None)

	if len(scene_videos) != scene_count:
	print("Not all videos/subtitles are found, aborting video combination.")
	return

	try:
	import ffmpeg # You might need to install ffmpeg-python package: pip install ffmpeg-python
	from tqdm import tqdm

	print("Analyzing video streams...")
	# Check if videos have audio streams
	has_audio = []
	for video in tqdm(scene_videos, desc="Checking audio streams"):
	probe = ffmpeg.probe(video)
	audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']
	has_audio.append(len(audio_streams) > 0)

	print("Preparing video combination...")
	# If any video has audio, we need to ensure all videos have audio streams
	if any(has_audio):
	# Create list to store video and audio streams
	streams = []
	for video, has_aud in tqdm(list(zip(scene_videos, has_audio)), desc="Processing videos"):
	if has_aud:
	# Video has audio, use as is
	input_vid = ffmpeg.input(video)
	streams.extend([input_vid['v'], input_vid['a']])
	else:
	# Video lacks audio, add silent audio
	input_vid = ffmpeg.input(video)
	# Generate silent audio for the duration of the video
	probe = ffmpeg.probe(video)
	duration = float(probe['streams'][0]['duration'])
	silent_audio = ffmpeg.input(f'anullsrc=channel_layout=stereo:sample_rate=44100',
	f='lavfi', t=duration)['a']
	streams.extend([input_vid['v'], silent_audio])

	print("Combining videos with audio...")
	try:
	# Concatenate all streams using optimized CPU encoding settings
	concat = ffmpeg.concat(*streams, v=1, a=1, unsafe=True)
	process = (
	concat
	.output(output_video_path,
	**{'c:v': 'libx264',
	'c:a': 'aac',
	'preset': 'veryfast', # Changed from ultrafast for better speed/quality balance
	'crf': '28', # Same quality setting
	'threads': '0', # Use all CPU threads
	'tune': 'fastdecode', # Optimize for decoding speed
	'profile:v': 'baseline', # Simpler profile for faster encoding
	'level': '4.0',
	'x264-params': 'aq-mode=0:no-deblock:no-cabac:ref=1:subme=0:trellis=0:weightp=0', # Added aggressive speed optimizations
	'movflags': '+faststart',
	'stats': None,
	'progress': 'pipe:1'})
	.overwrite_output()
	.run_async(pipe_stdout=True, pipe_stderr=True)
	)

	# Process progress output
	while True:
	line = process.stdout.readline().decode('utf-8')
	if not line:
	break
	if 'frame=' in line:
	sys.stdout.write('\rProcessing: ' + line.strip())
	sys.stdout.flush()

	# Wait for the process to complete and capture output
	stdout, stderr = process.communicate()
	print("\nEncoding complete!")

	except ffmpeg.Error as e:
	print(f"FFmpeg stdout:\n{e.stdout.decode('utf8')}")
	print(f"FFmpeg stderr:\n{e.stderr.decode('utf8')}")
	raise
	else:
	# No videos have audio, concatenate video streams only
	streams = []
	for video in tqdm(scene_videos, desc="Processing videos"):
	streams.append(ffmpeg.input(video)['v'])

	print("Combining videos without audio...")
	try:
	concat = ffmpeg.concat(*streams, v=1, unsafe=True)
	process = (
	concat
	.output(output_video_path,
	**{'c:v': 'libx264',
	'preset': 'medium',
	'crf': '23',
	'stats': None, # Enable progress stats
	'progress': 'pipe:1'}) # Output progress to pipe
	.overwrite_output()
	.run_async(pipe_stdout=True, pipe_stderr=True)
	)

	# Process progress output
	while True:
	line = process.stdout.readline().decode('utf-8')
	if not line:
	break
	if 'frame=' in line:
	sys.stdout.write('\rProcessing: ' + line.strip())
	sys.stdout.flush()

	# Wait for the process to complete and capture output
	stdout, stderr = process.communicate()
	print("\nEncoding complete!")

	except ffmpeg.Error as e:
	print(f"FFmpeg stdout:\n{e.stdout.decode('utf8')}")
	print(f"FFmpeg stderr:\n{e.stderr.decode('utf8')}")
	raise

	print(f"Successfully combined videos into {output_video_path}")

	# Handle subtitle combination (existing subtitle code remains the same)
	if scene_subtitles:
	with open(output_srt_path, 'w', encoding='utf-8') as outfile:
	current_time_offset = 0
	subtitle_index = 1

	for srt_file, video_file in zip(scene_subtitles, scene_videos):
	if srt_file is None:
	continue

	with open(srt_file, 'r', encoding='utf-8') as infile:
	lines = infile.readlines()
	i = 0
	while i < len(lines):
	line = lines[i].strip()
	if line.isdigit(): # Subtitle index
	outfile.write(f"{subtitle_index}\n")
	subtitle_index += 1
	i += 1

	# Time codes line
	time_line = lines[i].strip()
	start_time, end_time = time_line.split(' --> ')

	# Convert time codes and add offset
	def adjust_time(time_str, offset):
	h, m, s = time_str.replace(',', '.').split(':')
	total_seconds = float(h) * 3600 + float(m) * 60 + float(s) + offset
	h = int(total_seconds // 3600)
	m = int((total_seconds % 3600) // 60)
	s = total_seconds % 60
	return f"{h:02d}:{m:02d}:{s:06.3f}".replace('.', ',')

	new_start = adjust_time(start_time, current_time_offset)
	new_end = adjust_time(end_time, current_time_offset)
	outfile.write(f"{new_start} --> {new_end}\n")
	i += 1

	# Subtitle text (could be multiple lines)
	while i < len(lines) and lines[i].strip():
	outfile.write(lines[i])
	i += 1
	outfile.write('\n')
	else:
	i += 1

	# Update time offset using ffprobe
	probe = ffmpeg.probe(video_file)
	duration = float(probe['streams'][0]['duration'])
	current_time_offset += duration

	print(f"Successfully combined videos into {output_video_path}")
	if scene_subtitles:
	print(f"Successfully combined subtitles into {output_srt_path}")

	except Exception as e:
	print(f"Error combining videos and subtitles: {e}")
	traceback.print_exc()