removed unnecessary imports and code, improving prompt

Files changed:
- response_schemas.py +1 -1
- utils.py +4 -183
response_schemas.py CHANGED

@@ -2,7 +2,7 @@ from pydantic import BaseModel, Field
 from typing import List

 class SingleScene(BaseModel):
-    text: str = Field(description="Actual Segment of text from the complete story")
+    text: str = Field(description="Actual Segment of text(a scene) from the complete story")
     image_prompts: List[str] = Field(
         description="""List of detailed and descriptive image prompts for the segment
         prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]"
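Only SingleScene is visible in this diff, but utils.py imports ScenesResponseSchema from the same module and later reads scenes.get("scenes", []), which suggests a wrapper model along these lines (a sketch under that assumption, not part of the commit):

    from typing import List
    from pydantic import BaseModel, Field

    class SingleScene(BaseModel):
        text: str = Field(description="Actual Segment of text(a scene) from the complete story")
        image_prompts: List[str] = Field(description="List of detailed and descriptive image prompts for the segment")

    class ScenesResponseSchema(BaseModel):
        # Assumed wrapper: get_scenes() calls .model_dump() on this model and
        # downstream code indexes the result with the "scenes" key.
        scenes: List[SingleScene]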
utils.py CHANGED

@@ -3,13 +3,9 @@ import os
 from PIL import Image
 from gradio_client import Client
 import moviepy.editor as mp
-from moviepy.video.VideoClip import ImageClip
-from moviepy.editor import AudioFileClip
 from structured_output_extractor import StructuredOutputExtractor
 from response_schemas import ScenesResponseSchema
 from typing import List, Dict
-import tempfile
-import os


 def get_scenes(text_script: str):
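The two dropped moviepy imports were redundant rather than merely unused: in moviepy 1.x, moviepy.editor re-exports ImageClip and AudioFileClip, so both classes stay reachable through the surviving "import moviepy.editor as mp" line, e.g.:

    import moviepy.editor as mp

    # Same classes the removed lines imported directly:
    clip = mp.ImageClip("scene_1_image_1.png").set_duration(2)
    audio = mp.AudioFileClip("scene_1.mp3")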
@@ -19,11 +15,11 @@ def get_scenes(text_script: str):
     ROLE: Story to Scene Generator
     Tasks: For the given story
     1. Read it Completely and Understand the Complete Context
-    2. Rewrite the story in tiny scenes(but without even changing a word) with highly detailed and context aware
-
-
+    2. Rewrite the story in tiny scenes(but without even changing a word) with highly detailed and context aware list of image prompts to visualize each scene
+
+    RULE OF THUMB: 12 image prompts / 1 min audio (Note: You will be provided with estimated read Time for each story or script)

-    here is the Estimated Read Time of the complete story: {read_time}\n\n
+    here is the Estimated Read Time of the complete story: {read_time}\n\n
     and Here is the Complete Story: {text_script}
     """
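The new RULE OF THUMB fixes the image budget at 12 prompts per minute of estimated read time. A minimal sketch of the arithmetic the prompt asks the model to follow (the helper name and the round-up choice are illustrative assumptions, not part of the commit):

    import math

    def target_prompt_count(read_time_minutes: float, prompts_per_minute: int = 12) -> int:
        # 12 image prompts per 1 minute of audio, rounded up so very short
        # stories still receive at least one prompt.
        return max(1, math.ceil(read_time_minutes * prompts_per_minute))

    print(target_prompt_count(2.5))  # a 2.5-minute story -> 30 prompts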
@@ -32,87 +28,6 @@ def get_scenes(text_script: str):
     result = extractor.extract(prompt)
     return result.model_dump()  # returns dictionary version pydantic model

-
-
-
-# def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media"):
-#     """
-#     Generate video assets (images and audio) for each scene in a structured folder hierarchy.
-
-#     Args:
-#         scenes (Dict): A dictionary containing a list of scenes under the key "scenes".
-#         language (str): The language code for audio generation.
-#         speaker (str): The speaker for audio generation.
-#         base_path (str): The base folder where all assets will be stored. Default is "media".
-
-#     Returns:
-#         Dict: A dictionary containing the paths to the generated assets.
-#     """
-#     try:
-#         # Ensure the base folder exists
-#         if not os.path.exists(base_path):
-#             os.makedirs(base_path)
-
-#         # Extract scenes from the input dictionary
-#         scenes_list = scenes.get("scenes", [])
-#         print(f"Total Scenes: {len(scenes_list)}")
-
-#         # Dictionary to store asset paths
-#         assets = {"scenes": []}
-
-#         # Create a folder for the current video
-#         video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
-#         if not os.path.exists(video_folder):
-#             os.makedirs(video_folder)
-
-#         # Create 'images' and 'audio' folders inside the video folder
-#         images_folder = os.path.join(video_folder, "images")
-#         audio_folder = os.path.join(video_folder, "audio")
-#         os.makedirs(images_folder, exist_ok=True)
-#         os.makedirs(audio_folder, exist_ok=True)
-
-#         for scene_count, scene in enumerate(scenes_list):
-#             text: str = scene.get("text", "")
-#             image_prompts: List[str] = scene.get("image_prompts", [])
-
-#             # Create a folder for the current scene inside the 'images' folder
-#             scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
-#             os.makedirs(scene_images_folder, exist_ok=True)
-
-#             # Generate audio for the scene
-#             audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
-#             audio_result = generate_audio(text, language, speaker, path=audio_path)
-
-#             if "error" in audio_result:
-#                 print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
-#                 continue
-
-#             # Generate images for the scene
-#             image_paths = []
-#             for count, prompt in enumerate(image_prompts):
-#                 image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
-#                 image_result = generate_image(prompt=prompt, path=image_path)
-
-#                 if "error" in image_result:
-#                     print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
-#                 else:
-#                     image_paths.append(image_path)
-
-#             # Add the scene's asset paths to the dictionary
-#             assets["scenes"].append({
-#                 "scene_number": scene_count + 1,
-#                 "audio_path": audio_path,
-#                 "image_paths": image_paths
-#             })
-
-#             print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")
-
-#         return assets
-
-#     except Exception as e:
-#         print(f"Error during video asset generation: {e}")
-#         return {"error": str(e)}
-
 def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
     try:
         # Ensure the base folder exists
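Taken together, the surviving functions form the pipeline this commit tidies up. A hypothetical end-to-end call (argument values are placeholders, and generate_video_assets presumably drives the audio/image backends internally):

    scenes = get_scenes(text_script=my_story)       # {"scenes": [{"text": ..., "image_prompts": [...]}, ...]}
    video_folder = generate_video_assets(scenes, language="en", speaker="default")
    final_path = generate_video(video_folder)       # writes final_video.mp4 inside video_folder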
@@ -254,17 +169,6 @@ def tmp_folder(folder_name: str) -> str:

     return folder_path

-
-
-from moviepy.editor import *
-
-
-import os
-import tempfile
-from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
-
-
-
 def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
     audio_folder = os.path.join(video_folder, "audio")
     images_folder = os.path.join(video_folder, "images")
@@ -313,89 +217,6 @@ def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):

     return output_path

-def generate_video_old(audio_file, images, segments):
-    try:
-        # Save the uploaded audio file to a temporary location
-        file_extension = os.path.splitext(audio_file.name)[1]
-        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
-        temp_audio_path.write(audio_file.read())
-        temp_audio_path.close()
-
-        # Load the audio file using MoviePy
-        audio = AudioFileClip(temp_audio_path.name)
-
-        # Define YouTube-like dimensions (16:9 aspect ratio)
-        frame_width = 1280
-        frame_height = 720
-
-        video_clips = []
-        total_segments = len(segments)
-
-        for i, current_segment in enumerate(segments):
-            start_time = current_segment["start"]
-            end_time = current_segment["end"]
-
-            # Calculate the actual duration including any gap until the next segment
-            if i < total_segments - 1:
-                # If there's a next segment, extend until it starts
-                next_segment = segments[i + 1]
-                actual_end_time = next_segment["start"]
-            else:
-                # For the last segment, use its end time
-                actual_end_time = end_time
-
-            # Calculate total duration including any gap
-            segment_duration = actual_end_time - start_time
-
-            print(f"\nProcessing segment {i + 1}/{total_segments}:")
-            print(f"  Start time: {start_time}s")
-            print(f"  Base end time: {end_time}s")
-            print(f"  Actual end time: {actual_end_time}s")
-            print(f"  Total duration: {segment_duration}s")
-            print(f"  Text: '{current_segment['text']}'")
-
-            # Ensure the image index is within bounds
-            image_path = images[min(i, len(images) - 1)]
-
-            # Create an ImageClip for the current segment
-            image_clip = ImageClip(image_path)
-
-            # Resize and pad the image to fit a 16:9 aspect ratio
-            image_clip = image_clip.resize(height=frame_height).on_color(
-                size=(frame_width, frame_height),
-                color=(0, 0, 0),  # Black background
-                pos="center"  # Center the image
-            )
-
-            # Set the duration and start time for the clip
-            image_clip = image_clip.set_duration(segment_duration)
-            image_clip = image_clip.set_start(start_time)  # Set the start time explicitly
-
-            video_clips.append(image_clip)
-
-        # Concatenate all the image clips to form the video
-        print("Concatenating video clips...")
-        video = concatenate_videoclips(video_clips, method="compose")
-
-        # Add the audio to the video
-        video = video.set_audio(audio)
-
-        # Save the video to a temporary file
-        temp_dir = tempfile.gettempdir()
-        video_path = os.path.join(temp_dir, "generated_video.mp4")
-        print(f"Writing video file to {video_path}...")
-        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")
-
-        # Clean up the temporary audio file
-        os.remove(temp_audio_path.name)
-        print("Temporary audio file removed.")
-
-        return video_path
-
-    except Exception as e:
-        print(f"Error generating video: {e}")
-        return None
-


 def calculate_read_time(text: str, words_per_minute: int = 155) -> str:
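The diff ends at the context line for calculate_read_time, whose body is not shown. A plausible sketch consistent with the signature and its 155-wpm default (the exact return format is an assumption):

    def calculate_read_time(text: str, words_per_minute: int = 155) -> str:
        # Words divided by reading speed gives minutes; the remainder becomes seconds.
        words = len(text.split())
        total_seconds = round(words / words_per_minute * 60)
        minutes, seconds = divmod(total_seconds, 60)
        return f"{minutes} min {seconds} sec"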