wower99 committed on
Commit
ab4d4f6
·
1 Parent(s): b3d9d61

removed unnecessary imports and code, improving prompt

Browse files
Files changed (2) hide show
  1. response_schemas.py +1 -1
  2. utils.py +4 -183
response_schemas.py CHANGED
@@ -2,7 +2,7 @@ from pydantic import BaseModel, Field
2
  from typing import List
3
 
4
  class SingleScene(BaseModel):
5
- text: str = Field(description="Actual Segment of text from the complete story")
6
  image_prompts: List[str] = Field(
7
  description="""List of detailed and descriptive image prompts for the segment
8
  prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]"
 
2
  from typing import List
3
 
4
  class SingleScene(BaseModel):
5
+ text: str = Field(description="Actual Segment of text(a scene) from the complete story")
6
  image_prompts: List[str] = Field(
7
  description="""List of detailed and descriptive image prompts for the segment
8
  prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]"
utils.py CHANGED
@@ -3,13 +3,9 @@ import os
3
  from PIL import Image
4
  from gradio_client import Client
5
  import moviepy.editor as mp
6
- from moviepy.video.VideoClip import ImageClip
7
- from moviepy.editor import AudioFileClip
8
  from structured_output_extractor import StructuredOutputExtractor
9
  from response_schemas import ScenesResponseSchema
10
  from typing import List, Dict
11
- import tempfile
12
- import os
13
 
14
 
15
  def get_scenes(text_script: str):
@@ -19,11 +15,11 @@ def get_scenes(text_script: str):
19
  ROLE: Story to Scene Generator
20
  Tasks: For the given story
21
  1. Read it Completely and Understand the Complete Context
22
- 2. Rewrite the story in tiny scenes(but without even changing a word) with highly detailed and context aware image or list of image prompts to visualize each scene
23
- 3. If necessary, a scene can have more than one image prompts
24
- 4. Make sure there is an image prompt for every 4-5 sec
25
 
26
- here is the Estimated Read Time of the complete story: {read_time}\n\n
27
  and Here is the Complete Story: {text_script}
28
  """
29
 
@@ -32,87 +28,6 @@ def get_scenes(text_script: str):
32
  result = extractor.extract(prompt)
33
  return result.model_dump() # returns dictionary version pydantic model
34
 
35
-
36
-
37
-
38
- # def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media"):
39
- # """
40
- # Generate video assets (images and audio) for each scene in a structured folder hierarchy.
41
-
42
- # Args:
43
- # scenes (Dict): A dictionary containing a list of scenes under the key "scenes".
44
- # language (str): The language code for audio generation.
45
- # speaker (str): The speaker for audio generation.
46
- # base_path (str): The base folder where all assets will be stored. Default is "media".
47
-
48
- # Returns:
49
- # Dict: A dictionary containing the paths to the generated assets.
50
- # """
51
- # try:
52
- # # Ensure the base folder exists
53
- # if not os.path.exists(base_path):
54
- # os.makedirs(base_path)
55
-
56
- # # Extract scenes from the input dictionary
57
- # scenes_list = scenes.get("scenes", [])
58
- # print(f"Total Scenes: {len(scenes_list)}")
59
-
60
- # # Dictionary to store asset paths
61
- # assets = {"scenes": []}
62
-
63
- # # Create a folder for the current video
64
- # video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
65
- # if not os.path.exists(video_folder):
66
- # os.makedirs(video_folder)
67
-
68
- # # Create 'images' and 'audio' folders inside the video folder
69
- # images_folder = os.path.join(video_folder, "images")
70
- # audio_folder = os.path.join(video_folder, "audio")
71
- # os.makedirs(images_folder, exist_ok=True)
72
- # os.makedirs(audio_folder, exist_ok=True)
73
-
74
- # for scene_count, scene in enumerate(scenes_list):
75
- # text: str = scene.get("text", "")
76
- # image_prompts: List[str] = scene.get("image_prompts", [])
77
-
78
- # # Create a folder for the current scene inside the 'images' folder
79
- # scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
80
- # os.makedirs(scene_images_folder, exist_ok=True)
81
-
82
- # # Generate audio for the scene
83
- # audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
84
- # audio_result = generate_audio(text, language, speaker, path=audio_path)
85
-
86
- # if "error" in audio_result:
87
- # print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
88
- # continue
89
-
90
- # # Generate images for the scene
91
- # image_paths = []
92
- # for count, prompt in enumerate(image_prompts):
93
- # image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
94
- # image_result = generate_image(prompt=prompt, path=image_path)
95
-
96
- # if "error" in image_result:
97
- # print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
98
- # else:
99
- # image_paths.append(image_path)
100
-
101
- # # Add the scene's asset paths to the dictionary
102
- # assets["scenes"].append({
103
- # "scene_number": scene_count + 1,
104
- # "audio_path": audio_path,
105
- # "image_paths": image_paths
106
- # })
107
-
108
- # print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")
109
-
110
- # return assets
111
-
112
- # except Exception as e:
113
- # print(f"Error during video asset generation: {e}")
114
- # return {"error": str(e)}
115
-
116
  def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
117
  try:
118
  # Ensure the base folder exists
@@ -254,17 +169,6 @@ def tmp_folder(folder_name: str) -> str:
254
 
255
  return folder_path
256
 
257
-
258
-
259
- from moviepy.editor import *
260
-
261
-
262
- import os
263
- import tempfile
264
- from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
265
-
266
-
267
-
268
  def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
269
  audio_folder = os.path.join(video_folder, "audio")
270
  images_folder = os.path.join(video_folder, "images")
@@ -313,89 +217,6 @@ def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
313
 
314
  return output_path
315
 
316
- def generate_video_old(audio_file, images, segments):
317
- try:
318
- # Save the uploaded audio file to a temporary location
319
- file_extension = os.path.splitext(audio_file.name)[1]
320
- temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
321
- temp_audio_path.write(audio_file.read())
322
- temp_audio_path.close()
323
-
324
- # Load the audio file using MoviePy
325
- audio = AudioFileClip(temp_audio_path.name)
326
-
327
- # Define YouTube-like dimensions (16:9 aspect ratio)
328
- frame_width = 1280
329
- frame_height = 720
330
-
331
- video_clips = []
332
- total_segments = len(segments)
333
-
334
- for i, current_segment in enumerate(segments):
335
- start_time = current_segment["start"]
336
- end_time = current_segment["end"]
337
-
338
- # Calculate the actual duration including any gap until the next segment
339
- if i < total_segments - 1:
340
- # If there's a next segment, extend until it starts
341
- next_segment = segments[i + 1]
342
- actual_end_time = next_segment["start"]
343
- else:
344
- # For the last segment, use its end time
345
- actual_end_time = end_time
346
-
347
- # Calculate total duration including any gap
348
- segment_duration = actual_end_time - start_time
349
-
350
- print(f"\nProcessing segment {i + 1}/{total_segments}:")
351
- print(f" Start time: {start_time}s")
352
- print(f" Base end time: {end_time}s")
353
- print(f" Actual end time: {actual_end_time}s")
354
- print(f" Total duration: {segment_duration}s")
355
- print(f" Text: '{current_segment['text']}'")
356
-
357
- # Ensure the image index is within bounds
358
- image_path = images[min(i, len(images) - 1)]
359
-
360
- # Create an ImageClip for the current segment
361
- image_clip = ImageClip(image_path)
362
-
363
- # Resize and pad the image to fit a 16:9 aspect ratio
364
- image_clip = image_clip.resize(height=frame_height).on_color(
365
- size=(frame_width, frame_height),
366
- color=(0, 0, 0), # Black background
367
- pos="center" # Center the image
368
- )
369
-
370
- # Set the duration and start time for the clip
371
- image_clip = image_clip.set_duration(segment_duration)
372
- image_clip = image_clip.set_start(start_time) # Set the start time explicitly
373
-
374
- video_clips.append(image_clip)
375
-
376
- # Concatenate all the image clips to form the video
377
- print("Concatenating video clips...")
378
- video = concatenate_videoclips(video_clips, method="compose")
379
-
380
- # Add the audio to the video
381
- video = video.set_audio(audio)
382
-
383
- # Save the video to a temporary file
384
- temp_dir = tempfile.gettempdir()
385
- video_path = os.path.join(temp_dir, "generated_video.mp4")
386
- print(f"Writing video file to {video_path}...")
387
- video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")
388
-
389
- # Clean up the temporary audio file
390
- os.remove(temp_audio_path.name)
391
- print("Temporary audio file removed.")
392
-
393
- return video_path
394
-
395
- except Exception as e:
396
- print(f"Error generating video: {e}")
397
- return None
398
-
399
 
400
 
401
  def calculate_read_time(text: str, words_per_minute: int = 155) -> str:
 
3
  from PIL import Image
4
  from gradio_client import Client
5
  import moviepy.editor as mp
 
 
6
  from structured_output_extractor import StructuredOutputExtractor
7
  from response_schemas import ScenesResponseSchema
8
  from typing import List, Dict
 
 
9
 
10
 
11
  def get_scenes(text_script: str):
 
15
  ROLE: Story to Scene Generator
16
  Tasks: For the given story
17
  1. Read it Completely and Understand the Complete Context
18
+ 2. Rewrite the story in tiny scenes(but without even changing a word) with highly detailed and context aware list of image prompts to visualize each scene
19
+
20
+ RULE OF THUMB: 12 image prompts / 1 min audio (Note: You will be provided with estimated read Time for each story or script)
21
 
22
+ here is the Estimated Read Time of the complete story: {read_time}\n\n
23
  and Here is the Complete Story: {text_script}
24
  """
25
 
 
28
  result = extractor.extract(prompt)
29
  return result.model_dump() # returns dictionary version pydantic model
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
32
  try:
33
  # Ensure the base folder exists
 
169
 
170
  return folder_path
171
 
 
 
 
 
 
 
 
 
 
 
 
172
  def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
173
  audio_folder = os.path.join(video_folder, "audio")
174
  images_folder = os.path.join(video_folder, "images")
 
217
 
218
  return output_path
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
 
222
  def calculate_read_time(text: str, words_per_minute: int = 155) -> str: