removed unnecessary imports and code, improving prompt

Files changed:
- response_schemas.py +1 -1
- utils.py +4 -183
response_schemas.py CHANGED

@@ -2,7 +2,7 @@ from pydantic import BaseModel, Field
 from typing import List

 class SingleScene(BaseModel):
-    text: str = Field(description="Actual Segment of text from the complete story")
+    text: str = Field(description="Actual Segment of text(a scene) from the complete story")
     image_prompts: List[str] = Field(
         description="""List of detailed and descriptive image prompts for the segment
         prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]"
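Only SingleScene is visible in this diff, but utils.py imports ScenesResponseSchema from the same module and later reads scenes.get("scenes", []), which suggests a wrapper model along these lines (a sketch under that assumption, not part of the commit):

    from typing import List
    from pydantic import BaseModel, Field

    class SingleScene(BaseModel):
        text: str = Field(description="Actual Segment of text(a scene) from the complete story")
        image_prompts: List[str] = Field(description="List of detailed and descriptive image prompts for the segment")

    class ScenesResponseSchema(BaseModel):
        # Assumed wrapper: get_scenes() calls .model_dump() on this model and
        # downstream code indexes the result with the "scenes" key.
        scenes: List[SingleScene]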
utils.py CHANGED

@@ -3,13 +3,9 @@ import os
 from PIL import Image
 from gradio_client import Client
 import moviepy.editor as mp
-from moviepy.video.VideoClip import ImageClip
-from moviepy.editor import AudioFileClip
 from structured_output_extractor import StructuredOutputExtractor
 from response_schemas import ScenesResponseSchema
 from typing import List, Dict
-import tempfile
-import os


 def get_scenes(text_script: str):
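The two dropped moviepy imports were redundant rather than merely unused: in moviepy 1.x, moviepy.editor re-exports ImageClip and AudioFileClip, so both classes stay reachable through the surviving "import moviepy.editor as mp" line, e.g.:

    import moviepy.editor as mp

    # Same classes the removed lines imported directly:
    clip = mp.ImageClip("scene_1_image_1.png").set_duration(2)
    audio = mp.AudioFileClip("scene_1.mp3")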
@@ -19,11 +15,11 @@ def get_scenes(text_script: str):
     ROLE: Story to Scene Generator
     Tasks: For the given story
     1. Read it Completely and Understand the Complete Context
-    2. Rewrite the story in tiny scenes(but without even changing a word) with highly detailed and context aware
-
-
+    2. Rewrite the story in tiny scenes(but without even changing a word) with highly detailed and context aware list of image prompts to visualize each scene
+
+    RULE OF THUMB: 12 image prompts / 1 min audio (Note: You will be provided with estimated read Time for each story or script)

-    here is the Estimated Read Time of the complete story: {read_time}\n\n
+    here is the Estimated Read Time of the complete story: {read_time}\n\n
     and Here is the Complete Story: {text_script}
     """
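The new RULE OF THUMB fixes the image budget at 12 prompts per minute of estimated read time. A minimal sketch of the arithmetic the prompt asks the model to follow (the helper name and the round-up choice are illustrative assumptions, not part of the commit):

    import math

    def target_prompt_count(read_time_minutes: float, prompts_per_minute: int = 12) -> int:
        # 12 image prompts per 1 minute of audio, rounded up so very short
        # stories still receive at least one prompt.
        return max(1, math.ceil(read_time_minutes * prompts_per_minute))

    print(target_prompt_count(2.5))  # a 2.5-minute story -> 30 prompts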
@@ -32,87 +28,6 @@ def get_scenes(text_script: str):
     result = extractor.extract(prompt)
     return result.model_dump()  # returns dictionary version pydantic model

-
-
-
-# def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media"):
-#     """
-#     Generate video assets (images and audio) for each scene in a structured folder hierarchy.
-
-#     Args:
-#         scenes (Dict): A dictionary containing a list of scenes under the key "scenes".
-#         language (str): The language code for audio generation.
-#         speaker (str): The speaker for audio generation.
-#         base_path (str): The base folder where all assets will be stored. Default is "media".
-
-#     Returns:
-#         Dict: A dictionary containing the paths to the generated assets.
-#     """
-#     try:
-#         # Ensure the base folder exists
-#         if not os.path.exists(base_path):
-#             os.makedirs(base_path)
-
-#         # Extract scenes from the input dictionary
-#         scenes_list = scenes.get("scenes", [])
-#         print(f"Total Scenes: {len(scenes_list)}")
-
-#         # Dictionary to store asset paths
-#         assets = {"scenes": []}
-
-#         # Create a folder for the current video
-#         video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
-#         if not os.path.exists(video_folder):
-#             os.makedirs(video_folder)
-
-#         # Create 'images' and 'audio' folders inside the video folder
-#         images_folder = os.path.join(video_folder, "images")
-#         audio_folder = os.path.join(video_folder, "audio")
-#         os.makedirs(images_folder, exist_ok=True)
-#         os.makedirs(audio_folder, exist_ok=True)
-
-#         for scene_count, scene in enumerate(scenes_list):
-#             text: str = scene.get("text", "")
-#             image_prompts: List[str] = scene.get("image_prompts", [])
-
-#             # Create a folder for the current scene inside the 'images' folder
-#             scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
-#             os.makedirs(scene_images_folder, exist_ok=True)
-
-#             # Generate audio for the scene
-#             audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
-#             audio_result = generate_audio(text, language, speaker, path=audio_path)
-
-#             if "error" in audio_result:
-#                 print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
-#                 continue
-
-#             # Generate images for the scene
-#             image_paths = []
-#             for count, prompt in enumerate(image_prompts):
-#                 image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
-#                 image_result = generate_image(prompt=prompt, path=image_path)
-
-#                 if "error" in image_result:
-#                     print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
-#                 else:
-#                     image_paths.append(image_path)
-
-#             # Add the scene's asset paths to the dictionary
-#             assets["scenes"].append({
-#                 "scene_number": scene_count + 1,
-#                 "audio_path": audio_path,
-#                 "image_paths": image_paths
-#             })
-
-#             print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")
-
-#         return assets
-
-#     except Exception as e:
-#         print(f"Error during video asset generation: {e}")
-#         return {"error": str(e)}
-
 def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
     try:
         # Ensure the base folder exists
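Taken together, the surviving functions form the pipeline this commit tidies up. A hypothetical end-to-end call (argument values are placeholders, and generate_video_assets presumably drives the audio/image backends internally):

    scenes = get_scenes(text_script=my_story)       # {"scenes": [{"text": ..., "image_prompts": [...]}, ...]}
    video_folder = generate_video_assets(scenes, language="en", speaker="default")
    final_path = generate_video(video_folder)       # writes final_video.mp4 inside video_folder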
@@ -254,17 +169,6 @@ def tmp_folder(folder_name: str) -> str:

     return folder_path

-
-
-from moviepy.editor import *
-
-
-import os
-import tempfile
-from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
-
-
-
 def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
     audio_folder = os.path.join(video_folder, "audio")
     images_folder = os.path.join(video_folder, "images")
@@ -313,89 +217,6 @@ def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):

     return output_path

-def generate_video_old(audio_file, images, segments):
-    try:
-        # Save the uploaded audio file to a temporary location
-        file_extension = os.path.splitext(audio_file.name)[1]
-        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
-        temp_audio_path.write(audio_file.read())
-        temp_audio_path.close()
-
-        # Load the audio file using MoviePy
-        audio = AudioFileClip(temp_audio_path.name)
-
-        # Define YouTube-like dimensions (16:9 aspect ratio)
-        frame_width = 1280
-        frame_height = 720
-
-        video_clips = []
-        total_segments = len(segments)
-
-        for i, current_segment in enumerate(segments):
-            start_time = current_segment["start"]
-            end_time = current_segment["end"]
-
-            # Calculate the actual duration including any gap until the next segment
-            if i < total_segments - 1:
-                # If there's a next segment, extend until it starts
-                next_segment = segments[i + 1]
-                actual_end_time = next_segment["start"]
-            else:
-                # For the last segment, use its end time
-                actual_end_time = end_time
-
-            # Calculate total duration including any gap
-            segment_duration = actual_end_time - start_time
-
-            print(f"\nProcessing segment {i + 1}/{total_segments}:")
-            print(f"  Start time: {start_time}s")
-            print(f"  Base end time: {end_time}s")
-            print(f"  Actual end time: {actual_end_time}s")
-            print(f"  Total duration: {segment_duration}s")
-            print(f"  Text: '{current_segment['text']}'")
-
-            # Ensure the image index is within bounds
-            image_path = images[min(i, len(images) - 1)]
-
-            # Create an ImageClip for the current segment
-            image_clip = ImageClip(image_path)
-
-            # Resize and pad the image to fit a 16:9 aspect ratio
-            image_clip = image_clip.resize(height=frame_height).on_color(
-                size=(frame_width, frame_height),
-                color=(0, 0, 0),  # Black background
-                pos="center"  # Center the image
-            )
-
-            # Set the duration and start time for the clip
-            image_clip = image_clip.set_duration(segment_duration)
-            image_clip = image_clip.set_start(start_time)  # Set the start time explicitly
-
-            video_clips.append(image_clip)
-
-        # Concatenate all the image clips to form the video
-        print("Concatenating video clips...")
-        video = concatenate_videoclips(video_clips, method="compose")
-
-        # Add the audio to the video
-        video = video.set_audio(audio)
-
-        # Save the video to a temporary file
-        temp_dir = tempfile.gettempdir()
-        video_path = os.path.join(temp_dir, "generated_video.mp4")
-        print(f"Writing video file to {video_path}...")
-        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")
-
-        # Clean up the temporary audio file
-        os.remove(temp_audio_path.name)
-        print("Temporary audio file removed.")
-
-        return video_path
-
-    except Exception as e:
-        print(f"Error generating video: {e}")
-        return None
-


 def calculate_read_time(text: str, words_per_minute: int = 155) -> str:
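The diff ends at the context line for calculate_read_time, whose body is not shown. A plausible sketch consistent with the signature and its 155-wpm default (the exact return format is an assumption):

    def calculate_read_time(text: str, words_per_minute: int = 155) -> str:
        # Words divided by reading speed gives minutes; the remainder becomes seconds.
        words = len(text.split())
        total_seconds = round(words / words_per_minute * 60)
        minutes, seconds = divmod(total_seconds, 60)
        return f"{minutes} min {seconds} sec"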