wower99 committed
Commit e98f22b · 1 Parent(s): fa55127

text to video: v1

Files changed (9)
  1. .gitignore +162 -0
  2. README.md +1 -1
  3. app.py +38 -0
  4. constants.py +14 -0
  5. env.example +2 -0
  6. requirements.txt +85 -0
  7. response_schemas.py +14 -0
  8. structured_output_extractor.py +102 -0
  9. utils.py +419 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
+ .env
+ venv/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ # db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  title: Text To Video Generator
  emoji: 🚀
- colorFrom: blue
+ colorFrom: green
  colorTo: yellow
  sdk: streamlit
  sdk_version: 1.42.0
app.py ADDED
@@ -0,0 +1,38 @@
+ import streamlit as st
+ from gradio_client import Client
+ from utils import get_scenes, generate_video_assets, generate_video  # pipeline functions from utils.py
+
+ # Streamlit app
+ st.title("Text to Video Generator")
+
+ # Text input box with a max of 1500 characters
+ text_script = st.text_area("Enter your text (max 1500 characters):", max_chars=1500)
+
+ # Initialize the client with the hosted TTS model
+ client = Client("habib926653/Multilingual-TTS")
+
+ # Dropdown for language selection
+ language = st.selectbox("Choose Language:", ["Urdu", "English"])  # Add more languages as needed
+
+ # Get available speakers for the selected language
+ speakers_response = client.predict(language=language, api_name="/get_speakers")
+
+ # Extract the speakers list
+ speakers = [choice[0] for choice in speakers_response["choices"]]
+ selected_speaker = st.selectbox("Choose Speaker:", speakers)
+
+ # Button to trigger the processing
+ if st.button("Generate Video"):
+     if text_script:
+         # Run the pipeline from utils.py: script -> scenes -> assets -> video
+         scenes = get_scenes(text_script)
+         video_assets_folder = generate_video_assets(scenes, language, selected_speaker)
+         st.write(video_assets_folder)
+         generated_video_path = generate_video(video_assets_folder)
+         st.video(generated_video_path)
+     else:
+         st.warning("Please enter some text to generate a video.")
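
For quick local testing, the same flow app.py wires together can be driven without the Streamlit UI. A minimal sketch (not part of the commit; it assumes the two Spaces above are reachable and that HF_TOKEN is set as in env.example):

from gradio_client import Client
from utils import get_scenes, generate_video_assets, generate_video

story = "In a quiet village, a young girl named Lily discovered a hidden garden."

# Mirror the app's dropdowns: pick the first available speaker for Urdu
tts_client = Client("habib926653/Multilingual-TTS")
speakers_response = tts_client.predict(language="Urdu", api_name="/get_speakers")
first_speaker = speakers_response["choices"][0][0]

scenes = get_scenes(story)                                            # script -> scene dicts
assets_folder = generate_video_assets(scenes, "Urdu", first_speaker)  # images + narration on disk
print(generate_video(assets_folder))                                  # path to final_video.mp4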
constants.py ADDED
@@ -0,0 +1,14 @@
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ HF_TOKEN = os.getenv("HF_TOKEN", None)
+
+ SUMMARIZATION_ENDPOINT = "https://habib926653-text-translator-agent-api.hf.space/generate"
+ IMAGE_GENERATION_SPACE_NAME = "habib926653/stabilityai-stable-diffusion-3.5-large-turbo"
+
+ # Supported formats
+ SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
env.example ADDED
@@ -0,0 +1,2 @@
+ HF_TOKEN=HUGGING_FACE_TOKEN
+ GROQ_API_KEY=GROQ_API_KEY
requirements.txt ADDED
@@ -0,0 +1,85 @@
+ altair==5.5.0
+ annotated-types==0.7.0
+ anyio==4.8.0
+ attrs==24.3.0
+ audeer==2.2.1
+ audiofile==1.5.1
+ audmath==1.4.1
+ blinker==1.9.0
+ cachetools==5.5.0
+ certifi==2024.12.14
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ click==8.1.8
+ decorator==4.4.2
+ distro==1.9.0
+ exceptiongroup==1.2.2
+ filelock==3.16.1
+ fsspec==2024.12.0
+ gitdb==4.0.12
+ GitPython==3.1.44
+ gradio_client==1.5.4
+ groq==0.15.0
+ h11==0.14.0
+ httpcore==1.0.7
+ httpx==0.28.1
+ huggingface-hub==0.27.1
+ idna==3.10
+ imageio==2.36.1
+ imageio-ffmpeg==0.5.1
+ Jinja2==3.1.5
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ jsonschema==4.23.0
+ jsonschema-specifications==2024.10.1
+ langchain-core==0.3.29
+ langchain-groq==0.2.3
+ langgraph==0.2.62
+ langgraph-checkpoint==2.0.9
+ langgraph-sdk==0.1.51
+ langsmith==0.2.10
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ mdurl==0.1.2
+ moviepy==1.0.3
+ msgpack==1.1.0
+ narwhals==1.21.1
+ numpy==2.2.1
+ opencv-python==4.10.0.84
+ orjson==3.10.14
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.1.0
+ proglog==0.1.10
+ protobuf==5.29.3
+ pyarrow==18.1.0
+ pycparser==2.22
+ pydantic==2.10.5
+ pydantic_core==2.27.2
+ pydeck==0.9.1
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ pytz==2024.2
+ PyYAML==6.0.2
+ referencing==0.35.1
+ requests==2.32.3
+ requests-toolbelt==1.0.0
+ rich==13.9.4
+ rpds-py==0.22.3
+ scipy==1.15.1
+ six==1.17.0
+ smmap==5.0.2
+ sniffio==1.3.1
+ soundfile==0.13.0
+ streamlit==1.41.1
+ tenacity==9.0.0
+ toml==0.10.2
+ tornado==6.4.2
+ tqdm==4.67.1
+ typing_extensions==4.12.2
+ tzdata==2024.2
+ urllib3==2.3.0
+ watchdog==6.0.0
+ websockets==14.1
response_schemas.py ADDED
@@ -0,0 +1,14 @@
+ from pydantic import BaseModel, Field
+ from typing import List
+
+ class SingleScene(BaseModel):
+     text: str = Field(description="Actual segment of text from the complete story")
+     image_prompts: List[str] = Field(
+         description="""List of detailed and descriptive image prompts for the segment.
+         Prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]
+         Example: "theme: eerie forest | style: cinematic realism | focus: abandoned cabin | details: broken windows, overgrown vines | lighting: moonlit fog | perspective: wide-angle shot"
+         """
+     )
+
+ class ScenesResponseSchema(BaseModel):
+     scenes: List[SingleScene]
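
For reference, a hand-built instance of the schema above shows the dict shape that get_scenes() in utils.py returns (via model_dump()) and that generate_video_assets() consumes; the values here are illustrative only:

from response_schemas import ScenesResponseSchema

example = ScenesResponseSchema(
    scenes=[
        {
            "text": "Lily discovered a hidden garden.",
            "image_prompts": [
                "theme: wonder | style: cinematic realism | focus: hidden garden gate"
                " | details: glowing flowers, ivy archway | lighting: golden hour"
                " | perspective: wide-angle shot"
            ],
        }
    ]
)
print(example.model_dump())
# -> {'scenes': [{'text': 'Lily discovered a hidden garden.', 'image_prompts': ['theme: wonder | ...']}]}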
structured_output_extractor.py ADDED
@@ -0,0 +1,102 @@
+ from typing import Type, Optional, TypedDict
+ from pydantic import BaseModel
+ from langgraph.graph import StateGraph, START, END
+ from langchain_core.messages import SystemMessage
+ from langchain_groq import ChatGroq
+ import constants  # constants.py holds the LLM provider configuration (API keys via .env)
+
+
+ # Define the state structure passed between graph nodes
+ class State(TypedDict):
+     messages: list
+     output: Optional[BaseModel]
+
+
+ # Generic Pydantic-model-based structured output extractor
+ class StructuredOutputExtractor:
+     def __init__(self, response_schema: Type[BaseModel]):
+         """
+         Initializes the extractor for any given structured output model.
+
+         :param response_schema: Pydantic model class used for structured output extraction
+         """
+         self.response_schema = response_schema
+
+         # Initialize the language model (provider and API keys come from constants.py)
+         # self.llm = ChatGroq(model="llama-3.3-70b-versatile")  # token limit 100k tokens
+         self.llm = ChatGroq(model="deepseek-r1-distill-llama-70b")  # currently no limit per day
+
+         # Bind the model with structured output capability
+         self.structured_llm = self.llm.with_structured_output(response_schema)
+
+         # Build the graph for structured output
+         self._build_graph()
+
+     def _build_graph(self):
+         """
+         Build the LangGraph computational graph for structured extraction.
+         """
+         graph_builder = StateGraph(State)
+
+         # Add nodes and edges for structured output
+         graph_builder.add_node("extract", self._extract_structured_info)
+         graph_builder.add_edge(START, "extract")
+         graph_builder.add_edge("extract", END)
+
+         self.graph = graph_builder.compile()
+
+     def _extract_structured_info(self, state: dict):
+         """
+         Extract structured information using the specified response model.
+
+         :param state: Current graph state
+         :return: Updated state with structured output
+         """
+         query = state['messages'][-1].content
+         print(f"Processing query: {query}")
+         try:
+             # Extract details using the structured model
+             output = self.structured_llm.invoke(query)
+             # Return the structured response
+             return {"output": output}
+         except Exception as e:
+             print(f"Error during extraction: {e}")
+             return {"output": None}
+
+     def extract(self, query: str) -> Optional[BaseModel]:
+         """
+         Public method to extract structured information.
+
+         :param query: Input query for structured output extraction
+         :return: Structured model object or None
+         """
+         result = self.graph.invoke({
+             "messages": [SystemMessage(content=query)]
+         })
+         # Return the structured model response, if available
+         return result.get('output')
+
+
+ if __name__ == '__main__':
+
+     # Example Pydantic model (e.g., Movie)
+     class Movie(BaseModel):
+         title: str
+         year: int
+         genre: str
+         rating: Optional[float] = None
+         actors: list[str] = []
+
+     # Example usage with a generic structured extractor
+     extractor = StructuredOutputExtractor(response_schema=Movie)
+
+     query = "Tell me about the movie Inception. Provide details about its title, year, genre, rating, and main actors."
+
+     result = extractor.extract(query)
+     print(type(result))
+     if result:
+         print(result)
utils.py ADDED
@@ -0,0 +1,419 @@
+ import os
+ import tempfile
+ from typing import List, Dict
+
+ from PIL import Image
+ from gradio_client import Client
+ import moviepy.editor as mp
+ from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
+
+ import constants
+ from structured_output_extractor import StructuredOutputExtractor
+ from response_schemas import ScenesResponseSchema
+
+
+ def get_scenes(text_script: str):
+     prompt = f"""
+     ROLE: Story to Scene Generator
+     Tasks: For the given story
+     1. Read it completely and understand the full context
+     2. Rewrite the story in tiny segments (without changing a single word) and attach a descriptive image prompt, or a list of image prompts, to visualize each segment
+     3. Make sure each image prompt matches the theme of the overall scene and, ultimately, the story
+     4. If necessary, a scene can have more than one image prompt
+
+     Here is the complete story: {text_script}
+     """
+
+     extractor = StructuredOutputExtractor(response_schema=ScenesResponseSchema)
+     result = extractor.extract(prompt)
+     # extract() returns None on failure, so guard before dumping the pydantic model to a dict
+     return result.model_dump() if result else {"scenes": []}
+
+
+ def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
+     try:
+         # Ensure the base folder exists
+         os.makedirs(base_path, exist_ok=True)
+
+         # Extract scenes from the input dictionary
+         scenes_list = scenes.get("scenes", [])
+         print(f"Total Scenes: {len(scenes_list)}")
+
+         # Create a folder for the current video
+         video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
+         os.makedirs(video_folder, exist_ok=True)
+
+         # Create 'images' and 'audio' folders inside the video folder
+         images_folder = os.path.join(video_folder, "images")
+         audio_folder = os.path.join(video_folder, "audio")
+         os.makedirs(images_folder, exist_ok=True)
+         os.makedirs(audio_folder, exist_ok=True)
+
+         for scene_count, scene in enumerate(scenes_list):
+             text: str = scene.get("text", "")
+             image_prompts: List[str] = scene.get("image_prompts", [])
+
+             # Create a folder for the current scene inside the 'images' folder
+             scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
+             os.makedirs(scene_images_folder, exist_ok=True)
+
+             # Generate audio for the scene
+             audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
+             audio_result = generate_audio(text, language, speaker, path=audio_path)
+
+             if "error" in audio_result:
+                 print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
+                 continue
+
+             # Generate images for the scene
+             image_paths = []
+             for count, prompt in enumerate(image_prompts):
+                 image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
+                 image_result = generate_image(prompt=prompt, path=image_path)
+
+                 if "error" in image_result:
+                     print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
+                 else:
+                     image_paths.append(image_path)
+
+             print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")
+
+         # Return the path of the main video folder
+         return video_folder
+
+     except Exception as e:
+         print(f"Error during video asset generation: {e}")
+         return {"error": str(e)}
+
+
+ def generate_audio(text, language_code, speaker, path='test_audio.mp3'):
+     try:
+         # Initialize the Gradio Client with the hosted model
+         client = Client("habib926653/Multilingual-TTS")
+
+         # Make the API request
+         result = client.predict(
+             text=text,                    # Text input for audio generation
+             language_code=language_code,  # Language code (e.g., "Urdu")
+             speaker=speaker,              # Selected speaker (e.g., "Asad")
+             api_name="/text_to_speech_edge"
+         )
+
+         # The result is a tuple: (text, audio_file_path)
+         audio_file_path = result[1]  # The generated audio file path
+
+         # Read the audio file as bytes
+         with open(audio_file_path, 'rb') as f:
+             audio_bytes = f.read()
+
+         # Save the audio bytes to the specified path
+         with open(path, 'wb') as f:
+             f.write(audio_bytes)
+
+         # Return the result (which includes the file path)
+         return {"audio_file": path}
+
+     except Exception as e:
+         print(f"Error during audio generation: {e}")
+         return {"error": str(e)}
+
+
+ def generate_image(prompt, path='test_image.png'):
+     try:
+         # Initialize the Gradio Client with the Hugging Face token
+         client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
+
+         # Make the API request
+         result = client.predict(
+             prompt=prompt,  # Text prompt for image generation
+             width=1280,
+             height=720,
+             api_name="/generate_image"
+         )
+
+         image = Image.open(result)
+         image.save(path)
+
+         # Return the result (which includes the URL or file path)
+         return result
+
+     except Exception as e:
+         print(f"Error during image generation: {e}")
+         return {"error": str(e)}
+
+
+ def generate_images(image_prompts, folder_name='test_folder'):
+     folder_path = tmp_folder(folder_name)
+     for index, prompt in enumerate(image_prompts):
+         print(index, prompt)
+         image_path = generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
+         yield prompt, image_path
+
+
+ def tmp_folder(folder_name: str) -> str:
+     # Use the current working directory (or any other accessible path) for temp folders
+     base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")  # Change this to any path you prefer
+
+     # Ensure that the base temp folder exists
+     if not os.path.exists(base_tmp_path):
+         os.makedirs(base_tmp_path)
+         print(f"Base temporary folder '{base_tmp_path}' created.")
+
+     # Define the path for the specific temporary folder
+     folder_path = os.path.join(base_tmp_path, folder_name)
+
+     # Create the specific temporary folder if it doesn't exist
+     os.makedirs(folder_path, exist_ok=True)
+
+     print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
+
+     return folder_path
+
+
+ def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
+     audio_folder = os.path.join(video_folder, "audio")
+     images_folder = os.path.join(video_folder, "images")
+     final_clips = []
+
+     # Get all scene folders, sorted numerically so scene_10 comes after scene_9
+     scene_folders = sorted(os.listdir(images_folder), key=lambda name: int(name.split("_")[-1]))
+
+     for scene in scene_folders:
+         scene_path = os.path.join(images_folder, scene)
+         audio_path = os.path.join(audio_folder, f"{scene}.mp3")
+
+         if not os.path.exists(audio_path):
+             print(f"Warning: Audio file {audio_path} not found. Skipping scene {scene}.")
+             continue
+
+         # Get all images for the scene
+         image_files = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if img.endswith(('.png', '.jpg', '.jpeg'))])
+
+         if not image_files:
+             print(f"Warning: No images found in {scene_path}. Skipping scene {scene}.")
+             continue
+
+         # Load the audio file
+         audio_clip = mp.AudioFileClip(audio_path)
+
+         # Split the narration time evenly across the scene's images
+         duration_per_image = audio_clip.duration / len(image_files)
+
+         # Create image clips
+         image_clips = [mp.ImageClip(img).set_duration(duration_per_image) for img in image_files]
+
+         # Concatenate the image clips and attach the narration
+         scene_video = mp.concatenate_videoclips(image_clips, method="compose").set_audio(audio_clip)
+
+         final_clips.append(scene_video)
+
+     if not final_clips:
+         print("Error: No valid scenes processed.")
+         return None
+
+     # Concatenate all scenes
+     final_video = mp.concatenate_videoclips(final_clips, method="compose")
+     output_path = os.path.join(video_folder, output_filename)
+     final_video.write_videofile(output_path, fps=24, codec='libx264')
+
+     return output_path
+
+ def generate_video_old(audio_file, images, segments):
+     try:
+         # Save the uploaded audio file to a temporary location
+         file_extension = os.path.splitext(audio_file.name)[1]
+         temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
+         temp_audio_path.write(audio_file.read())
+         temp_audio_path.close()
+
+         # Load the audio file using MoviePy
+         audio = AudioFileClip(temp_audio_path.name)
+
+         # Define YouTube-like dimensions (16:9 aspect ratio)
+         frame_width = 1280
+         frame_height = 720
+
+         video_clips = []
+         total_segments = len(segments)
+
+         for i, current_segment in enumerate(segments):
+             start_time = current_segment["start"]
+             end_time = current_segment["end"]
+
+             # Calculate the actual duration including any gap until the next segment
+             if i < total_segments - 1:
+                 # If there's a next segment, extend until it starts
+                 next_segment = segments[i + 1]
+                 actual_end_time = next_segment["start"]
+             else:
+                 # For the last segment, use its end time
+                 actual_end_time = end_time
+
+             # Calculate the total duration including any gap
+             segment_duration = actual_end_time - start_time
+
+             print(f"\nProcessing segment {i + 1}/{total_segments}:")
+             print(f"  Start time: {start_time}s")
+             print(f"  Base end time: {end_time}s")
+             print(f"  Actual end time: {actual_end_time}s")
+             print(f"  Total duration: {segment_duration}s")
+             print(f"  Text: '{current_segment['text']}'")
+
+             # Ensure the image index is within bounds
+             image_path = images[min(i, len(images) - 1)]
+
+             # Create an ImageClip for the current segment
+             image_clip = ImageClip(image_path)
+
+             # Resize and pad the image to fit a 16:9 aspect ratio
+             image_clip = image_clip.resize(height=frame_height).on_color(
+                 size=(frame_width, frame_height),
+                 color=(0, 0, 0),  # Black background
+                 pos="center"      # Center the image
+             )
+
+             # Set the duration and start time for the clip
+             image_clip = image_clip.set_duration(segment_duration)
+             image_clip = image_clip.set_start(start_time)  # Set the start time explicitly
+
+             video_clips.append(image_clip)
+
+         # Concatenate all the image clips to form the video
+         print("Concatenating video clips...")
+         video = concatenate_videoclips(video_clips, method="compose")
+
+         # Add the audio to the video
+         video = video.set_audio(audio)
+
+         # Save the video to a temporary file
+         temp_dir = tempfile.gettempdir()
+         video_path = os.path.join(temp_dir, "generated_video.mp4")
+         print(f"Writing video file to {video_path}...")
+         video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")
+
+         # Clean up the temporary audio file
+         os.remove(temp_audio_path.name)
+         print("Temporary audio file removed.")
+
+         return video_path
+
+     except Exception as e:
+         print(f"Error generating video: {e}")
+         return None
+
+
+ # Example usage:
+ if __name__ == "__main__":
+     short_story = """
+     In a quiet village, a young girl named Lily discovered a hidden garden.
+     Every flower in the garden glowed with a magical light, revealing secrets of the past.
+     Lily knew she had found something truly extraordinary.
+     """
+     generate_audio(short_story, "Urdu", "Asad")
+     # scenes_response = get_scenes(short_story)
+     # scenes = scenes_response.get("scenes")
+     # print("total scenes: ", len(scenes))
+     # for scene in scenes:
+     #     print("image prompts for this scene", len(scene.get("image_prompts")))
+     #     print("\n\n")
+     # for scene_count, scene in enumerate(scenes):
+     #     image_prompts = scene.get("image_prompts")
+     #     for count, prompt in enumerate(image_prompts):
+     #         generate_image(prompt=prompt, path=f"scene_{scene_count+1}_image_{count+1}.png")
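
For orientation, the on-disk layout that generate_video_assets() produces and generate_video() consumes looks like the sketch below (inferred from the two functions: each images/scene_N folder must pair with audio/scene_N.mp3):

# media/
# └── video_1/
#     ├── audio/
#     │   ├── scene_1.mp3
#     │   └── scene_2.mp3
#     ├── images/
#     │   ├── scene_1/
#     │   │   ├── scene_1_image_1.png
#     │   │   └── scene_1_image_2.png
#     │   └── scene_2/
#     │       └── scene_2_image_1.png
#     └── final_video.mp4   # written here by generate_video()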