Spaces:
Running
Running
text to video: v1
Browse files- .gitignore +162 -0
- README.md +1 -1
- app.py +38 -0
- constants.py +14 -0
- env.example +2 -0
- requirements.txt +85 -0
- response_schemas.py +14 -0
- structured_output_extractor.py +102 -0
- utils.py +419 -0
.gitignore
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
venv/
|
3 |
+
|
4 |
+
# Byte-compiled / optimized / DLL files
|
5 |
+
__pycache__/
|
6 |
+
*.py[cod]
|
7 |
+
*$py.class
|
8 |
+
|
9 |
+
# C extensions
|
10 |
+
*.so
|
11 |
+
|
12 |
+
# Distribution / packaging
|
13 |
+
.Python
|
14 |
+
build/
|
15 |
+
develop-eggs/
|
16 |
+
dist/
|
17 |
+
downloads/
|
18 |
+
eggs/
|
19 |
+
.eggs/
|
20 |
+
lib/
|
21 |
+
lib64/
|
22 |
+
parts/
|
23 |
+
sdist/
|
24 |
+
var/
|
25 |
+
wheels/
|
26 |
+
share/python-wheels/
|
27 |
+
*.egg-info/
|
28 |
+
.installed.cfg
|
29 |
+
*.egg
|
30 |
+
MANIFEST
|
31 |
+
|
32 |
+
# PyInstaller
|
33 |
+
# Usually these files are written by a python script from a template
|
34 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
35 |
+
*.manifest
|
36 |
+
*.spec
|
37 |
+
|
38 |
+
# Installer logs
|
39 |
+
pip-log.txt
|
40 |
+
pip-delete-this-directory.txt
|
41 |
+
|
42 |
+
# Unit test / coverage reports
|
43 |
+
htmlcov/
|
44 |
+
.tox/
|
45 |
+
.nox/
|
46 |
+
.coverage
|
47 |
+
.coverage.*
|
48 |
+
.cache
|
49 |
+
nosetests.xml
|
50 |
+
coverage.xml
|
51 |
+
*.cover
|
52 |
+
*.py,cover
|
53 |
+
.hypothesis/
|
54 |
+
.pytest_cache/
|
55 |
+
cover/
|
56 |
+
|
57 |
+
# Translations
|
58 |
+
*.mo
|
59 |
+
*.pot
|
60 |
+
|
61 |
+
# Django stuff:
|
62 |
+
*.log
|
63 |
+
local_settings.py
|
64 |
+
# db.sqlite3
|
65 |
+
db.sqlite3-journal
|
66 |
+
|
67 |
+
# Flask stuff:
|
68 |
+
instance/
|
69 |
+
.webassets-cache
|
70 |
+
|
71 |
+
# Scrapy stuff:
|
72 |
+
.scrapy
|
73 |
+
|
74 |
+
# Sphinx documentation
|
75 |
+
docs/_build/
|
76 |
+
|
77 |
+
# PyBuilder
|
78 |
+
.pybuilder/
|
79 |
+
target/
|
80 |
+
|
81 |
+
# Jupyter Notebook
|
82 |
+
.ipynb_checkpoints
|
83 |
+
|
84 |
+
# IPython
|
85 |
+
profile_default/
|
86 |
+
ipython_config.py
|
87 |
+
|
88 |
+
# pyenv
|
89 |
+
# For a library or package, you might want to ignore these files since the code is
|
90 |
+
# intended to run in multiple environments; otherwise, check them in:
|
91 |
+
# .python-version
|
92 |
+
|
93 |
+
# pipenv
|
94 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
95 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
96 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
97 |
+
# install all needed dependencies.
|
98 |
+
#Pipfile.lock
|
99 |
+
|
100 |
+
# poetry
|
101 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
102 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
103 |
+
# commonly ignored for libraries.
|
104 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
105 |
+
#poetry.lock
|
106 |
+
|
107 |
+
# pdm
|
108 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
109 |
+
#pdm.lock
|
110 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
111 |
+
# in version control.
|
112 |
+
# https://pdm.fming.dev/#use-with-ide
|
113 |
+
.pdm.toml
|
114 |
+
|
115 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
116 |
+
__pypackages__/
|
117 |
+
|
118 |
+
# Celery stuff
|
119 |
+
celerybeat-schedule
|
120 |
+
celerybeat.pid
|
121 |
+
|
122 |
+
# SageMath parsed files
|
123 |
+
*.sage.py
|
124 |
+
|
125 |
+
# Environments
|
126 |
+
.venv
|
127 |
+
env/
|
128 |
+
venv/
|
129 |
+
ENV/
|
130 |
+
env.bak/
|
131 |
+
venv.bak/
|
132 |
+
|
133 |
+
# Spyder project settings
|
134 |
+
.spyderproject
|
135 |
+
.spyproject
|
136 |
+
|
137 |
+
# Rope project settings
|
138 |
+
.ropeproject
|
139 |
+
|
140 |
+
# mkdocs documentation
|
141 |
+
/site
|
142 |
+
|
143 |
+
# mypy
|
144 |
+
.mypy_cache/
|
145 |
+
.dmypy.json
|
146 |
+
dmypy.json
|
147 |
+
|
148 |
+
# Pyre type checker
|
149 |
+
.pyre/
|
150 |
+
|
151 |
+
# pytype static type analyzer
|
152 |
+
.pytype/
|
153 |
+
|
154 |
+
# Cython debug symbols
|
155 |
+
cython_debug/
|
156 |
+
|
157 |
+
# PyCharm
|
158 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
159 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
160 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
+
#.idea/
|
README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
---
|
2 |
title: Text To Video Generator
|
3 |
emoji: 🚀
|
4 |
-
colorFrom:
|
5 |
colorTo: yellow
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.42.0
|
|
|
1 |
---
|
2 |
title: Text To Video Generator
|
3 |
emoji: 🚀
|
4 |
+
colorFrom: green
|
5 |
colorTo: yellow
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.42.0
|
app.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
from gradio_client import Client
from utils import get_scenes, generate_video_assets, generate_video  # Import the function from utils.py

# Streamlit app
st.title("Text to Video Generator")

# Text input box with a max of 1500 characters (keeps downstream TTS/LLM calls bounded)
text_script = st.text_area("Enter your text (max 1500 characters):", max_chars=1500)

# Initialize the client with the hosted model (used here only to list speakers)
client = Client("habib926653/Multilingual-TTS")

# Dropdown for language selection
language = st.selectbox("Choose Language:", ["Urdu", "English"])  # Add more languages as needed

# Get available speakers for the selected language
speakers_response = client.predict(language=language, api_name="/get_speakers")

# Extract speakers list
speakers = [choice[0] for choice in speakers_response["choices"]]
selected_speaker = st.selectbox("Choose Speaker:", speakers)

# Button to trigger the processing
if st.button("Generate Video"):
    if text_script:
        # Pipeline: script -> scenes -> per-scene assets (audio + images) -> stitched video
        scenes = get_scenes(text_script)
        video_assets_folder = generate_video_assets(scenes, language, selected_speaker)
        st.write(video_assets_folder)
        generated_video_path = generate_video(video_assets_folder)
        # Bug fix: generate_video returns None when no scene could be processed;
        # passing None to st.video previously crashed the app.
        if generated_video_path:
            st.video(generated_video_path)
        else:
            st.error("Video generation failed: no scenes could be processed.")
    else:
        st.warning("Please enter some text to generate prompts.")
constants.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Hugging Face access token; None when not configured.
HF_TOKEN = os.environ.get("HF_TOKEN")

# External service endpoints / space identifiers.
SUMMARIZATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.space/generate"
IMAGE_GENERATION_SPACE_NAME="habib926653/stabilityai-stable-diffusion-3.5-large-turbo"

# Supported formats (audio containers the app accepts)
SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
13 |
+
|
14 |
+
|
env.example
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
HF_TOKEN=HUGGING_FACE_TOKEN
|
2 |
+
GROQ_API_KEY=GROQ_API_KEY
|
requirements.txt
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==5.5.0
|
2 |
+
annotated-types==0.7.0
|
3 |
+
anyio==4.8.0
|
4 |
+
attrs==24.3.0
|
5 |
+
audeer==2.2.1
|
6 |
+
audiofile==1.5.1
|
7 |
+
audmath==1.4.1
|
8 |
+
blinker==1.9.0
|
9 |
+
cachetools==5.5.0
|
10 |
+
certifi==2024.12.14
|
11 |
+
cffi==1.17.1
|
12 |
+
charset-normalizer==3.4.1
|
13 |
+
click==8.1.8
|
14 |
+
decorator==4.4.2
|
15 |
+
distro==1.9.0
|
16 |
+
exceptiongroup==1.2.2
|
17 |
+
filelock==3.16.1
|
18 |
+
fsspec==2024.12.0
|
19 |
+
gitdb==4.0.12
|
20 |
+
GitPython==3.1.44
|
21 |
+
gradio_client==1.5.4
|
22 |
+
groq==0.15.0
|
23 |
+
h11==0.14.0
|
24 |
+
httpcore==1.0.7
|
25 |
+
httpx==0.28.1
|
26 |
+
huggingface-hub==0.27.1
|
27 |
+
idna==3.10
|
28 |
+
imageio==2.36.1
|
29 |
+
imageio-ffmpeg==0.5.1
|
30 |
+
Jinja2==3.1.5
|
31 |
+
jsonpatch==1.33
|
32 |
+
jsonpointer==3.0.0
|
33 |
+
jsonschema==4.23.0
|
34 |
+
jsonschema-specifications==2024.10.1
|
35 |
+
langchain-core==0.3.29
|
36 |
+
langchain-groq==0.2.3
|
37 |
+
langgraph==0.2.62
|
38 |
+
langgraph-checkpoint==2.0.9
|
39 |
+
langgraph-sdk==0.1.51
|
40 |
+
langsmith==0.2.10
|
41 |
+
markdown-it-py==3.0.0
|
42 |
+
MarkupSafe==3.0.2
|
43 |
+
mdurl==0.1.2
|
44 |
+
moviepy==1.0.3
|
45 |
+
msgpack==1.1.0
|
46 |
+
narwhals==1.21.1
|
47 |
+
numpy==2.2.1
|
48 |
+
opencv-python==4.10.0.84
|
49 |
+
orjson==3.10.14
|
50 |
+
packaging==24.2
|
51 |
+
pandas==2.2.3
|
52 |
+
pillow==11.1.0
|
53 |
+
proglog==0.1.10
|
54 |
+
protobuf==5.29.3
|
55 |
+
pyarrow==18.1.0
|
56 |
+
pycparser==2.22
|
57 |
+
pydantic==2.10.5
|
58 |
+
pydantic_core==2.27.2
|
59 |
+
pydeck==0.9.1
|
60 |
+
pydub==0.25.1
|
61 |
+
Pygments==2.19.1
|
62 |
+
python-dateutil==2.9.0.post0
|
63 |
+
python-dotenv==1.0.1
|
64 |
+
pytz==2024.2
|
65 |
+
PyYAML==6.0.2
|
66 |
+
referencing==0.35.1
|
67 |
+
requests==2.32.3
|
68 |
+
requests-toolbelt==1.0.0
|
69 |
+
rich==13.9.4
|
70 |
+
rpds-py==0.22.3
|
71 |
+
scipy==1.15.1
|
72 |
+
six==1.17.0
|
73 |
+
smmap==5.0.2
|
74 |
+
sniffio==1.3.1
|
75 |
+
soundfile==0.13.0
|
76 |
+
streamlit==1.41.1
|
77 |
+
tenacity==9.0.0
|
78 |
+
toml==0.10.2
|
79 |
+
tornado==6.4.2
|
80 |
+
tqdm==4.67.1
|
81 |
+
typing_extensions==4.12.2
|
82 |
+
tzdata==2024.2
|
83 |
+
urllib3==2.3.0
|
84 |
+
watchdog==6.0.0
|
85 |
+
websockets==14.1
|
response_schemas.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel, Field
from typing import List

class SingleScene(BaseModel):
    # One scene: a verbatim slice of the story plus the prompts used to illustrate it.
    # `text` must be copied from the source story unchanged (see get_scenes prompt).
    text: str = Field(description="Actual Segment of text from the complete story")
    image_prompts: List[str] = Field(
        description="""List of detailed and descriptive image prompts for the segment
    prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]"
    Example: "theme: eerie forest | style: cinematic realism | focus: abandoned cabin | details: broken windows, overgrown vines | lighting: moonlit fog | perspective: wide-angle shot"
    """
    )

class ScenesResponseSchema(BaseModel):
    # Top-level structured-LLM response: the full story split into ordered scenes.
    scenes: List[SingleScene]
structured_output_extractor.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Type, Optional
|
2 |
+
from pydantic import BaseModel
|
3 |
+
from langgraph.graph import StateGraph, START, END
|
4 |
+
from typing import TypedDict
|
5 |
+
import constants # Assuming constants.py holds LLM provider configurations
|
6 |
+
from langchain_groq import ChatGroq
|
7 |
+
|
8 |
+
|
# Define the State structure (similar to previous definition)
class State(TypedDict):
    # Message history; the last element's .content is the query to extract from.
    messages: list
    # Structured extraction result, or None when extraction failed.
    output: Optional[BaseModel]
13 |
+
|
14 |
+
|
# Generic Pydantic model-based structured output extractor
class StructuredOutputExtractor:
    def __init__(self, response_schema: Type[BaseModel],
                 model: str = "deepseek-r1-distill-llama-70b"):
        """
        Initializes the extractor for any given structured output model.

        :param response_schema: Pydantic model class used for structured output extraction
        :param model: Groq model name to use. Default kept from the original code
                      ("deepseek-r1-distill-llama-70b", currently no daily limit);
                      "llama-3.3-70b-versatile" is an alternative with a 100k-token cap.
        """
        self.response_schema = response_schema

        # Initialize language model (provider and API keys come from the environment)
        self.llm = ChatGroq(model=model)

        # Bind the model with structured output capability
        self.structured_llm = self.llm.with_structured_output(response_schema)

        # Build the graph for structured output
        self._build_graph()

    def _build_graph(self):
        """
        Build the LangGraph computational graph: START -> extract -> END.
        """
        graph_builder = StateGraph(State)

        graph_builder.add_node("extract", self._extract_structured_info)
        graph_builder.add_edge(START, "extract")
        graph_builder.add_edge("extract", END)

        self.graph = graph_builder.compile()

    def _extract_structured_info(self, state: dict):
        """
        Extract structured information using the specified response model.

        :param state: Current graph state
        :return: Updated state with structured output (output is None on failure)
        """
        query = state['messages'][-1].content
        print(f"Processing query: {query}")
        try:
            # Extract details using the structured model
            output = self.structured_llm.invoke(query)
            return {"output": output}
        except Exception as e:
            # Best-effort: log and signal failure via a None output rather than raising.
            print(f"Error during extraction: {e}")
            return {"output": None}

    def extract(self, query: str) -> Optional[BaseModel]:
        """
        Public method to extract structured information.

        :param query: Input query for structured output extraction
        :return: Structured model object, or None when extraction failed
        """
        # Imported lazily to keep module import light.
        from langchain_core.messages import SystemMessage

        result = self.graph.invoke({
            "messages": [SystemMessage(content=query)]
        })
        # Return the structured model response, if available
        return result.get('output')
81 |
+
|
82 |
+
|
if __name__ == '__main__':
    # Smoke test: pull a typed Movie record out of a free-form question.
    class Movie(BaseModel):
        title: str
        year: int
        genre: str
        rating: Optional[float] = None
        actors: list[str] = []

    extractor = StructuredOutputExtractor(response_schema=Movie)

    query = "Tell me about the movie Inception. Provide details about its title, year, genre, rating, and main actors."

    result = extractor.extract(query)
    print(type(result))
    if result:
        print(result)
utils.py
ADDED
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import constants
|
2 |
+
import os
|
3 |
+
from PIL import Image
|
4 |
+
from gradio_client import Client
|
5 |
+
import moviepy.editor as mp
|
6 |
+
from moviepy.video.VideoClip import ImageClip
|
7 |
+
from moviepy.editor import AudioFileClip
|
8 |
+
from structured_output_extractor import StructuredOutputExtractor
|
9 |
+
from response_schemas import ScenesResponseSchema
|
10 |
+
from typing import List, Dict
|
11 |
+
import tempfile
|
12 |
+
import os
|
13 |
+
|
14 |
+
|
def get_scenes(text_script: str):
    """Split a story into scenes (text + image prompts) via the structured LLM.

    :param text_script: The complete story text.
    :return: dict form of ScenesResponseSchema; {"scenes": []} when extraction fails.
    """
    prompt = f"""
    ROLE: Story to Scene Generator
    Tasks: For the given story
    1. Read it Completely and Understand the Complete Context
    2. Rewrite the story in tiny segments(but without even changing a word) and a descriptive image or list of image prompts to visualize each segment
    3. Make sure each image prompt matches the theme of overall scene and ultimately the story
    4. If necessary, a scene can have more than one image prompts

    Here is the Complete Story: {text_script}
    """

    extractor = StructuredOutputExtractor(response_schema=ScenesResponseSchema)
    result = extractor.extract(prompt)
    # Bug fix: extract() returns None on failure; calling model_dump() on it
    # previously raised AttributeError. Fall back to an empty scene list.
    if result is None:
        return {"scenes": []}
    return result.model_dump()  # returns dictionary version pydantic model
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
# def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media"):
|
37 |
+
# """
|
38 |
+
# Generate video assets (images and audio) for each scene in a structured folder hierarchy.
|
39 |
+
|
40 |
+
# Args:
|
41 |
+
# scenes (Dict): A dictionary containing a list of scenes under the key "scenes".
|
42 |
+
# language (str): The language code for audio generation.
|
43 |
+
# speaker (str): The speaker for audio generation.
|
44 |
+
# base_path (str): The base folder where all assets will be stored. Default is "media".
|
45 |
+
|
46 |
+
# Returns:
|
47 |
+
# Dict: A dictionary containing the paths to the generated assets.
|
48 |
+
# """
|
49 |
+
# try:
|
50 |
+
# # Ensure the base folder exists
|
51 |
+
# if not os.path.exists(base_path):
|
52 |
+
# os.makedirs(base_path)
|
53 |
+
|
54 |
+
# # Extract scenes from the input dictionary
|
55 |
+
# scenes_list = scenes.get("scenes", [])
|
56 |
+
# print(f"Total Scenes: {len(scenes_list)}")
|
57 |
+
|
58 |
+
# # Dictionary to store asset paths
|
59 |
+
# assets = {"scenes": []}
|
60 |
+
|
61 |
+
# # Create a folder for the current video
|
62 |
+
# video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
|
63 |
+
# if not os.path.exists(video_folder):
|
64 |
+
# os.makedirs(video_folder)
|
65 |
+
|
66 |
+
# # Create 'images' and 'audio' folders inside the video folder
|
67 |
+
# images_folder = os.path.join(video_folder, "images")
|
68 |
+
# audio_folder = os.path.join(video_folder, "audio")
|
69 |
+
# os.makedirs(images_folder, exist_ok=True)
|
70 |
+
# os.makedirs(audio_folder, exist_ok=True)
|
71 |
+
|
72 |
+
# for scene_count, scene in enumerate(scenes_list):
|
73 |
+
# text: str = scene.get("text", "")
|
74 |
+
# image_prompts: List[str] = scene.get("image_prompts", [])
|
75 |
+
|
76 |
+
# # Create a folder for the current scene inside the 'images' folder
|
77 |
+
# scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
|
78 |
+
# os.makedirs(scene_images_folder, exist_ok=True)
|
79 |
+
|
80 |
+
# # Generate audio for the scene
|
81 |
+
# audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
|
82 |
+
# audio_result = generate_audio(text, language, speaker, path=audio_path)
|
83 |
+
|
84 |
+
# if "error" in audio_result:
|
85 |
+
# print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
|
86 |
+
# continue
|
87 |
+
|
88 |
+
# # Generate images for the scene
|
89 |
+
# image_paths = []
|
90 |
+
# for count, prompt in enumerate(image_prompts):
|
91 |
+
# image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
|
92 |
+
# image_result = generate_image(prompt=prompt, path=image_path)
|
93 |
+
|
94 |
+
# if "error" in image_result:
|
95 |
+
# print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
|
96 |
+
# else:
|
97 |
+
# image_paths.append(image_path)
|
98 |
+
|
99 |
+
# # Add the scene's asset paths to the dictionary
|
100 |
+
# assets["scenes"].append({
|
101 |
+
# "scene_number": scene_count + 1,
|
102 |
+
# "audio_path": audio_path,
|
103 |
+
# "image_paths": image_paths
|
104 |
+
# })
|
105 |
+
|
106 |
+
# print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")
|
107 |
+
|
108 |
+
# return assets
|
109 |
+
|
110 |
+
# except Exception as e:
|
111 |
+
# print(f"Error during video asset generation: {e}")
|
112 |
+
# return {"error": str(e)}
|
113 |
+
|
def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
    """Create per-scene narration and images under a fresh video folder.

    Layout produced: <base_path>/video_N/audio/scene_K.mp3 and
    <base_path>/video_N/images/scene_K/scene_K_image_M.png.

    :param scenes: dict with a "scenes" list (see get_scenes).
    :param language: language name forwarded to the TTS service.
    :param speaker: speaker/voice name forwarded to the TTS service.
    :param base_path: root folder holding one subfolder per generated video.
    :return: path of the video folder on success, {"error": message} on failure.
    """
    try:
        # Root folder that collects every generated video.
        os.makedirs(base_path, exist_ok=True)

        scenes_list = scenes.get("scenes", [])
        print(f"Total Scenes: {len(scenes_list)}")

        # Number the new video folder after the current entry count.
        video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
        os.makedirs(video_folder, exist_ok=True)

        # Asset subfolders.
        images_folder = os.path.join(video_folder, "images")
        audio_folder = os.path.join(video_folder, "audio")
        os.makedirs(images_folder, exist_ok=True)
        os.makedirs(audio_folder, exist_ok=True)

        for scene_count, scene in enumerate(scenes_list):
            scene_text: str = scene.get("text", "")
            prompts: List[str] = scene.get("image_prompts", [])

            # One image folder per scene.
            scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
            os.makedirs(scene_images_folder, exist_ok=True)

            # Narration first; a scene without audio is unusable, so skip it entirely.
            audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
            audio_result = generate_audio(scene_text, language, speaker, path=audio_path)
            if "error" in audio_result:
                print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
                continue

            # Images are best-effort: failures are logged and the rest are kept.
            image_paths = []
            for count, prompt in enumerate(prompts):
                image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
                image_result = generate_image(prompt=prompt, path=image_path)
                if "error" in image_result:
                    print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
                else:
                    image_paths.append(image_path)

            print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")

        # Callers stitch the final video from this folder (see generate_video).
        return video_folder

    except Exception as e:
        print(f"Error during video asset generation: {e}")
        return {"error": str(e)}
170 |
+
|
171 |
+
|
def generate_audio(text, language_code, speaker, path='test_audio.mp3'):
    """Synthesize speech for `text` via the hosted multilingual TTS space.

    :param text: Text to speak.
    :param language_code: Language name understood by the space (e.g. "Urdu").
    :param speaker: Speaker/voice name (e.g. "Asad").
    :param path: Destination file for the generated audio.
    :return: {"audio_file": path} on success, {"error": message} on failure.
    """
    import shutil

    try:
        # Initialize the Gradio Client with the hosted model
        client = Client("habib926653/Multilingual-TTS")

        # Make the API request
        result = client.predict(
            text=text,                    # Text input for audio generation
            language_code=language_code,  # Language code (e.g., "Urdu")
            speaker=speaker,              # Selected speaker (e.g., "Asad")
            api_name="/text_to_speech_edge"
        )

        # The result is a tuple: (text, audio_file_path)
        audio_file_path = result[1]  # The generated audio file path

        # Copy the generated file to the requested destination in chunks,
        # instead of loading the whole file into memory (read()/write()).
        shutil.copyfile(audio_file_path, path)

        # Return the result (which includes the file path)
        return {"audio_file": path}

    except Exception as e:
        print(f"Error during audio generation: {e}")
        return {"error": str(e)}
202 |
+
|
203 |
+
|
def generate_image(prompt, path='test_image.png'):
    """Generate an image for `prompt` and save it to `path`.

    :param prompt: Text prompt for image generation.
    :param path: Local destination for the saved PNG.
    :return: the local `path` on success, {"error": message} on failure.
    """
    try:
        # Initialize the Gradio Client with Hugging Face token
        client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)

        # Make the API request
        result = client.predict(
            prompt=prompt,  # Text prompt for image generation
            width=1280,
            height=720,
            api_name="/generate_image"
        )

        # Re-save the downloaded image at the caller-requested location.
        image = Image.open(result)
        image.save(path)

        # Bug fix: previously returned the client's temporary download path
        # (`result`), not the location the image was actually saved to.
        return path

    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
226 |
+
|
def generate_images(image_prompts, folder_name='test_folder'):
    """Lazily generate one image per prompt into a temp folder.

    Yields (prompt, generate_image result) pairs as each image completes.
    """
    target_dir = tmp_folder(folder_name)
    for idx, current_prompt in enumerate(image_prompts):
        print(idx, current_prompt)
        outcome = generate_image(prompt=current_prompt, path=f"{target_dir}/{idx}.png")
        yield current_prompt, outcome
233 |
+
|
234 |
+
|
235 |
+
|
def tmp_folder(folder_name: str) -> str:
    """Ensure `<cwd>/tmp_dir/<folder_name>` exists and return its path."""
    # Every temp folder lives under a single root inside the working directory.
    root = os.path.join(os.getcwd(), "tmp_dir")

    # Create the root lazily; announce it only the first time.
    if not os.path.exists(root):
        os.makedirs(root)
        print(f"Base temporary folder '{root}' created.")

    # The named folder itself is idempotent to create.
    target = os.path.join(root, folder_name)
    os.makedirs(target, exist_ok=True)

    print(f"Temporary folder '{folder_name}' is ready at {target}.")

    return target
254 |
+
|
255 |
+
|
256 |
+
|
257 |
+
from moviepy.editor import *
|
258 |
+
|
259 |
+
|
260 |
+
import os
|
261 |
+
import tempfile
|
262 |
+
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
|
263 |
+
|
264 |
+
|
265 |
+
|
def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
    """Stitch per-scene images and narration from `video_folder` into one mp4.

    Expects the layout produced by generate_video_assets:
    <video_folder>/images/scene_N/*.png and <video_folder>/audio/scene_N.mp3.

    :param video_folder: folder containing the "images" and "audio" subfolders.
    :param output_filename: name of the mp4 written inside `video_folder`.
    :return: path of the written video, or None when no scene could be processed.
    """
    audio_folder = os.path.join(video_folder, "audio")
    images_folder = os.path.join(video_folder, "images")
    final_clips = []

    # Bug fix: plain lexicographic sort orders "scene_10" before "scene_2",
    # scrambling scene (and image) order once there are 10+ entries.
    # Length-first ordering sorts equal-prefix numbered names numerically.
    def numeric_name_order(name: str):
        return (len(name), name)

    # Get all scene folders in scene-number order
    scene_folders = sorted(os.listdir(images_folder), key=numeric_name_order)

    for scene in scene_folders:
        scene_path = os.path.join(images_folder, scene)
        audio_path = os.path.join(audio_folder, f"{scene}.mp3")

        if not os.path.exists(audio_path):
            print(f"Warning: Audio file {audio_path} not found. Skipping scene {scene}.")
            continue

        # Get all images for the scene, in image-number order
        image_files = sorted(
            [os.path.join(scene_path, img) for img in os.listdir(scene_path)
             if img.endswith(('.png', '.jpg', '.jpeg'))],
            key=numeric_name_order,
        )

        if not image_files:
            print(f"Warning: No images found in {scene_path}. Skipping scene {scene}.")
            continue

        # Load audio file
        audio_clip = mp.AudioFileClip(audio_path)

        # Spread the narration evenly across the scene's images
        duration_per_image = audio_clip.duration / len(image_files)

        # Create image clips
        image_clips = [mp.ImageClip(img).set_duration(duration_per_image) for img in image_files]

        # Concatenate image clips and attach the scene narration
        scene_video = mp.concatenate_videoclips(image_clips, method="compose").set_audio(audio_clip)

        final_clips.append(scene_video)

    if not final_clips:
        print("Error: No valid scenes processed.")
        return None

    # Concatenate all scenes
    final_video = mp.concatenate_videoclips(final_clips, method="compose")
    output_path = os.path.join(video_folder, output_filename)
    final_video.write_videofile(output_path, fps=24, codec='libx264')

    return output_path
313 |
+
|
def generate_video_old(audio_file, images, segments):
    """Legacy builder: one image per transcript segment over a single audio track.

    :param audio_file: Uploaded file object with .name and .read() — presumably a
        Streamlit UploadedFile; TODO confirm against the original caller.
    :param images: List of image paths, indexed per segment (last image is reused
        when there are fewer images than segments).
    :param segments: List of dicts with "start", "end", "text" keys (timings in seconds).
    :return: Path of the written mp4 in the system temp dir, or None on failure.
    """
    try:
        # Save the uploaded audio file to a temporary location
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = AudioFileClip(temp_audio_path.name)

        # Define YouTube-like dimensions (16:9 aspect ratio)
        frame_width = 1280
        frame_height = 720

        video_clips = []
        total_segments = len(segments)

        for i, current_segment in enumerate(segments):
            start_time = current_segment["start"]
            end_time = current_segment["end"]

            # Calculate the actual duration including any gap until the next segment
            if i < total_segments - 1:
                # If there's a next segment, extend until it starts
                next_segment = segments[i + 1]
                actual_end_time = next_segment["start"]
            else:
                # For the last segment, use its end time
                actual_end_time = end_time

            # Calculate total duration including any gap
            segment_duration = actual_end_time - start_time

            print(f"\nProcessing segment {i + 1}/{total_segments}:")
            print(f" Start time: {start_time}s")
            print(f" Base end time: {end_time}s")
            print(f" Actual end time: {actual_end_time}s")
            print(f" Total duration: {segment_duration}s")
            print(f" Text: '{current_segment['text']}'")

            # Ensure the image index is within bounds (reuse last image if short)
            image_path = images[min(i, len(images) - 1)]

            # Create an ImageClip for the current segment
            image_clip = ImageClip(image_path)

            # Resize and pad the image to fit a 16:9 aspect ratio
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),  # Black background
                pos="center"  # Center the image
            )

            # Set the duration and start time for the clip
            image_clip = image_clip.set_duration(segment_duration)
            image_clip = image_clip.set_start(start_time)  # Set the start time explicitly

            video_clips.append(image_clip)

        # Concatenate all the image clips to form the video
        print("Concatenating video clips...")
        video = concatenate_videoclips(video_clips, method="compose")

        # Add the audio to the video
        video = video.set_audio(audio)

        # Save the video to a temporary file
        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        print(f"Writing video file to {video_path}...")
        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")

        # Clean up the temporary audio file
        os.remove(temp_audio_path.name)
        print("Temporary audio file removed.")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None
396 |
+
|
397 |
+
|
398 |
+
|
399 |
+
|
400 |
+
|
401 |
+
|
# Example usage:
if __name__ == "__main__":
    # Quick manual check of the TTS path only; the full pipeline would be
    # get_scenes -> generate_image per prompt (see generate_video_assets).
    sample_story = """
    In a quiet village, a young girl named Lily discovered a hidden garden.
    Every flower in the garden glowed with a magical light, revealing secrets of the past.
    Lily knew she had found something truly extraordinary.
    """
    generate_audio(sample_story, "Urdu", "Asad")