Spaces: Running on L40S
Commit: Update app.py
Browse files

app.py — CHANGED
@@ -65,12 +65,6 @@ from diffusers.models import AutoencoderKLCogVideoX
 65  from transformers import SiglipImageProcessor, SiglipVisionModel
 66  from diffposetalk.diffposetalk import DiffPoseTalk
 67
 68 -def cleanup_resources():
 69 -    """Clear CUDA cache and garbage collect"""
 70 -    if torch.cuda.is_available():
 71 -        torch.cuda.empty_cache()
 72 -    gc.collect()
 73 -
 74  # Helper functions from the original script
 75  def parse_video(driving_frames, max_frame_num, fps=25):
 76      video_length = len(driving_frames)
@@ -180,8 +174,8 @@ def process_image_audio(image_path, audio_path, guidance_scale=3.0, steps=10, pr
 180      final_output_path = temp_output_file.name
 181
 182      # Set seed
 183 -    seed = 43
 184 -    generator = torch.Generator(device="cuda").manual_seed(seed)
 185
 186      progress(0.2, desc="Processing image...")
 187      # Load and process image
@@ -244,20 +238,20 @@ def process_image_audio(image_path, audio_path, guidance_scale=3.0, steps=10, pr
 244
 245      progress(0.6, desc="Generating animation (this may take a while)...")
 246      # Generate video
 247 - … (old lines 247–260 removed; their content was not captured in this
 260 -    rendering — presumably the previous `pipe(...)` generation call,
         replaced on the new side by the `with torch.no_grad():` block)
 261      out_samples = sample.frames[0]
 262
 263      out_samples = out_samples[2:]  # Skip first two frames
@@ -290,7 +284,10 @@ def process_image_audio(image_path, audio_path, guidance_scale=3.0, steps=10, pr
 290      comparison_with_audio = save_video_with_audio(comparison_path, audio_path, comparison_with_audio)
 291
 292      progress(1.0, desc="Done!")
 293 -
 294      return result_path, comparison_with_audio
 295
 296  # Create Gradio interface
New file (after change), hunk 1 context — lines 65–70:

 65  from transformers import SiglipImageProcessor, SiglipVisionModel
 66  from diffposetalk.diffposetalk import DiffPoseTalk
 67
 68  # Helper functions from the original script
 69  def parse_video(driving_frames, max_frame_num, fps=25):
 70      video_length = len(driving_frames)
New file (after change), hunk 2 — lines 174–181 ("+" marks added lines):

 174      final_output_path = temp_output_file.name
 175
 176      # Set seed
 177 +    # seed = 43
 178 +    # generator = torch.Generator(device="cuda").manual_seed(seed)
 179
 180      progress(0.2, desc="Processing image...")
 181      # Load and process image
New file (after change), hunk 3 — lines 238–257 ("+" marks added lines):

 238
 239      progress(0.6, desc="Generating animation (this may take a while)...")
 240      # Generate video
 241 +    with torch.no_grad():
 242 +        sample = pipe(
 243 +            image=image,
 244 +            image_face=image_face,
 245 +            control_video=input_video,
 246 +            prompt="",
 247 +            negative_prompt="",
 248 +            height=480,
 249 +            width=720,
 250 +            num_frames=49,
 251 +            # generator=generator,
 252 +            guidance_scale=guidance_scale,
 253 +            num_inference_steps=steps,
 254 +        )
 255      out_samples = sample.frames[0]
 256
 257      out_samples = out_samples[2:]  # Skip first two frames
New file (after change), hunk 4 — lines 284–293 ("+" marks added lines):

 284      comparison_with_audio = save_video_with_audio(comparison_path, audio_path, comparison_with_audio)
 285
 286      progress(1.0, desc="Done!")
 287 +
 288 +    torch.cuda.empty_cache()
 289 +    gc.collect()
 290 +
 291      return result_path, comparison_with_audio
 292
 293  # Create Gradio interface