Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -9,101 +9,152 @@ import uuid
|
|
9 |
import tempfile
|
10 |
import shlex
|
11 |
import shutil
|
12 |
-
import logging
|
|
|
13 |
|
14 |
-
#
|
|
|
|
|
15 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
16 |
|
17 |
# Supported models configuration
|
18 |
MODELS = {
|
19 |
-
"
|
|
|
|
|
20 |
"base_url": "https://api.deepseek.com/v1",
|
21 |
"env_key": "DEEPSEEK_API_KEY",
|
|
|
22 |
},
|
23 |
"Qwen/Qwen2.5-Coder-32B-Instruct": {
|
24 |
-
"base_url": "https://api-inference.huggingface.co/v1/",
|
25 |
"env_key": "HF_TOKEN",
|
|
|
|
|
|
|
|
|
26 |
},
|
27 |
-
#
|
|
|
|
|
|
|
|
|
|
|
28 |
}
|
29 |
|
30 |
-
#
|
31 |
-
def get_first_available_key_config():
|
32 |
-
for model, config in MODELS.items():
|
33 |
-
if config["env_key"] in os.environ and os.environ[config["env_key"]]:
|
34 |
-
logging.info(f"Using API key for model: {model}")
|
35 |
-
return config
|
36 |
-
return None
|
37 |
-
|
38 |
-
# Initialize client with first available model configuration
|
39 |
-
initial_config = get_first_available_key_config()
|
40 |
-
if initial_config:
|
41 |
-
client = OpenAI(
|
42 |
-
base_url=initial_config["base_url"],
|
43 |
-
api_key=os.environ[initial_config["env_key"]],
|
44 |
-
)
|
45 |
-
initial_model_choice = next(iter(MODELS.keys())) # Keep track of which model config was used initially
|
46 |
-
else:
|
47 |
-
logging.warning("No API keys found in environment variables for configured models. API calls will fail.")
|
48 |
-
# Initialize with placeholder values or handle error as appropriate
|
49 |
-
client = None # Or raise an error, or use a default config if applicable
|
50 |
-
initial_model_choice = list(MODELS.keys())[0] # Default UI selection
|
51 |
-
|
52 |
allowed_medias = [
|
53 |
".png", ".jpg", ".webp", ".jpeg", ".tiff", ".bmp", ".gif", ".svg",
|
54 |
-
".mp3", ".wav", ".ogg", ".aac", ".flac",
|
55 |
".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v",
|
56 |
".3gp", ".3g2", ".3gpp",
|
57 |
]
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
def get_files_infos(files):
|
|
|
61 |
results = []
|
62 |
if not files:
|
63 |
return results
|
64 |
|
65 |
for file_obj in files:
|
66 |
file_path = Path(file_obj.name)
|
67 |
-
info = {"error": None
|
68 |
try:
|
69 |
info["size"] = os.path.getsize(file_path)
|
70 |
-
# Sanitize filename
|
71 |
-
|
72 |
-
|
73 |
-
info["
|
74 |
-
|
75 |
|
76 |
-
|
|
|
|
|
|
|
77 |
info["type"] = "video"
|
78 |
try:
|
79 |
-
|
|
|
|
|
|
|
80 |
info["duration"] = video.duration
|
81 |
-
info["dimensions"] = f"{video.size[0]}x{video.size[1]}"
|
82 |
if video.audio:
|
83 |
info["type"] = "video/audio"
|
84 |
-
info["audio_channels"] = video.audio.nchannels
|
85 |
-
video.close()
|
86 |
except UnicodeDecodeError as ude:
|
87 |
-
info["error"] = f"Metadata decoding error ({ude}).
|
88 |
-
logging.warning(f"UnicodeDecodeError processing video {info['
|
|
|
|
|
|
|
89 |
except Exception as e:
|
90 |
info["error"] = f"Error reading video metadata ({type(e).__name__})."
|
91 |
-
logging.warning(f"Error processing video {info['
|
92 |
|
93 |
-
|
|
|
94 |
info["type"] = "audio"
|
95 |
try:
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
|
|
100 |
except UnicodeDecodeError as ude:
|
101 |
-
info["error"] = f"Metadata decoding error ({ude}).
|
102 |
-
logging.warning(f"UnicodeDecodeError processing audio {info['
|
|
|
|
|
|
|
103 |
except Exception as e:
|
104 |
info["error"] = f"Error reading audio metadata ({type(e).__name__})."
|
105 |
-
logging.warning(f"Error processing audio {info['
|
106 |
|
|
|
107 |
elif file_extension in (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".svg", ".webp"):
|
108 |
info["type"] = "image"
|
109 |
try:
|
@@ -111,21 +162,25 @@ def get_files_infos(files):
|
|
111 |
info["dimensions"] = f"{img.size[0]}x{img.size[1]}"
|
112 |
except Exception as e:
|
113 |
info["error"] = f"Error reading image metadata ({type(e).__name__})."
|
114 |
-
logging.warning(f"Error processing image {info['
|
115 |
|
116 |
else:
|
117 |
info["type"] = "unknown"
|
118 |
info["error"] = "Unsupported file type."
|
119 |
-
logging.warning(f"Unsupported file type: {info['
|
120 |
|
121 |
except OSError as ose:
|
122 |
info["error"] = f"File system error: {ose}"
|
123 |
logging.error(f"OSError accessing file {file_path}: {ose}", exc_info=True)
|
124 |
-
if "name" not in info: info["name"] =
|
|
|
|
|
|
|
|
|
125 |
except Exception as e:
|
126 |
info["error"] = f"Unexpected error processing file: {e}"
|
127 |
logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
|
128 |
-
if "name" not in info: info["name"] =
|
129 |
|
130 |
results.append(info)
|
131 |
|
@@ -133,325 +188,386 @@ def get_files_infos(files):
|
|
133 |
|
134 |
|
135 |
def get_completion(prompt, files_info, top_p, temperature, model_choice):
|
136 |
-
|
|
|
137 |
|
138 |
if client is None:
|
139 |
-
|
|
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
files_info_string += "|------|------|------------|----------|----------------|--------|\n"
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
name = file_info.get("name", "N/A")
|
148 |
-
# Use original name or sanitized name for display in the table, decide which is better
|
149 |
-
display_name = file_info.get("original_name", name) # Prefer original name for user readability
|
150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
file_type = file_info.get("type", "N/A")
|
152 |
dimensions = file_info.get("dimensions", "-")
|
153 |
-
|
154 |
-
|
|
|
|
|
155 |
status = "Error" if file_info.get("error") else "OK"
|
|
|
|
|
156 |
|
157 |
-
files_info_string += f"| {file_type} | {
|
158 |
if file_info.get("error"):
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
You are a
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
AVAILABLE ASSETS LIST:
|
196 |
|
197 |
{files_info_string}
|
198 |
|
199 |
-
OBJECTIVE
|
200 |
-
Make sure the final output file is named exactly "output.mp4".
|
201 |
|
202 |
-
|
203 |
-
"""
|
204 |
-
},
|
205 |
-
]
|
206 |
-
try:
|
207 |
-
# Print the complete prompt for debugging
|
208 |
-
logging.info("\n=== COMPLETE PROMPT ===\n")
|
209 |
-
for msg in messages:
|
210 |
-
logging.info(f"\n[{msg['role'].upper()}]:\n{msg['content']}")
|
211 |
-
logging.info("=====================\n")
|
212 |
-
|
213 |
-
if model_choice not in MODELS:
|
214 |
-
raise ValueError(f"Model {model_choice} is not supported")
|
215 |
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
raise gr.Error(f"API Key ({model_config['env_key']}) not found in environment variables for model {model_choice}.")
|
221 |
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
#
|
226 |
-
|
227 |
|
228 |
completion = client.chat.completions.create(
|
229 |
-
model=
|
230 |
messages=messages,
|
231 |
temperature=temperature,
|
232 |
top_p=top_p,
|
233 |
-
max_tokens=
|
234 |
)
|
235 |
-
content = completion.choices[0].message.content.strip()
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
logging.
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
return command
|
261 |
|
262 |
except Exception as e:
|
263 |
-
logging.error(f"
|
264 |
-
#
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
266 |
|
|
|
267 |
|
268 |
def update(
|
269 |
files,
|
270 |
prompt,
|
271 |
top_p=1,
|
272 |
temperature=1,
|
273 |
-
model_choice=
|
274 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
if not files:
|
276 |
-
raise gr.Error("Please upload at least one media file.")
|
277 |
if not prompt:
|
278 |
-
raise gr.Error("Please enter editing instructions (prompt).")
|
279 |
-
if
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
logging.info(f"API Client initialized/updated for model: {model_choice}")
|
292 |
-
|
|
|
293 |
raise gr.Error(f"Failed to initialize API client: {e}")
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
|
296 |
-
#
|
|
|
297 |
files_info = get_files_infos(files)
|
298 |
-
file_errors = [f"- {f.get('original_name',
|
|
|
299 |
if file_errors:
|
300 |
-
error_message = "Errors occurred while processing uploaded files:\n" + "\n".join(file_errors)
|
301 |
logging.error(error_message)
|
302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
-
#
|
305 |
for file_info in files_info:
|
306 |
-
if file_info
|
307 |
-
|
308 |
-
|
309 |
-
|
|
|
310 |
|
311 |
-
#
|
312 |
command_string = None
|
313 |
-
attempts = 0
|
314 |
-
max_attempts = 2 # Allow one retry
|
315 |
-
last_exception = None
|
316 |
-
|
317 |
-
while attempts < max_attempts:
|
318 |
-
logging.info(f"Attempt {attempts + 1} to generate FFMPEG command.")
|
319 |
-
try:
|
320 |
-
command_string = get_completion(
|
321 |
-
prompt, files_info, top_p, temperature, model_choice
|
322 |
-
)
|
323 |
-
logging.info(
|
324 |
-
f"Generated FFMPEG command string:\n{command_string}\n"
|
325 |
-
)
|
326 |
-
break # Success, exit loop
|
327 |
-
except Exception as e:
|
328 |
-
last_exception = e
|
329 |
-
logging.warning(f"Attempt {attempts + 1} failed: {e}")
|
330 |
-
attempts += 1
|
331 |
-
if attempts >= max_attempts:
|
332 |
-
logging.error("Max attempts reached. Failed to generate valid command.")
|
333 |
-
raise gr.Error(f"Failed to generate FFMPEG command after {max_attempts} attempts. Last error: {last_exception}")
|
334 |
-
|
335 |
-
# 4. Prepare Temporary Directory and Files
|
336 |
-
temp_dir_obj = tempfile.TemporaryDirectory()
|
337 |
-
temp_dir = temp_dir_obj.name
|
338 |
-
logging.info(f"Created temporary directory: {temp_dir}")
|
339 |
try:
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
if not args or args[0].lower() != "ffmpeg":
|
359 |
-
raise gr.Error(f"Generated command does not start with 'ffmpeg'. Command: {command_string}")
|
360 |
-
|
361 |
-
# IMPORTANT: Check and remove the placeholder 'output.mp4' if it's the last argument
|
362 |
-
if args[-1] == "output.mp4":
|
363 |
-
logging.info("Removing placeholder 'output.mp4' from the end of the command.")
|
364 |
-
args.pop()
|
365 |
-
elif "output.mp4" in args:
|
366 |
-
logging.warning("Placeholder 'output.mp4' found but not at the end of the command. Execution might fail.")
|
367 |
-
# Decide if you want to raise an error here or let ffmpeg handle it
|
368 |
-
|
369 |
-
|
370 |
-
# Define the actual output path
|
371 |
-
output_file_name = f"output_{uuid.uuid4()}.mp4"
|
372 |
-
output_file_path = str(Path(temp_dir) / output_file_name)
|
373 |
-
|
374 |
-
# Dry Run (optional but recommended)
|
375 |
-
# Note: Dry run might fail for complex commands even if they are valid for execution
|
376 |
-
# Consider making dry run optional or improving its robustness if needed
|
377 |
-
# dry_run_args = args + ["-f", "null", "-"]
|
378 |
-
# logging.info(f"Performing dry run: {' '.join(dry_run_args)}")
|
379 |
-
# ffmpg_dry_run = subprocess.run(
|
380 |
-
# dry_run_args,
|
381 |
-
# stderr=subprocess.PIPE,
|
382 |
-
# stdout=subprocess.PIPE, # Capture stdout too
|
383 |
-
# text=True,
|
384 |
-
# encoding='utf-8', errors='replace', # Handle potential weird output
|
385 |
-
# cwd=temp_dir,
|
386 |
-
# timeout=30 # Add a timeout
|
387 |
-
# )
|
388 |
-
# if ffmpg_dry_run.returncode != 0:
|
389 |
-
# error_output = ffmpg_dry_run.stderr or ffmpg_dry_run.stdout
|
390 |
-
# logging.error(f"FFMPEG dry run failed. Return code: {ffmpg_dry_run.returncode}\nOutput:\n{error_output}")
|
391 |
-
# raise gr.Error(f"Generated FFMPEG command seems invalid (Dry Run Failed). Please check the command or try different instructions.\nError: {error_output[:500]}...") # Show partial error
|
392 |
-
|
393 |
-
|
394 |
-
# Final Execution
|
395 |
-
final_command = args + ["-y", output_file_path] # Add overwrite flag and final output path
|
396 |
-
logging.info(f"Executing FFMPEG command: ffmpeg {' '.join(final_command[1:])}")
|
397 |
|
398 |
try:
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
|
440 |
-
|
441 |
-
|
|
|
|
|
|
|
442 |
|
443 |
|
444 |
-
# --- Gradio Interface ---
|
445 |
-
with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.themes.Soft()) as demo:
|
446 |
gr.Markdown(
|
447 |
"""
|
448 |
-
#
|
449 |
|
450 |
-
Welcome to the AI Video Editor! This
|
|
|
451 |
|
452 |
-
**No complex software
|
453 |
|
454 |
-
**Get started
|
|
|
455 |
""",
|
456 |
elem_id="header",
|
457 |
)
|
@@ -459,35 +575,28 @@ with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.th
|
|
459 |
with gr.Accordion("📋 Usage Instructions & Examples", open=False):
|
460 |
gr.Markdown(
|
461 |
"""
|
462 |
-
### How to Use
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
* **Top-p & Temperature**: Fine-tune the AI's creativity and randomness. Lower temperature (e.g., 0.1) leads to more predictable results, higher values increase randomness. Top-p controls the diversity of the AI's choices. Default values are usually good starting points.
|
469 |
-
4. **Generate**: Click the **"Run"** button. The AI will generate an FFMPEG command, which will then be executed to produce your video.
|
470 |
-
5. **Review**: The resulting video will appear in the "Generated Video" player. The exact FFMPEG command used will be shown below it.
|
471 |
|
472 |
### Example Instructions
|
473 |
-
|
474 |
-
* `
|
475 |
-
* `
|
476 |
-
* `
|
477 |
-
* `
|
478 |
-
* `
|
479 |
-
* `
|
480 |
-
* `
|
481 |
-
|
482 |
-
|
483 |
-
*
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
* **Be Specific**: Instead of "make it shorter," say "remove the first 5 seconds."
|
488 |
-
* **Use Filenames**: Refer to files by their names (e.g., `Combine intro.mp4 and main.mp4`). The AI uses names with spaces replaced by underscores.
|
489 |
-
* **Specify Details**: For text, mention font size, color, position (e.g., `top_left`, `center`, `bottom_right`). For effects, specify parameters (e.g., `fade duration of 1 second`).
|
490 |
-
* **Keep it Simple**: Aim for one primary goal per instruction. Complex multi-step edits might require breaking down the task or might exceed the AI's ability to generate a single, simple command.
|
491 |
"""
|
492 |
)
|
493 |
|
@@ -495,88 +604,95 @@ with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.th
|
|
495 |
with gr.Column(scale=1):
|
496 |
user_files = gr.File(
|
497 |
file_count="multiple",
|
498 |
-
label="Upload Media Files",
|
499 |
file_types=allowed_medias,
|
500 |
-
# Consider adding interactive=True if needed, default is True
|
501 |
)
|
502 |
user_prompt = gr.Textbox(
|
503 |
placeholder="e.g., 'Combine video1.mp4 and video2.mp4'",
|
504 |
-
label="Instructions / Editing Objective",
|
505 |
lines=3,
|
506 |
)
|
507 |
-
with gr.Accordion("Advanced Parameters", open=False):
|
508 |
-
|
|
|
|
|
509 |
choices=list(MODELS.keys()),
|
510 |
-
value=
|
511 |
-
label="Select AI Model",
|
512 |
-
|
513 |
-
|
514 |
minimum=0.0, maximum=1.0, value=0.7, step=0.05,
|
515 |
-
label="Top-p (
|
516 |
-
|
517 |
-
|
518 |
-
minimum=0.0, maximum=2.0, value=0.
|
519 |
-
label="Temperature (
|
520 |
-
|
521 |
-
|
522 |
|
523 |
with gr.Column(scale=1):
|
524 |
-
|
525 |
-
label="Generated Video Output",
|
526 |
-
interactive=False,
|
527 |
include_audio=True,
|
528 |
)
|
529 |
-
|
530 |
|
531 |
-
#
|
532 |
-
|
533 |
fn=update,
|
534 |
-
inputs=[user_files, user_prompt,
|
535 |
-
outputs=[
|
536 |
-
api_name="generate_edit"
|
537 |
)
|
538 |
|
539 |
-
# Examples
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
],
|
547 |
-
[
|
548 |
-
["./examples/Jiangnan_Rain.mp4"],
|
549 |
-
"Cut the video to extract only the middle 30 seconds (starting at 00:30 and ending at 01:00).",
|
550 |
-
0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS)-1)], # Use second model if available
|
551 |
-
],
|
552 |
-
[
|
553 |
-
["./examples/Lotus_Pond01.mp4"], # Make sure this path exists or adjust
|
554 |
-
"Convert the video to black and white (grayscale) while maintaining the original audio.",
|
555 |
-
0.7, 0.1, list(MODELS.keys())[0],
|
556 |
-
],
|
557 |
-
[
|
558 |
-
["./examples/Lotus_Pond01.mp4"],
|
559 |
-
"Create a slow-motion version of the video by reducing the speed to 0.5x.",
|
560 |
-
0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS)-1)],
|
561 |
-
],
|
562 |
-
[
|
563 |
-
["./examples/image1.jpg", "./examples/image2.png", "./examples/background.mp3"], # Example with images and audio
|
564 |
-
"Create a video slideshow from image1.jpg and image2.png, showing each image for 4 seconds. Use background.mp3 as the audio track.",
|
565 |
-
0.7, 0.1, list(MODELS.keys())[0],
|
566 |
-
],
|
567 |
],
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
575 |
|
576 |
-
#
|
577 |
|
578 |
# --- Launch the App ---
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
|
|
|
|
|
9 |
import tempfile
|
10 |
import shlex
|
11 |
import shutil
|
12 |
+
import logging
|
13 |
+
import traceback # For detailed error logging
|
14 |
|
15 |
+
# --- Configuration ---
|
16 |
+
|
17 |
+
# Configure logging
|
18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
19 |
|
20 |
# Supported models configuration
|
21 |
MODELS = {
|
22 |
+
# Format: "Display Name": {"base_url": "...", "env_key": "...", "model_name_for_api": "..."}
|
23 |
+
# Add your models here
|
24 |
+
"deepseek-ai/DeepSeek-V3": {
|
25 |
"base_url": "https://api.deepseek.com/v1",
|
26 |
"env_key": "DEEPSEEK_API_KEY",
|
27 |
+
"model_name_for_api": "deepseek-chat", # Use the specific model name required by DeepSeek API
|
28 |
},
|
29 |
"Qwen/Qwen2.5-Coder-32B-Instruct": {
|
30 |
+
"base_url": "https://api-inference.huggingface.co/v1/", # Check if correct for chat completions
|
31 |
"env_key": "HF_TOKEN",
|
32 |
+
# Note: HF Inference API might use a different endpoint or format for chat completions.
|
33 |
+
# This base URL might be for text-generation. Adjust if needed.
|
34 |
+
# Also, the model name might need /chat/completions appended or similar.
|
35 |
+
"model_name_for_api": "Qwen/Qwen2.5-Coder-32B-Instruct", # Usually the model ID on HF
|
36 |
},
|
37 |
+
# Example using a local server (like LM Studio, Ollama)
|
38 |
+
# "Local Model (via Ollama)": {
|
39 |
+
# "base_url": "http://localhost:11434/v1", # Ollama's OpenAI-compatible endpoint
|
40 |
+
# "env_key": "OLLAMA_API_KEY", # Often not needed, use "NONE" or similar if no key
|
41 |
+
# "model_name_for_api": "qwen:14b", # The specific model name served by Ollama
|
42 |
+
# },
|
43 |
}
|
44 |
|
45 |
+
# Allowed media file extensions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
allowed_medias = [
|
47 |
".png", ".jpg", ".webp", ".jpeg", ".tiff", ".bmp", ".gif", ".svg",
|
48 |
+
".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a",
|
49 |
".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v",
|
50 |
".3gp", ".3g2", ".3gpp",
|
51 |
]
|
52 |
|
53 |
+
# --- Global Variables ---
|
54 |
+
client = None
|
55 |
+
initial_model_choice = None
|
56 |
+
|
57 |
+
# --- Helper Functions ---
|
58 |
+
|
59 |
+
def get_first_available_key_config():
|
60 |
+
"""Finds the first model config with a valid API key in environment variables."""
|
61 |
+
for model_display_name, config in MODELS.items():
|
62 |
+
api_key = os.environ.get(config["env_key"])
|
63 |
+
# Treat empty string "" as missing key, handle potential "NONE" placeholder
|
64 |
+
if api_key and api_key.upper() != "NONE":
|
65 |
+
logging.info(f"Found valid API key for model: {model_display_name}")
|
66 |
+
return model_display_name, config
|
67 |
+
logging.warning("No valid API keys found in environment variables for any configured models.")
|
68 |
+
return None, None
|
69 |
+
|
70 |
+
def initialize_client():
|
71 |
+
"""Initializes the OpenAI client with the first available config."""
|
72 |
+
global client, initial_model_choice
|
73 |
+
initial_model_choice, config = get_first_available_key_config()
|
74 |
+
if config:
|
75 |
+
try:
|
76 |
+
api_key = os.environ.get(config["env_key"])
|
77 |
+
# Handle case where key is explicitly set to "NONE" or similar for keyless local models
|
78 |
+
effective_api_key = api_key if api_key and api_key.upper() != "NONE" else "required-but-not-used" # Placeholder for local models if needed
|
79 |
+
|
80 |
+
client = OpenAI(
|
81 |
+
base_url=config["base_url"],
|
82 |
+
api_key=effective_api_key,
|
83 |
+
)
|
84 |
+
logging.info(f"OpenAI client initialized for model: {initial_model_choice} using base_url: {config['base_url']}")
|
85 |
+
except Exception as e:
|
86 |
+
logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
|
87 |
+
client = None
|
88 |
+
initial_model_choice = list(MODELS.keys())[0] # Fallback UI selection
|
89 |
+
else:
|
90 |
+
client = None
|
91 |
+
# Set a default model choice for the UI even if client fails
|
92 |
+
initial_model_choice = list(MODELS.keys())[0] if MODELS else None
|
93 |
|
94 |
def get_files_infos(files):
|
95 |
+
"""Extracts metadata from uploaded files, handling potential errors."""
|
96 |
results = []
|
97 |
if not files:
|
98 |
return results
|
99 |
|
100 |
for file_obj in files:
|
101 |
file_path = Path(file_obj.name)
|
102 |
+
info = {"error": None, "original_name": file_path.name}
|
103 |
try:
|
104 |
info["size"] = os.path.getsize(file_path)
|
105 |
+
# Sanitize filename (used in ffmpeg command)
|
106 |
+
info["name"] = file_path.name.replace(" ", "_")
|
107 |
+
# Validate sanitized name (basic check)
|
108 |
+
if not info["name"] or "/" in info["name"] or "\\" in info["name"]:
|
109 |
+
raise ValueError(f"Invalid sanitized filename generated: '{info['name']}'")
|
110 |
|
111 |
+
file_extension = file_path.suffix.lower()
|
112 |
+
|
113 |
+
# Video Processing
|
114 |
+
if file_extension in (".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v", ".3gp", ".3g2", ".3gpp"):
|
115 |
info["type"] = "video"
|
116 |
try:
|
117 |
+
# Ensure ffmpeg is found by moviepy, handle potential issues
|
118 |
+
if not shutil.which("ffmpeg"):
|
119 |
+
raise FileNotFoundError("ffmpeg command not found in PATH. MoviePy cannot process video/audio.")
|
120 |
+
video = VideoFileClip(str(file_path), verbose=False)
|
121 |
info["duration"] = video.duration
|
122 |
+
info["dimensions"] = f"{video.size[0]}x{video.size[1]}" if video.size else "N/A"
|
123 |
if video.audio:
|
124 |
info["type"] = "video/audio"
|
125 |
+
info["audio_channels"] = video.audio.nchannels if hasattr(video.audio, 'nchannels') else "N/A"
|
126 |
+
video.close() # Release file handle
|
127 |
except UnicodeDecodeError as ude:
|
128 |
+
info["error"] = f"Metadata decoding error ({ude}). Duration/dimensions might be missing."
|
129 |
+
logging.warning(f"UnicodeDecodeError processing video '{info['original_name']}': {ude}")
|
130 |
+
except FileNotFoundError as fnf:
|
131 |
+
info["error"] = str(fnf)
|
132 |
+
logging.error(f"FFmpeg not found: {fnf}")
|
133 |
except Exception as e:
|
134 |
info["error"] = f"Error reading video metadata ({type(e).__name__})."
|
135 |
+
logging.warning(f"Error processing video '{info['original_name']}': {e}", exc_info=False) # Log less verbose traceback for common errors
|
136 |
|
137 |
+
# Audio Processing
|
138 |
+
elif file_extension in (".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a"):
|
139 |
info["type"] = "audio"
|
140 |
try:
|
141 |
+
if not shutil.which("ffmpeg"):
|
142 |
+
raise FileNotFoundError("ffmpeg command not found in PATH. MoviePy cannot process video/audio.")
|
143 |
+
audio = AudioFileClip(str(file_path), verbose=False)
|
144 |
+
info["duration"] = audio.duration
|
145 |
+
info["audio_channels"] = audio.nchannels if hasattr(audio, 'nchannels') else "N/A"
|
146 |
+
audio.close()
|
147 |
except UnicodeDecodeError as ude:
|
148 |
+
info["error"] = f"Metadata decoding error ({ude}). Duration/channels might be missing."
|
149 |
+
logging.warning(f"UnicodeDecodeError processing audio '{info['original_name']}': {ude}")
|
150 |
+
except FileNotFoundError as fnf:
|
151 |
+
info["error"] = str(fnf)
|
152 |
+
logging.error(f"FFmpeg not found: {fnf}")
|
153 |
except Exception as e:
|
154 |
info["error"] = f"Error reading audio metadata ({type(e).__name__})."
|
155 |
+
logging.warning(f"Error processing audio '{info['original_name']}': {e}", exc_info=False)
|
156 |
|
157 |
+
# Image Processing
|
158 |
elif file_extension in (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".svg", ".webp"):
|
159 |
info["type"] = "image"
|
160 |
try:
|
|
|
162 |
info["dimensions"] = f"{img.size[0]}x{img.size[1]}"
|
163 |
except Exception as e:
|
164 |
info["error"] = f"Error reading image metadata ({type(e).__name__})."
|
165 |
+
logging.warning(f"Error processing image '{info['original_name']}': {e}", exc_info=False)
|
166 |
|
167 |
else:
|
168 |
info["type"] = "unknown"
|
169 |
info["error"] = "Unsupported file type."
|
170 |
+
logging.warning(f"Unsupported file type: {info['original_name']}")
|
171 |
|
172 |
except OSError as ose:
|
173 |
info["error"] = f"File system error: {ose}"
|
174 |
logging.error(f"OSError accessing file {file_path}: {ose}", exc_info=True)
|
175 |
+
if "name" not in info: info["name"] = info["original_name"].replace(" ", "_") # Ensure sanitized name exists
|
176 |
+
except ValueError as ve: # Catch invalid sanitized name error
|
177 |
+
info["error"] = str(ve)
|
178 |
+
logging.error(f"Filename sanitization error for {info['original_name']}: {ve}")
|
179 |
+
if "name" not in info: info["name"] = f"invalid_name_{uuid.uuid4()}" # Provide a fallback name
|
180 |
except Exception as e:
|
181 |
info["error"] = f"Unexpected error processing file: {e}"
|
182 |
logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
|
183 |
+
if "name" not in info: info["name"] = info["original_name"].replace(" ", "_")
|
184 |
|
185 |
results.append(info)
|
186 |
|
|
|
188 |
|
189 |
|
190 |
def get_completion(prompt, files_info, top_p, temperature, model_choice):
    """Generate a single-line FFMPEG command for the user's objective via the selected AI model.

    Args:
        prompt: Natural-language editing objective from the user.
        files_info: List of per-file metadata dicts produced by get_files_infos
            (keys observed in use: name, type, dimensions, duration,
            audio_channels, error).
        top_p: Nucleus-sampling parameter forwarded to the chat API.
        temperature: Sampling temperature forwarded to the chat API.
        model_choice: Key into the module-level MODELS config.

    Returns:
        The cleaned, single-line ffmpeg command string.

    Raises:
        gr.Error: If the client is missing, no valid files exist, or the API
            call / response processing fails.
        ValueError: If model_choice is not configured.
    """
    global client  # Ensure we use the potentially updated client

    if client is None:
        # This should ideally be caught earlier, but double-check
        raise gr.Error("API Client not initialized. Cannot contact AI.")

    if model_choice not in MODELS:
        raise ValueError(f"Model '{model_choice}' is not found in configuration.")

    model_config = MODELS[model_choice]
    model_name_for_api = model_config["model_name_for_api"]

    # --- Create files info table (Markdown for the AI) ---
    files_info_string = "| Type | Name (for command) | Dimensions | Duration (s) | Audio Channels | Status |\n"
    files_info_string += "|------|--------------------|------------|--------------|----------------|--------|\n"

    valid_files_count = 0
    for file_info in files_info:
        name_for_command = file_info.get("name", "N/A")  # Use sanitized name
        file_type = file_info.get("type", "N/A")
        dimensions = file_info.get("dimensions", "-")
        duration_val = file_info.get('duration')
        duration_str = f"{duration_val:.2f}" if duration_val is not None else "-"
        audio_ch_val = file_info.get('audio_channels')
        audio_ch_str = str(audio_ch_val) if audio_ch_val is not None else "-"
        status = "Error" if file_info.get("error") else "OK"
        if not file_info.get("error"):
            valid_files_count += 1

        files_info_string += f"| {file_type} | `{name_for_command}` | {dimensions} | {duration_str} | {audio_ch_str} | {status} |\n"
        if file_info.get("error"):
            # Provide error details clearly
            files_info_string += f"| `Error Details` | `{file_info['error'][:100]}` | - | - | - | - |\n"  # Truncate long errors

    if valid_files_count == 0:
        raise gr.Error("No valid media files could be processed. Please check the file formats or errors.")

    # --- Construct Messages for the AI ---
    system_prompt = """You are a highly skilled FFMPEG expert simulating a command-line interface.
Given a list of media assets and a user's objective, generate the SIMPLEST POSSIBLE, SINGLE ffmpeg command to achieve the goal.

**Input Files:** Use the filenames provided in the 'Name (for command)' column of the asset list. These names have spaces replaced with underscores.
**Output File:** The final output MUST be named exactly `output.mp4`.
**Output Format:** The final output MUST be a video/mp4 container.

**Key Requirements:**
1. **Single Command:** Output ONLY the ffmpeg command, on a single line. No explanations, no comments, no introductory text, no code blocks (like ```bash ... ```).
2. **Simplicity:** Use the minimum required options. Avoid `-filter_complex` unless absolutely necessary. Prefer direct mapping, simple filters (`-vf`, `-af`), concatenation (`concat` demuxer), etc.
3. **Correctness:** Ensure options, filter syntax, and stream mapping are correct.
4. **Input Names:** Strictly use the provided sanitized input filenames (e.g., `My_Video.mp4`).
5. **Output Name:** End the command with `-y output.mp4` (the `-y` allows overwriting).
6. **Handle Errors:** If an asset has an 'Error' status, try to work around it if possible (e.g., ignore a faulty audio stream if only video is needed), or generate a command that likely fails gracefully if the task is impossible without that asset. Do NOT output error messages yourself, just the command.
7. **Specific Tasks:**
    * *Waveform:* If asked for waveform, use `showwaves` filter (e.g., `"[0:a]showwaves=s=1280x100:mode=line,format=pix_fmts=yuv420p[v]"`), map video and audio (`-map "[v]" -map 0:a?`), and consider making audio mono (`-ac 1`) unless stereo is requested. Use video dimensions if provided, otherwise default to something reasonable like 1280x100.
    * *Image Sequence:* Use `-framerate` and pattern (`img%03d.png`) if applicable. For single images, use `-loop 1 -t duration`.
    * *Text Overlay:* Use `drawtext` filter. Get position (e.g., `x=(w-text_w)/2:y=h-th-10`), font, size, color from user prompt if possible, otherwise use defaults.
    * *Concatenation:* Prefer the `concat` demuxer (requires a temporary file list) over the `concat` filter if possible for simple cases without re-encoding. However, since you MUST output a single command, you might need to use the filter (`[0:v][1:v]concat=n=2:v=1[outv]`) if creating a temp file list isn't feasible within the single command constraint. Prioritize simplicity.

**Example Output:**
ffmpeg -i input_video.mp4 -vf "scale=1280:720" -c:a copy -y output.mp4

**DO NOT include ```bash or ``` anywhere in your response.** Just the raw command.
"""
    user_message_content = f"""Generate the single-line FFMPEG command based on the assets and objective.

**AVAILABLE ASSETS:**
{files_info_string}

**OBJECTIVE:** {prompt}

**FFMPEG Command:**
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message_content},
    ]

    try:
        logging.info(f"Sending request to AI model: {model_name_for_api} at {client.base_url}")
        # Optional: Log the prompt itself (can be very long)
        # logging.debug(f"System Prompt:\n{system_prompt}")
        # logging.debug(f"User Message:\n{user_message_content}")

        completion = client.chat.completions.create(
            model=model_name_for_api,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=1024,  # Adjust token limit as needed
        )
        content = completion.choices[0].message.content.strip()

        logging.info(f"AI Raw Response: '{content}'")

        # --- Command Validation and Cleaning ---
        # FIX: strip leading/trailing code fences independently. The previous
        # check required BOTH fences, so a response with only one was left dirty.
        if content.startswith("```") or content.endswith("```"):
            content = re.sub(r"^```(?:bash|sh)?\s*", "", content)
            content = re.sub(r"\s*```$", "", content)
            content = content.strip()
            logging.warning("AI included code blocks despite instructions, attempting cleanup.")

        # Remove any leading text before "ffmpeg" if necessary
        ffmpeg_index = content.lower().find("ffmpeg ")
        if ffmpeg_index > 0:
            logging.warning(f"AI included leading text, stripping: '{content[:ffmpeg_index]}'")
            content = content[ffmpeg_index:]
        elif ffmpeg_index == -1 and not content.lower().startswith("ffmpeg"):
            logging.error(f"AI response does not contain 'ffmpeg': '{content}'")
            raise ValueError("AI did not generate a valid ffmpeg command.")

        # Ensure it ends with the expected output file pattern (flexible space before -y)
        if not content.rstrip().endswith("-y output.mp4"):
            logging.warning("AI response doesn't end with '-y output.mp4'. Appending it.")
            if content.rstrip().endswith("output.mp4"):
                # FIX: insert -y before the existing output.mp4 instead of
                # appending a duplicate " -y output.mp4" (the old code produced
                # two output arguments, the first of which lacked -y and could
                # make ffmpeg prompt interactively).
                content = content.rstrip()[:-len("output.mp4")].rstrip() + " -y output.mp4"
            elif " output.mp4" not in content:  # Avoid adding if output.mp4 is elsewhere
                content = content.rstrip() + " -y output.mp4"

        # Remove potential extra newlines
        command = content.replace('\n', ' ').replace('\r', '').strip()

        if not command:
            raise ValueError("AI generated an empty command string.")

        logging.info(f"Cleaned AI Command: '{command}'")
        return command

    except Exception as e:
        logging.error(f"Error during AI completion or processing: {e}", exc_info=True)
        # Try to give a more specific error to the user
        if "authentication" in str(e).lower():
            raise gr.Error(f"AI API Authentication Error. Check your API key ({model_config['env_key']}). Error: {e}")
        elif "rate limit" in str(e).lower():
            raise gr.Error(f"AI API Rate Limit Exceeded. Please try again later. Error: {e}")
        else:
            raise gr.Error(f"Failed to get command from AI. Error: {e}")
|
333 |
|
334 |
+
# --- Main Gradio Update Function ---
|
335 |
|
336 |
def update(
    files,
    prompt,
    top_p=1,
    temperature=1,
    model_choice=None,  # Default to None, will use initial_model_choice
):
    """Handles the main logic: file processing, AI call, FFMPEG execution.

    Args:
        files: List of uploaded Gradio file objects (each exposes ``.name``).
        prompt: Natural-language editing instructions from the user.
        top_p: Nucleus-sampling parameter forwarded to the AI model.
        temperature: Sampling temperature forwarded to the AI model.
        model_choice: Key into MODELS; falls back to initial_model_choice.

    Returns:
        Tuple of (path to the generated video file, gr.update carrying the
        generated command as markdown).

    Raises:
        gr.Error: On validation failures, missing API keys, AI errors, or
            FFMPEG execution failures.
    """
    global client

    # Use initial choice if none provided (e.g., from direct call)
    if model_choice is None:
        model_choice = initial_model_choice

    # --- Input Validations ---
    if not files:
        raise gr.Error("❌ Please upload at least one media file.")
    if not prompt:
        raise gr.Error("📝 Please enter editing instructions (prompt).")
    if not model_choice or model_choice not in MODELS:
        raise gr.Error(f"❓ Invalid model selected: {model_choice}. Please choose from the list.")

    # --- Check FFMPEG Availability ---
    if not shutil.which("ffmpeg"):
        error_msg = "❌ FFMPEG command not found in system PATH. This application requires FFMPEG to be installed and accessible."
        logging.error(error_msg)
        raise gr.Error(error_msg)

    # --- Check and potentially update API client ---
    model_config = MODELS[model_choice]
    api_key_env_var = model_config["env_key"]
    api_key = os.environ.get(api_key_env_var)

    # FIX: previously the placeholder was substituted *before* the key was
    # validated, so the "missing key" branch could never fire and a missing
    # env var only surfaced as a confusing API failure later. A missing env
    # var now raises immediately; the literal value "NONE" remains an explicit
    # opt-out for endpoints that do not require authentication.
    if not api_key:
        raise gr.Error(f"🔑 API Key ({api_key_env_var}) for the selected model '{model_choice}' is missing. Please set it as an environment variable.")
    effective_api_key = api_key if api_key.upper() != "NONE" else "required-but-not-used"

    # Initialize or update client if needed
    if client is None:
        logging.warning(f"Client was None, attempting re-initialization for model: {model_choice}")
        try:
            client = OpenAI(base_url=model_config["base_url"], api_key=effective_api_key)
            logging.info(f"API Client initialized/updated for model: {model_choice}")
        except Exception as e:
            logging.error(f"Failed to initialize API client: {e}", exc_info=True)
            raise gr.Error(f"Failed to initialize API client: {e}")
    # If client exists, check if base_url or key needs update for the selected model
    elif client.base_url != model_config["base_url"] or client.api_key != effective_api_key:
        logging.info(f"Updating API client configuration for selected model: {model_choice}")
        client.base_url = model_config["base_url"]
        client.api_key = effective_api_key

    # --- Get File Infos and Check for Errors ---
    logging.info("Processing uploaded files...")
    files_info = get_files_infos(files)
    file_errors = [f"- '{f.get('original_name', 'Unknown file')}': {f['error']}"
                   for f in files_info if f.get("error")]
    if file_errors:
        error_message = "⚠️ Errors occurred while processing uploaded files:\n" + "\n".join(file_errors)
        logging.error(error_message)
        # Allow proceeding if *some* files are okay, but warn the user.
        # Let the AI decide how to handle the errored files based on the prompt.
        # If *all* files have errors, then raise the error.
        if len(file_errors) == len(files_info):
            raise gr.Error(error_message + "\n\nCannot proceed as no files could be read.")
        else:
            gr.Warning(error_message + "\n\nAttempting to proceed with valid files. The AI will be informed about the errors.")

    # --- Validate File Sizes and Durations (Optional limits) ---
    for file_info in files_info:
        if not file_info.get("error"):  # Only check valid files
            # FIX: the threshold was 1024**3 (1GB) while the comment and both
            # user-facing messages say 150MB — the value now matches them.
            if "size" in file_info and file_info["size"] > 150 * 1024 * 1024:  # 150MB limit
                raise gr.Error(f"File '{file_info.get('original_name')}' ({file_info['size'] / (1024*1024):.1f}MB) exceeds the 150MB size limit.")
            if file_info.get("type", "").startswith("video") and "duration" in file_info and file_info["duration"] > 300:  # 5 minute limit for videos
                raise gr.Error(f"Video '{file_info.get('original_name')}' ({file_info['duration']:.0f}s) exceeds the 5-minute duration limit.")

    # --- Get FFMPEG Command from AI ---
    command_string = None
    try:
        logging.info(f"Getting FFMPEG command from AI model: {model_choice}")
        command_string = get_completion(
            prompt, files_info, top_p, temperature, model_choice
        )
    except gr.Error as e:
        raise e  # Propagate Gradio errors directly
    except Exception as e:
        logging.error(f"Failed to get command from AI: {e}", exc_info=True)
        raise gr.Error(f"Failed to get or process command from AI. Error: {e}")

    if not command_string:
        raise gr.Error("AI returned an empty command. Please try again or rephrase.")

    # --- Prepare Temporary Directory and Execute FFMPEG ---
    # Using 'with' ensures cleanup even if errors occur
    with tempfile.TemporaryDirectory() as temp_dir:
        logging.info(f"Created temporary directory: {temp_dir}")
        final_output_location = None  # Path to the final video outside temp dir

        try:
            # Copy necessary files to temp dir using sanitized names
            logging.info("Copying files to temporary directory...")
            input_file_mapping = {}  # Map sanitized name to original path if needed
            for i, file_obj in enumerate(files):
                file_info = files_info[i]
                # Only copy files that were processed without error
                if not file_info.get("error"):
                    original_path = Path(file_obj.name)
                    sanitized_name = file_info['name']
                    destination_path = Path(temp_dir) / sanitized_name
                    try:
                        shutil.copy(original_path, destination_path)
                        logging.info(f"Copied '{original_path.name}' -> '{destination_path}'")
                        input_file_mapping[sanitized_name] = original_path
                    except Exception as copy_err:
                        logging.error(f"Failed to copy file {original_path} to {destination_path}: {copy_err}")
                        # Raise error as ffmpeg will fail if inputs are missing
                        raise gr.Error(f"Failed to prepare input file: {original_path.name}. Error: {copy_err}")

            # --- Parse and Validate FFMPEG Command ---
            try:
                # Split command string safely
                args = shlex.split(command_string)
            except ValueError as e:
                logging.error(f"Command syntax error: {e}. Command: {command_string}")
                raise gr.Error(f"Generated command has syntax errors (e.g., unbalanced quotes): {e}\nCommand: {command_string}")

            if not args or args[0].lower() != "ffmpeg":
                raise gr.Error(f"Generated command does not start with 'ffmpeg'. Command: {command_string}")

            # --- Prepare Final Command Arguments ---
            # Define the actual temporary output path *inside* the temp dir
            temp_output_file_name = f"output_{uuid.uuid4()}.mp4"
            temp_output_path = str(Path(temp_dir) / temp_output_file_name)

            # Replace the placeholder 'output.mp4' with the actual temp output path
            final_args = []
            output_placeholder_found = False
            for arg in args:
                if arg == "output.mp4":
                    # Check if it's preceded by -y, if not, add -y
                    if final_args and final_args[-1] != "-y":
                        final_args.append("-y")
                    final_args.append(temp_output_path)
                    output_placeholder_found = True
                else:
                    final_args.append(arg)

            # If AI forgot output.mp4, add it (shouldn't happen with good prompting)
            if not output_placeholder_found:
                logging.warning("AI command did not include 'output.mp4'. Appending target output path.")
                if final_args[-1] != "-y":
                    final_args.append("-y")
                final_args.append(temp_output_path)

            # --- Execute FFMPEG ---
            logging.info(f"Executing FFMPEG: {' '.join(final_args)}")
            try:
                process = subprocess.run(
                    final_args,
                    cwd=temp_dir,  # Execute in the directory with copied files
                    capture_output=True,  # Captures stdout and stderr
                    text=True,
                    encoding='utf-8', errors='replace',
                    check=True,  # Raise CalledProcessError if return code is non-zero
                    timeout=3000  # 50-minute safety timeout (FIX: old comment claimed 5 minutes)
                )
                logging.info("FFMPEG command executed successfully.")
                # Log stderr as it often contains useful info/warnings
                if process.stderr: logging.info(f"FFMPEG stderr:\n{process.stderr}")
                # Log stdout only if needed for debugging
                if process.stdout: logging.debug(f"FFMPEG stdout:\n{process.stdout}")

            except subprocess.CalledProcessError as e:
                error_output = e.stderr or e.stdout or "No output captured."
                logging.error(f"FFMPEG execution failed! Return code: {e.returncode}\nCommand: {' '.join(e.cmd)}\nOutput:\n{error_output}")
                error_summary = error_output.strip().split('\n')[-1]  # Get last line
                raise gr.Error(f"❌ FFMPEG execution failed: {error_summary}\n(Check logs/console for full command and error details)")
            except subprocess.TimeoutExpired as e:
                logging.error(f"FFMPEG command timed out after {e.timeout} seconds.\nCommand: {' '.join(e.cmd)}")
                raise gr.Error(f"⏳ FFMPEG command timed out after {e.timeout} seconds. The operation might be too complex or files too large.")
            except FileNotFoundError as e:
                # This should be caught earlier, but double-check
                logging.error(f"FFMPEG command failed: {e}. Is ffmpeg installed and in PATH?")
                raise gr.Error(f"❌ FFMPEG execution failed: '{e.filename}' not found. Ensure FFMPEG is installed and accessible.")

            # --- Copy Result Out of Temp Directory ---
            if Path(temp_output_path).exists() and os.path.getsize(temp_output_path) > 0:
                # Create an output directory if it doesn't exist
                output_dir = Path("./output_videos")
                output_dir.mkdir(parents=True, exist_ok=True)
                # Copy to a filename based on UUID to avoid collisions
                final_output_location = shutil.copy(temp_output_path, output_dir / f"{Path(temp_output_path).stem}.mp4")
                logging.info(f"Copied final output video to: {final_output_location}")
            else:
                logging.error(f"FFMPEG seemed to succeed, but output file '{temp_output_path}' is missing or empty.")
                raise gr.Error("❌ FFMPEG finished, but the output file was not created or is empty. Check the generated command and logs.")

            # --- Prepare Display Command (using original placeholder) ---
            display_command_markdown = f"### Generated Command\n```bash\n{command_string}\n```"

            # --- Return Results ---
            return final_output_location, gr.update(value=display_command_markdown)

        except Exception as e:
            # Catch any other unexpected errors during setup or execution within the temp dir
            logging.error(f"Error during processing: {e}", exc_info=True)
            # No need to manually cleanup temp_dir, 'with' handles it
            if isinstance(e, gr.Error): raise e  # Re-raise Gradio errors
            else: raise gr.Error(f"An unexpected error occurred: {e}")
|
550 |
|
551 |
+
# --- Initialize Client on Startup ---
# NOTE(review): initialize_client() is not defined in this part of the file —
# presumably it lives earlier, next to the MODELS config, and sets the
# module-level `client` / `initial_model_choice`. Verify it exists before ship.
initialize_client()
if client is None and initial_model_choice:
    # Startup proceeds even without a working client; `update` re-validates
    # and raises a user-facing gr.Error on first use.
    logging.warning("Application starting without a functional AI client due to initialization errors or missing keys.")
    # Consider showing a warning in the UI if possible, or rely on errors during `update`
|
556 |
|
557 |
|
558 |
+
# --- Gradio Interface Definition ---
# Builds the whole UI: header, instructions accordion, upload/prompt column,
# advanced-parameter accordion, output column, event wiring, and examples.
with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.themes.Soft(primary_hue=gr.themes.colors.sky)) as demo:
    gr.Markdown(
        """
        # 🏞️ AI Video Editor: Your Smart Editing Assistant 🎬

        Welcome to the AI Video Editor! This tool uses AI models like **DeepSeek-V3** or **Qwen** to understand your editing needs in plain English.
        Upload your media, describe the desired result, and the AI generates the **FFMPEG command** to create your video.

        **No complex software needed!** Ideal for quick edits, learning FFMPEG, or automating simple video tasks. Trim, merge, add text, change speed, apply filters, combine media – just tell the AI!

        **Get started:** Upload files, type instructions, click **"🚀 Run Edit"**!
        *(Ensure FFMPEG is installed on the system running this app.)*
        """,
        elem_id="header",
    )

    with gr.Accordion("📋 Usage Instructions & Examples", open=False):
        gr.Markdown(
            """
            ### How to Use
            1. **Upload Files**: Use the "Upload Media Files" area.
            2. **Write Instructions**: Describe the edit in the "Instructions" box.
            3. **(Optional) Adjust Parameters**: Select AI model, tweak Top-p/Temperature for creativity.
            4. **Generate**: Click **"🚀 Run Edit"**.
            5. **Review**: Watch the result in "Generated Video Output". The FFMPEG command used appears below.

            ### Example Instructions
            * `Trim the video to keep only the segment from 10s to 25s.`
            * `Concatenate video1.mp4 and video2.mp4.`
            * `Add text "Hello World" at the bottom center, white font, size 24.`
            * `Convert video to black and white.`
            * `Create slideshow from image1.jpg, image2.png (5s each) with background.mp3.`
            * `Resize video to 1280x720.`
            * `Speed up video 2x.`
            * `Generate waveform visualization for the audio file, 1280x120 pixels.`

            ### Tips
            * **Be Specific**: "remove first 5 seconds" is better than "make shorter".
            * **Use Filenames**: Refer to files like `Combine intro.mp4 and main.mp4` (AI uses names with underscores).
            * **Details Matter**: For text, specify position, color, size. For fades, mention duration.
            * **Keep it Simple**: One main goal per instruction works best.
            """
        )

    # NOTE(review): this Row context was omitted by the mangled diff view but is
    # required for the two scale=1 columns to sit side by side — verify against
    # the full file.
    with gr.Row():
        with gr.Column(scale=1):
            user_files = gr.File(
                file_count="multiple",
                label="📤 Upload Media Files",
                file_types=allowed_medias,
            )
            user_prompt = gr.Textbox(
                placeholder="e.g., 'Combine video1.mp4 and video2.mp4'",
                label="📝 Instructions / Editing Objective",
                lines=3,
            )
            with gr.Accordion("⚙️ Advanced Parameters", open=False):
                # Ensure initial_model_choice is valid before setting value
                valid_initial_model = initial_model_choice if initial_model_choice in MODELS else (list(MODELS.keys())[0] if MODELS else None)
                model_choice_dropdown = gr.Dropdown(  # Dropdown scales better than radio buttons with many models
                    choices=list(MODELS.keys()),
                    value=valid_initial_model,
                    label="🧠 Select AI Model",
                )
                top_p_slider = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.7, step=0.05,
                    label="Top-p (Diversity)", info="Lower values = more focused, higher = more random."
                )
                temperature_slider = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.2, step=0.1,  # Default lower temp for more predictable ffmpeg
                    label="Temperature (Randomness)", info="Lower values = more deterministic, higher = more creative/random."
                )
            run_button = gr.Button("🚀 Run Edit", variant="primary")

        with gr.Column(scale=1):
            generated_video_output = gr.Video(
                label="🎬 Generated Video Output",
                interactive=False,
                include_audio=True,
            )
            generated_command_output = gr.Markdown(label="💻 Generated FFMPEG Command")

    # --- Event Handling ---
    run_button.click(
        fn=update,
        inputs=[user_files, user_prompt, top_p_slider, temperature_slider, model_choice_dropdown],
        outputs=[generated_video_output, generated_command_output],
        api_name="generate_edit"
    )

    # --- Examples ---
    # IMPORTANT: Update example file paths relative to where you run the script!
    # Create an 'examples' folder or adjust paths.
    example_list = [
        [
            ["./examples/video1.mp4"],  # Make sure this path exists
            "Add text 'Watermark' to the top right corner, white font, size 18, slightly transparent.",
            0.7, 0.2, list(MODELS.keys())[0] if MODELS else None,
        ],
        [
            ["./examples/video1.mp4"],
            "Cut the video to keep only 10 seconds, starting from 00:00:15.",
            0.7, 0.2, list(MODELS.keys())[min(1, len(MODELS)-1)] if len(MODELS) > 1 else (list(MODELS.keys())[0] if MODELS else None),
        ],
        [
            ["./examples/video2.mp4"],  # Make sure this path exists
            "Convert the video to grayscale (black and white).",
            0.7, 0.2, list(MODELS.keys())[0] if MODELS else None,
        ],
        [
            ["./examples/image1.jpg", "./examples/image2.png", "./examples/audio.mp3"],  # Make sure paths exist
            "Create a slideshow: image1.jpg for 5s, then image2.png for 5s. Use audio.mp3 as background music. Output size 1920x1080.",
            0.7, 0.2, list(MODELS.keys())[0] if MODELS else None,
        ],
    ]
    # Filter out examples if no models are configured
    valid_examples = [ex for ex in example_list if ex[4] is not None]

    if valid_examples:
        gr.Examples(
            examples=valid_examples,
            inputs=[user_files, user_prompt, top_p_slider, temperature_slider, model_choice_dropdown],
            outputs=[generated_video_output, generated_command_output],
            fn=update,
            # FIX: was True, contradicting its own inline comment. Caching would
            # run every example (AI call + ffmpeg) at startup and fail when no
            # API keys or example files are present.
            cache_examples=False,
            label="✨ Example Use Cases (Click to Run)",
            run_on_click=False,
        )
    else:
        gr.Markdown("_(Examples disabled as no models seem to be configured with API keys)_")

    # Footer removed as requested
|
691 |
|
692 |
# --- Launch the App ---
if __name__ == "__main__":
    # Set concurrency limit based on resources
    # (20 simultaneous queued jobs; each may spawn an ffmpeg process)
    demo.queue(default_concurrency_limit=20)
    # Launch on 0.0.0.0 to make accessible on network if needed
    # demo.launch(show_api=False, server_name="0.0.0.0")
    demo.launch(show_api=False)  # Default for local/Hugging Face Spaces
|