innoai committed (verified)
Commit 9a15500 · 1 Parent(s): 9437428

Update app2.py

Files changed (1)
  1. app2.py +523 -407
app2.py CHANGED
@@ -9,101 +9,152 @@ import uuid
9
  import tempfile
10
  import shlex
11
  import shutil
12
- import logging # Add logging
 
13
 
14
- # Configure logging
 
 
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
 
17
  # Supported models configuration
18
  MODELS = {
19
- "deepseek-ai/DeepSeek-V3": {
 
 
20
  "base_url": "https://api.deepseek.com/v1",
21
  "env_key": "DEEPSEEK_API_KEY",
 
22
  },
23
  "Qwen/Qwen2.5-Coder-32B-Instruct": {
24
- "base_url": "https://api-inference.huggingface.co/v1/",
25
  "env_key": "HF_TOKEN",
 
 
 
 
26
  },
27
- # Add more models here if needed
 
 
 
 
 
28
  }
29
 
30
- # Function to get the first available API key
31
- def get_first_available_key_config():
32
- for model, config in MODELS.items():
33
- if config["env_key"] in os.environ and os.environ[config["env_key"]]:
34
- logging.info(f"Using API key for model: {model}")
35
- return config
36
- return None
37
-
38
- # Initialize client with first available model configuration
39
- initial_config = get_first_available_key_config()
40
- if initial_config:
41
- client = OpenAI(
42
- base_url=initial_config["base_url"],
43
- api_key=os.environ[initial_config["env_key"]],
44
- )
45
- initial_model_choice = next(iter(MODELS.keys())) # Keep track of which model config was used initially
46
- else:
47
- logging.warning("No API keys found in environment variables for configured models. API calls will fail.")
48
- # Initialize with placeholder values or handle error as appropriate
49
- client = None # Or raise an error, or use a default config if applicable
50
- initial_model_choice = list(MODELS.keys())[0] # Default UI selection
51
-
52
  allowed_medias = [
53
  ".png", ".jpg", ".webp", ".jpeg", ".tiff", ".bmp", ".gif", ".svg",
54
- ".mp3", ".wav", ".ogg", ".aac", ".flac", # Added more audio types
55
  ".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v",
56
  ".3gp", ".3g2", ".3gpp",
57
  ]
58
 
 
 
59
 
60
  def get_files_infos(files):
 
61
  results = []
62
  if not files:
63
  return results
64
 
65
  for file_obj in files:
66
  file_path = Path(file_obj.name)
67
- info = {"error": None} # Initialize error as None
68
  try:
69
  info["size"] = os.path.getsize(file_path)
70
- # Sanitize filename by replacing spaces with underscores
71
- original_name = file_path.name
72
- info["name"] = original_name.replace(" ", "_")
73
- info["original_name"] = original_name # Keep original name for user display if needed
74
- file_extension = file_path.suffix.lower() # Use lower case for consistency
75
 
76
- if file_extension in (".mp4", ".avi", ".mkv", ".mov", ".webm", ".flv", ".wmv", ".mpg", ".mpeg", ".m4v", ".3gp", ".3g2", ".3gpp"):
 
 
 
77
  info["type"] = "video"
78
  try:
79
- video = VideoFileClip(str(file_path)) # Use string path
 
 
 
80
  info["duration"] = video.duration
81
- info["dimensions"] = f"{video.size[0]}x{video.size[1]}"
82
  if video.audio:
83
  info["type"] = "video/audio"
84
- info["audio_channels"] = video.audio.nchannels
85
- video.close()
86
  except UnicodeDecodeError as ude:
87
- info["error"] = f"Metadata decoding error ({ude}). Basic info might be missing."
88
- logging.warning(f"UnicodeDecodeError processing video {info['name']}: {ude}")
 
 
 
89
  except Exception as e:
90
  info["error"] = f"Error reading video metadata ({type(e).__name__})."
91
- logging.warning(f"Error processing video {info['name']}: {e}", exc_info=True) # Log full traceback
92
 
93
- elif file_extension in (".mp3", ".wav", ".ogg", ".aac", ".flac"):
 
94
  info["type"] = "audio"
95
  try:
96
- audio = AudioFileClip(str(file_path)) # Use string path
97
- info["duration"] = audio.duration
98
- info["audio_channels"] = audio.nchannels
99
- audio.close()
 
 
100
  except UnicodeDecodeError as ude:
101
- info["error"] = f"Metadata decoding error ({ude}). Basic info might be missing."
102
- logging.warning(f"UnicodeDecodeError processing audio {info['name']}: {ude}")
 
 
 
103
  except Exception as e:
104
  info["error"] = f"Error reading audio metadata ({type(e).__name__})."
105
- logging.warning(f"Error processing audio {info['name']}: {e}", exc_info=True) # Log full traceback
106
 
 
107
  elif file_extension in (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".svg", ".webp"):
108
  info["type"] = "image"
109
  try:
@@ -111,21 +162,25 @@ def get_files_infos(files):
111
  info["dimensions"] = f"{img.size[0]}x{img.size[1]}"
112
  except Exception as e:
113
  info["error"] = f"Error reading image metadata ({type(e).__name__})."
114
- logging.warning(f"Error processing image {info['name']}: {e}", exc_info=True)
115
 
116
  else:
117
  info["type"] = "unknown"
118
  info["error"] = "Unsupported file type."
119
- logging.warning(f"Unsupported file type: {info['name']}")
120
 
121
  except OSError as ose:
122
  info["error"] = f"File system error: {ose}"
123
  logging.error(f"OSError accessing file {file_path}: {ose}", exc_info=True)
124
- if "name" not in info: info["name"] = file_path.name # Ensure name is present even on early error
 
 
 
 
125
  except Exception as e:
126
  info["error"] = f"Unexpected error processing file: {e}"
127
  logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
128
- if "name" not in info: info["name"] = file_path.name
129
 
130
  results.append(info)
131
 
@@ -133,325 +188,386 @@ def get_files_infos(files):
133
 
134
 
135
  def get_completion(prompt, files_info, top_p, temperature, model_choice):
136
- global client # Ensure we are using the global client object
 
137
 
138
  if client is None:
139
- raise gr.Error("API Client not initialized. Please check API key configuration.")
 
140
 
141
- # --- Create files info table ---
142
- files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels | Status |\n"
143
- files_info_string += "|------|------|------------|----------|----------------|--------|\n"
144
 
145
- for file_info in files_info:
146
- # Use sanitized name for the command context
147
- name = file_info.get("name", "N/A")
148
- # Use original name or sanitized name for display in the table, decide which is better
149
- display_name = file_info.get("original_name", name) # Prefer original name for user readability
150
 
 
 
 
 
 
 
 
151
  file_type = file_info.get("type", "N/A")
152
  dimensions = file_info.get("dimensions", "-")
153
- duration = f"{file_info.get('duration', '-'):.2f}s" if "duration" in file_info and file_info['duration'] is not None else "-"
154
- audio = f"{file_info.get('audio_channels', '-')}" if "audio_channels" in file_info and file_info['audio_channels'] is not None else "-"
 
 
155
  status = "Error" if file_info.get("error") else "OK"
 
 
156
 
157
- files_info_string += f"| {file_type} | {display_name} | {dimensions} | {duration} | {audio} | {status} |\n"
158
  if file_info.get("error"):
159
- files_info_string += f"| `Error Details` | `{file_info['error']}` | - | - | - | - |\n" # Add error details row
160
-
161
- # --- Construct Messages ---
162
- messages = [
163
- {
164
- "role": "system",
165
- "content": """
166
- You are a very experienced media engineer, controlling a UNIX terminal.
167
- You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.
168
-
169
- You are given:
170
- (1) A list of media assets (video, audio, images) with details like name, type, dimensions, duration, and status (including potential errors reading metadata). Use the 'Name' column from the table as the input filename in your command.
171
- (2) A user's objective describing a new video to be created from these assets.
172
-
173
- Your objective is to generate the SIMPLEST POSSIBLE, SINGLE ffmpeg command to achieve the user's goal.
174
-
175
- Key requirements:
176
- - Output exactly ONE ffmpeg command.
177
- - The command MUST be on a single line (no line breaks).
178
- - Use the absolute minimum number of ffmpeg options needed.
179
- - Avoid complex filter chains (`-filter_complex`) unless absolutely necessary. Prefer simpler filters, concatenation, scaling etc.
180
- - The final output file MUST be named exactly `output.mp4`.
181
- - Input filenames in the command MUST match the 'Name' column provided in the asset list (which uses underscores instead of spaces).
182
- - If the user asks for waveform visualization: use `-filter_complex "[0:a]showwaves=s=WxH:mode=line,format=pix_fmts=yuv420p[v]" -map "[v]" -map 0:a?` (replace WxH with desired video dimensions), and ensure audio is mono (`-ac 1`). Assume full video width if not specified.
183
- - For image sequences: Prefer `-framerate` and pattern matching (e.g., `img%03d.png`) if inputs suggest a sequence. Otherwise, use `-loop 1 -t duration` for single images.
184
- - Handle potential errors noted in the asset list gracefully if possible (e.g., if metadata is missing, use sensible defaults or inform the user if the task is impossible).
185
- - NEVER output multiple commands chained with `&&` or `;`.
186
- - NEVER use wildcards like `*` in filenames. Use specific filenames from the list.
187
-
188
- Remember: Simplicity and correctness are key. Generate only the ffmpeg command itself, no explanations.
189
- """,
190
- },
191
- {
192
- "role": "user",
193
- "content": f"""Provide only the single-line FFMPEG shell command to achieve the objective.
194
-
195
- AVAILABLE ASSETS LIST:
196
 
197
  {files_info_string}
198
 
199
- OBJECTIVE: {prompt}
200
- Make sure the final output file is named exactly "output.mp4".
201
 
202
- YOUR FFMPEG COMMAND:
203
- """,
204
- },
205
- ]
206
- try:
207
- # Print the complete prompt for debugging
208
- logging.info("\n=== COMPLETE PROMPT ===\n")
209
- for msg in messages:
210
- logging.info(f"\n[{msg['role'].upper()}]:\n{msg['content']}")
211
- logging.info("=====================\n")
212
-
213
- if model_choice not in MODELS:
214
- raise ValueError(f"Model {model_choice} is not supported")
215
 
216
- model_config = MODELS[model_choice]
217
- api_key = os.environ.get(model_config["env_key"])
218
-
219
- if not api_key:
220
- raise gr.Error(f"API Key ({model_config['env_key']}) not found in environment variables for model {model_choice}.")
221
 
222
- # Update client configuration for the selected model
223
- client.base_url = model_config["base_url"]
224
- client.api_key = api_key
225
- # Determine model name based on provider convention
226
- model_name = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice
227
 
228
  completion = client.chat.completions.create(
229
- model=model_name,
230
  messages=messages,
231
  temperature=temperature,
232
  top_p=top_p,
233
- max_tokens=2048,
234
  )
235
- content = completion.choices[0].message.content.strip() # Strip leading/trailing whitespace
236
-
237
- # Extract command: prioritize code blocks, then raw content
238
- command = content
239
- if "```" in content:
240
- import re
241
- match = re.search(r"```(?:sh|bash)?\s*(ffmpeg.*?)\s*```", content, re.DOTALL | re.IGNORECASE)
242
- if match:
243
- command = match.group(1).strip()
244
- logging.info(f"Extracted command from code block: {command}")
245
- else:
246
- # Fallback if block markers exist but pattern fails
247
- command = content.replace("```sh", "").replace("```bash", "").replace("```", "").strip()
248
- logging.warning(f"Could not extract command reliably from code block, using fallback: {command}")
249
- else:
250
- logging.info(f"No code block detected, using raw content as command: {command}")
251
-
252
- # Basic validation: ensure it starts with ffmpeg
253
- if not command.lower().startswith("ffmpeg "):
254
- logging.error(f"Generated content does not start with ffmpeg: {command}")
255
- raise ValueError("AI did not generate a valid ffmpeg command.")
256
-
257
- # Remove potential leading/trailing quotes if the AI wrapped the whole command
258
- command = command.strip('\'"')
259
-
 
 
260
  return command
261
 
262
  except Exception as e:
263
- logging.error(f"API Error or processing error in get_completion: {e}", exc_info=True)
264
- # Re-raise specific Gradio error for UI display
265
- raise gr.Error(f"Failed to get command from AI: {e}")
 
 
 
 
 
266
 
 
267
 
268
  def update(
269
  files,
270
  prompt,
271
  top_p=1,
272
  temperature=1,
273
- model_choice=initial_model_choice, # Use the initial model as default
274
  ):
 
 
275
  if not files:
276
- raise gr.Error("Please upload at least one media file.")
277
  if not prompt:
278
- raise gr.Error("Please enter editing instructions (prompt).")
279
- if client is None and model_choice in MODELS:
280
- # Check again if client wasn't initialized but a model is chosen
281
- env_key = MODELS[model_choice]["env_key"]
282
- if env_key not in os.environ or not os.environ[env_key]:
283
- raise gr.Error(f"API Key ({env_key}) for the selected model '{model_choice}' is missing. Please set it as an environment variable.")
284
- # Try to re-initialize (or update if partially initialized)
285
- global client
286
- try:
287
- client = OpenAI(
288
- base_url=MODELS[model_choice]["base_url"],
289
- api_key=os.environ[env_key],
290
- )
 
 
291
  logging.info(f"API Client initialized/updated for model: {model_choice}")
292
- except Exception as e:
 
293
  raise gr.Error(f"Failed to initialize API client: {e}")
 
 
 
 
 
294
 
295
 
296
- # 1. Get File Infos and Check for Initial Errors
 
297
  files_info = get_files_infos(files)
298
- file_errors = [f"- {f.get('original_name', f.get('name', 'Unknown file'))}: {f['error']}" for f in files_info if f.get("error")]
 
299
  if file_errors:
300
- error_message = "Errors occurred while processing uploaded files:\n" + "\n".join(file_errors)
301
  logging.error(error_message)
302
- raise gr.Error(error_message)
 
 
303
 
304
- # 2. Validate File Sizes and Durations (optional, based on your constraints)
305
  for file_info in files_info:
306
- if file_info["size"] > 1000 * 1024 * 1024: # 100MB limit
307
- raise gr.Error(f"File '{file_info.get('original_name', file_info['name'])}' exceeds the 100MB size limit.")
308
- if file_info.get("type", "").startswith("video") and file_info.get("duration", 0) > 120: # 2 minute limit for videos
309
- raise gr.Error(f"Video '{file_info.get('original_name', file_info['name'])}' exceeds the 2-minute duration limit.")
 
310
 
311
- # 3. Get FFMPEG Command from AI (with retries if needed)
312
  command_string = None
313
- attempts = 0
314
- max_attempts = 2 # Allow one retry
315
- last_exception = None
316
-
317
- while attempts < max_attempts:
318
- logging.info(f"Attempt {attempts + 1} to generate FFMPEG command.")
319
- try:
320
- command_string = get_completion(
321
- prompt, files_info, top_p, temperature, model_choice
322
- )
323
- logging.info(
324
- f"Generated FFMPEG command string:\n{command_string}\n"
325
- )
326
- break # Success, exit loop
327
- except Exception as e:
328
- last_exception = e
329
- logging.warning(f"Attempt {attempts + 1} failed: {e}")
330
- attempts += 1
331
- if attempts >= max_attempts:
332
- logging.error("Max attempts reached. Failed to generate valid command.")
333
- raise gr.Error(f"Failed to generate FFMPEG command after {max_attempts} attempts. Last error: {last_exception}")
334
-
335
- # 4. Prepare Temporary Directory and Files
336
- temp_dir_obj = tempfile.TemporaryDirectory()
337
- temp_dir = temp_dir_obj.name
338
- logging.info(f"Created temporary directory: {temp_dir}")
339
  try:
340
- copied_file_paths = {}
341
- for i, file_obj in enumerate(files):
342
- original_path = Path(file_obj.name)
343
- # Use the sanitized name consistent with files_info sent to AI
344
- sanitized_name = original_path.name.replace(" ", "_")
345
- destination_path = Path(temp_dir) / sanitized_name
346
- shutil.copy(original_path, destination_path)
347
- logging.info(f"Copied '{original_path.name}' to '{destination_path}'")
348
- copied_file_paths[i] = destination_path # Keep track if needed
349
-
350
- # 5. Validate and Execute FFMPEG Command
351
- try:
352
- # Split command string safely for shell execution
353
- args = shlex.split(command_string)
354
- except ValueError as e:
355
- raise gr.Error(f"Generated command has syntax errors (e.g., unbalanced quotes): {e}\nCommand: {command_string}")
356
-
357
-
358
- if not args or args[0].lower() != "ffmpeg":
359
- raise gr.Error(f"Generated command does not start with 'ffmpeg'. Command: {command_string}")
360
-
361
- # IMPORTANT: Check and remove the placeholder 'output.mp4' if it's the last argument
362
- if args[-1] == "output.mp4":
363
- logging.info("Removing placeholder 'output.mp4' from the end of the command.")
364
- args.pop()
365
- elif "output.mp4" in args:
366
- logging.warning("Placeholder 'output.mp4' found but not at the end of the command. Execution might fail.")
367
- # Decide if you want to raise an error here or let ffmpeg handle it
368
-
369
-
370
- # Define the actual output path
371
- output_file_name = f"output_{uuid.uuid4()}.mp4"
372
- output_file_path = str(Path(temp_dir) / output_file_name)
373
-
374
- # Dry Run (optional but recommended)
375
- # Note: Dry run might fail for complex commands even if they are valid for execution
376
- # Consider making dry run optional or improving its robustness if needed
377
- # dry_run_args = args + ["-f", "null", "-"]
378
- # logging.info(f"Performing dry run: {' '.join(dry_run_args)}")
379
- # ffmpg_dry_run = subprocess.run(
380
- # dry_run_args,
381
- # stderr=subprocess.PIPE,
382
- # stdout=subprocess.PIPE, # Capture stdout too
383
- # text=True,
384
- # encoding='utf-8', errors='replace', # Handle potential weird output
385
- # cwd=temp_dir,
386
- # timeout=30 # Add a timeout
387
- # )
388
- # if ffmpg_dry_run.returncode != 0:
389
- # error_output = ffmpg_dry_run.stderr or ffmpg_dry_run.stdout
390
- # logging.error(f"FFMPEG dry run failed. Return code: {ffmpg_dry_run.returncode}\nOutput:\n{error_output}")
391
- # raise gr.Error(f"Generated FFMPEG command seems invalid (Dry Run Failed). Please check the command or try different instructions.\nError: {error_output[:500]}...") # Show partial error
392
-
393
-
394
- # Final Execution
395
- final_command = args + ["-y", output_file_path] # Add overwrite flag and final output path
396
- logging.info(f"Executing FFMPEG command: ffmpeg {' '.join(final_command[1:])}")
397
 
398
  try:
399
- process = subprocess.run(
400
- final_command,
401
- cwd=temp_dir,
402
- stderr=subprocess.PIPE,
403
- stdout=subprocess.PIPE,
404
- text=True,
405
- encoding='utf-8', errors='replace',
406
- check=True, # Raise CalledProcessError if return code is non-zero
407
- timeout=3000 # Set a reasonable timeout (e.g., 5 minutes)
408
- )
409
- logging.info("FFMPEG command executed successfully.")
410
- logging.info(f"FFMPEG stdout:\n{process.stdout}")
411
- logging.info(f"FFMPEG stderr:\n{process.stderr}")
412
-
413
- except subprocess.CalledProcessError as e:
414
- error_output = e.stderr or e.stdout
415
- logging.error(f"FFMPEG execution failed! Return code: {e.returncode}\nCommand: {' '.join(e.cmd)}\nOutput:\n{error_output}")
416
- raise gr.Error(f"FFMPEG execution failed.\nCommand: ffmpeg {' '.join(final_command[1:])}\nError: {error_output[:1000]}...") # Show more error context
417
- except subprocess.TimeoutExpired as e:
418
- logging.error(f"FFMPEG command timed out after {e.timeout} seconds.\nCommand: {' '.join(e.cmd)}")
419
- raise gr.Error(f"FFMPEG command timed out after {e.timeout} seconds. The operation might be too complex or the files too large.")
420
-
421
-
422
- # 6. Prepare Output
423
- # Display the command used (using the originally generated args + output)
424
- display_command_args = args + ["-y", "output.mp4"] # Reconstruct for display
425
- generated_command_markdown = f"### Generated Command\n```bash\nffmpeg {' '.join(display_command_args[1:])}\n```"
426
-
427
- # Return the path to the generated video and the command markdown
428
- # Gradio needs the actual path; it will handle cleanup if temp_dir_obj goes out of scope
429
- # However, explicitly returning the temp dir object might be safer depending on Gradio version
430
- # For simplicity, returning the path and relying on Gradio's handling of temp files.
431
- return output_file_path, gr.update(value=generated_command_markdown)
 
 
 
 
432
 
433
- except Exception as e:
434
- # Catch any other unexpected errors during setup or execution
435
- logging.error(f"Error in update function: {e}", exc_info=True)
436
- # Clean up the temp directory manually if an error occurred before returning
437
- temp_dir_obj.cleanup()
438
- raise gr.Error(f"An unexpected error occurred: {e}")
 
 
 
 
 
 
439
 
440
- # No finally block needed for temp_dir_obj.cleanup() if using 'with TemporaryDirectory()'
441
- # If not using 'with', ensure cleanup happens in try/except/finally
 
 
 
442
 
443
 
444
- # --- Gradio Interface ---
445
- with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.themes.Soft()) as demo:
446
  gr.Markdown(
447
  """
448
- # 🏞 AI Video Editor: Your Smart Editing Assistant 🎬
449
 
450
- Welcome to the AI Video Editor! This powerful tool leverages advanced AI models like **Qwen2.5-Coder** and **DeepSeek-V3** to understand your editing needs expressed in plain English. Simply upload your video, audio, or image files, describe the desired outcome, and watch as the AI generates the necessary **FFMPEG command** to create your final video.
 
451
 
452
- **No complex software or coding required!** Perfect for quick edits, batch processing ideas, learning FFMPEG syntax, or automating simple video tasks. Whether you need to trim, merge, add text, change speed, apply filters, or combine different media types, just tell the AI what you want.
453
 
454
- **Get started now:** Upload your files, type your instructions, and hit "Run"!
 
455
  """,
456
  elem_id="header",
457
  )
@@ -459,35 +575,28 @@ with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.th
459
  with gr.Accordion("📋 Usage Instructions & Examples", open=False):
460
  gr.Markdown(
461
  """
462
- ### How to Use AI Video Editor
463
-
464
- 1. **Upload Media Files**: Drag & drop or click to upload your video, image, or audio files (`.mp4`, `.mov`, `.mp3`, `.wav`, `.jpg`, `.png`, etc.) into the "Media files" area. Multiple files are allowed.
465
- 2. **Write Instructions**: Clearly describe the editing task in the "Instructions" textbox. Be specific for best results.
466
- 3. **(Optional) Adjust Parameters**:
467
- * **Model**: Choose the AI model you want to use. Different models might have varying strengths in understanding instructions or FFMPEG knowledge.
468
- * **Top-p & Temperature**: Fine-tune the AI's creativity and randomness. Lower temperature (e.g., 0.1) leads to more predictable results, higher values increase randomness. Top-p controls the diversity of the AI's choices. Default values are usually good starting points.
469
- 4. **Generate**: Click the **"Run"** button. The AI will generate an FFMPEG command, which will then be executed to produce your video.
470
- 5. **Review**: The resulting video will appear in the "Generated Video" player. The exact FFMPEG command used will be shown below it.
471
 
472
  ### Example Instructions
473
-
474
- * `Trim the video to keep only the segment from 10 seconds to 25 seconds.`
475
- * `Concatenate video1.mp4 and video2.mp4 into a single video.`
476
- * `Add a text overlay "My Vacation 2024" at the bottom center with a white font.`
477
- * `Convert the input video to black and white.`
478
- * `Create a slideshow from image1.png and image2.png, each shown for 5 seconds, with background_music.mp3.`
479
- * `Resize the video to 1280x720 pixels.`
480
- * `Speed up the video by 2x.`
481
- * `Extract the audio track from the video as an mp3 file.` (Note: Current setup forces mp4 output, adjust system prompt if other outputs needed)
482
- * `Create a picture-in-picture effect with small_video.mp4 overlaid on the top right corner of main_video.mp4.`
483
- * `Generate a waveform visualization for the audio file.`
484
-
485
- ### Tips for Better Results
486
-
487
- * **Be Specific**: Instead of "make it shorter," say "remove the first 5 seconds."
488
- * **Use Filenames**: Refer to files by their names (e.g., `Combine intro.mp4 and main.mp4`). The AI uses names with spaces replaced by underscores.
489
- * **Specify Details**: For text, mention font size, color, position (e.g., `top_left`, `center`, `bottom_right`). For effects, specify parameters (e.g., `fade duration of 1 second`).
490
- * **Keep it Simple**: Aim for one primary goal per instruction. Complex multi-step edits might require breaking down the task or might exceed the AI's ability to generate a single, simple command.
491
  """
492
  )
493
 
@@ -495,88 +604,95 @@ with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.th
495
  with gr.Column(scale=1):
496
  user_files = gr.File(
497
  file_count="multiple",
498
- label="Upload Media Files",
499
  file_types=allowed_medias,
500
- # Consider adding interactive=True if needed, default is True
501
  )
502
  user_prompt = gr.Textbox(
503
  placeholder="e.g., 'Combine video1.mp4 and video2.mp4'",
504
- label="Instructions / Editing Objective",
505
  lines=3,
506
  )
507
- with gr.Accordion("Advanced Parameters", open=False):
508
- model_choice = gr.Radio(
 
 
509
  choices=list(MODELS.keys()),
510
- value=initial_model_choice, # Use the determined initial model
511
- label="Select AI Model",
512
- )
513
- top_p = gr.Slider(
514
  minimum=0.0, maximum=1.0, value=0.7, step=0.05,
515
- label="Top-p (Controls diversity)",
516
- )
517
- temperature = gr.Slider(
518
- minimum=0.0, maximum=2.0, value=0.1, step=0.1, # Max temp usually 1.0 or 2.0
519
- label="Temperature (Controls randomness)",
520
- )
521
- btn = gr.Button("🚀 Run Edit", variant="primary")
522
 
523
  with gr.Column(scale=1):
524
- generated_video = gr.Video(
525
- label="Generated Video Output",
526
- interactive=False, # User cannot change the video here
527
  include_audio=True,
528
  )
529
- generated_command = gr.Markdown(label="Generated FFMPEG Command")
530
 
531
- # Link button click to the update function
532
- btn.click(
533
  fn=update,
534
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
535
- outputs=[generated_video, generated_command],
536
- api_name="generate_edit" # Optional: Define API endpoint name
537
  )
538
 
539
- # Examples Section
540
- gr.Examples(
541
- examples=[
542
- [
543
- ["./examples/Jiangnan_Rain.mp4"], # Make sure this path exists or adjust
544
- "Add a text watermark 'Sample Video' to the upper right corner of the video with white text and semi-transparent background.",
545
- 0.7, 0.1, list(MODELS.keys())[0],
546
- ],
547
- [
548
- ["./examples/Jiangnan_Rain.mp4"],
549
- "Cut the video to extract only the middle 30 seconds (starting at 00:30 and ending at 01:00).",
550
- 0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS)-1)], # Use second model if available
551
- ],
552
- [
553
- ["./examples/Lotus_Pond01.mp4"], # Make sure this path exists or adjust
554
- "Convert the video to black and white (grayscale) while maintaining the original audio.",
555
- 0.7, 0.1, list(MODELS.keys())[0],
556
- ],
557
- [
558
- ["./examples/Lotus_Pond01.mp4"],
559
- "Create a slow-motion version of the video by reducing the speed to 0.5x.",
560
- 0.7, 0.1, list(MODELS.keys())[min(1, len(MODELS)-1)],
561
- ],
562
- [
563
- ["./examples/image1.jpg", "./examples/image2.png", "./examples/background.mp3"], # Example with images and audio
564
- "Create a video slideshow from image1.jpg and image2.png, showing each image for 4 seconds. Use background.mp3 as the audio track.",
565
- 0.7, 0.1, list(MODELS.keys())[0],
566
- ],
567
  ],
568
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
569
- outputs=[generated_video, generated_command],
570
- fn=update,
571
- cache_examples=False, # Set to True if example files are stable and processing is slow
572
- label="Example Use Cases (Click to Run)",
573
- run_on_click=True,
574
- )
 
 
 
 
 
575
 
576
- # Removed the footer markdown about pull requests
577
 
578
  # --- Launch the App ---
579
- # Consider adding concurrency limits based on your hosting capabilities
580
- demo.queue(default_concurrency_limit=50)
581
- # demo.launch(show_api=False, server_name="0.0.0.0") # Allow external access if needed
582
- demo.launch(show_api=False,enable_analytics=False) # Default launch for local/Hugging Face Spaces
 
 
 
9
  import tempfile
10
  import shlex
11
  import shutil
12
+ import logging
13
+ import re # Used by the AI-response cleanup in get_completion (re.sub)
+ import traceback # For detailed error logging
14
 
15
+ # --- Configuration ---
16
+
17
+ # Configure logging
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
19
 
20
  # Supported models configuration
21
  MODELS = {
22
+ # Format: "Display Name": {"base_url": "...", "env_key": "...", "model_name_for_api": "..."}
23
+ # Add your models here
24
+ "deepseek-ai/DeepSeek-V3": {
25
  "base_url": "https://api.deepseek.com/v1",
26
  "env_key": "DEEPSEEK_API_KEY",
27
+ "model_name_for_api": "deepseek-chat", # Use the specific model name required by DeepSeek API
28
  },
29
  "Qwen/Qwen2.5-Coder-32B-Instruct": {
30
+ "base_url": "https://api-inference.huggingface.co/v1/", # Check if correct for chat completions
31
  "env_key": "HF_TOKEN",
32
+ # Note: HF Inference API might use a different endpoint or format for chat completions.
33
+ # This base URL might be for text-generation. Adjust if needed.
34
+ # Also, the model name might need /chat/completions appended or similar.
35
+ "model_name_for_api": "Qwen/Qwen2.5-Coder-32B-Instruct", # Usually the model ID on HF
36
  },
37
+ # Example using a local server (like LM Studio, Ollama)
38
+ # "Local Model (via Ollama)": {
39
+ # "base_url": "http://localhost:11434/v1", # Ollama's OpenAI-compatible endpoint
40
+ # "env_key": "OLLAMA_API_KEY", # Often not needed, use "NONE" or similar if no key
41
+ # "model_name_for_api": "qwen:14b", # The specific model name served by Ollama
42
+ # },
43
  }
44
 
45
+ # Allowed media file extensions
 
 
46
  allowed_medias = [
47
  ".png", ".jpg", ".webp", ".jpeg", ".tiff", ".bmp", ".gif", ".svg",
48
+ ".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a",
49
  ".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v",
50
  ".3gp", ".3g2", ".3gpp",
51
  ]
52
 
53
+ # --- Global Variables ---
54
+ client = None
55
+ initial_model_choice = None
56
+
57
+ # --- Helper Functions ---
58
+
59
+ def get_first_available_key_config():
60
+ """Finds the first model config with a valid API key in environment variables."""
61
+ for model_display_name, config in MODELS.items():
62
+ api_key = os.environ.get(config["env_key"])
63
+ # Treat empty string "" as missing key, handle potential "NONE" placeholder
64
+ if api_key and api_key.upper() != "NONE":
65
+ logging.info(f"Found valid API key for model: {model_display_name}")
66
+ return model_display_name, config
67
+ logging.warning("No valid API keys found in environment variables for any configured models.")
68
+ return None, None
69
+
70
+ def initialize_client():
71
+ """Initializes the OpenAI client with the first available config."""
72
+ global client, initial_model_choice
73
+ initial_model_choice, config = get_first_available_key_config()
74
+ if config:
75
+ try:
76
+ api_key = os.environ.get(config["env_key"])
77
+ # Handle case where key is explicitly set to "NONE" or similar for keyless local models
78
+ effective_api_key = api_key if api_key and api_key.upper() != "NONE" else "required-but-not-used" # Placeholder for local models if needed
79
+
80
+ client = OpenAI(
81
+ base_url=config["base_url"],
82
+ api_key=effective_api_key,
83
+ )
84
+ logging.info(f"OpenAI client initialized for model: {initial_model_choice} using base_url: {config['base_url']}")
85
+ except Exception as e:
86
+ logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
87
+ client = None
88
+ initial_model_choice = list(MODELS.keys())[0] # Fallback UI selection
89
+ else:
90
+ client = None
91
+ # Set a default model choice for the UI even if client fails
92
+ initial_model_choice = list(MODELS.keys())[0] if MODELS else None
93
 
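A quick way to check this key-selection logic before launching is a small standalone script. The sketch below is not part of app2.py; the MODELS_KEYS mapping is introduced here for illustration and simply mirrors the env_key names configured in MODELS above.

```python
# Standalone sketch: report which configured models have a usable API key.
# Mirrors the "NONE" convention handled by get_first_available_key_config().
import os

MODELS_KEYS = {
    "deepseek-ai/DeepSeek-V3": "DEEPSEEK_API_KEY",
    "Qwen/Qwen2.5-Coder-32B-Instruct": "HF_TOKEN",
}

for name, env_key in MODELS_KEYS.items():
    key = os.environ.get(env_key)
    usable = bool(key) and key.upper() != "NONE"  # "NONE" marks an intentionally keyless model
    print(f"{name}: {'key available' if usable else 'missing or NONE'}")
```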
94
  def get_files_infos(files):
95
+ """Extracts metadata from uploaded files, handling potential errors."""
96
  results = []
97
  if not files:
98
  return results
99
 
100
  for file_obj in files:
101
  file_path = Path(file_obj.name)
102
+ info = {"error": None, "original_name": file_path.name}
103
  try:
104
  info["size"] = os.path.getsize(file_path)
105
+ # Sanitize filename (used in ffmpeg command)
106
+ info["name"] = file_path.name.replace(" ", "_")
107
+ # Validate sanitized name (basic check)
108
+ if not info["name"] or "/" in info["name"] or "\\" in info["name"]:
109
+ raise ValueError(f"Invalid sanitized filename generated: '{info['name']}'")
110
 
111
+ file_extension = file_path.suffix.lower()
112
+
113
+ # Video Processing
114
+ if file_extension in (".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".mpg", ".mpeg", ".m4v", ".3gp", ".3g2", ".3gpp"):
115
  info["type"] = "video"
116
  try:
117
+ # Ensure ffmpeg is found by moviepy, handle potential issues
118
+ if not shutil.which("ffmpeg"):
119
+ raise FileNotFoundError("ffmpeg command not found in PATH. MoviePy cannot process video/audio.")
120
+ video = VideoFileClip(str(file_path), verbose=False)
121
  info["duration"] = video.duration
122
+ info["dimensions"] = f"{video.size[0]}x{video.size[1]}" if video.size else "N/A"
123
  if video.audio:
124
  info["type"] = "video/audio"
125
+ info["audio_channels"] = video.audio.nchannels if hasattr(video.audio, 'nchannels') else "N/A"
126
+ video.close() # Release file handle
127
  except UnicodeDecodeError as ude:
128
+ info["error"] = f"Metadata decoding error ({ude}). Duration/dimensions might be missing."
129
+ logging.warning(f"UnicodeDecodeError processing video '{info['original_name']}': {ude}")
130
+ except FileNotFoundError as fnf:
131
+ info["error"] = str(fnf)
132
+ logging.error(f"FFmpeg not found: {fnf}")
133
  except Exception as e:
134
  info["error"] = f"Error reading video metadata ({type(e).__name__})."
135
+ logging.warning(f"Error processing video '{info['original_name']}': {e}", exc_info=False) # Log less verbose traceback for common errors
136
 
137
+ # Audio Processing
138
+ elif file_extension in (".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a"):
139
  info["type"] = "audio"
140
  try:
141
+ if not shutil.which("ffmpeg"):
142
+ raise FileNotFoundError("ffmpeg command not found in PATH. MoviePy cannot process video/audio.")
143
+ audio = AudioFileClip(str(file_path), verbose=False)
144
+ info["duration"] = audio.duration
145
+ info["audio_channels"] = audio.nchannels if hasattr(audio, 'nchannels') else "N/A"
146
+ audio.close()
147
  except UnicodeDecodeError as ude:
148
+ info["error"] = f"Metadata decoding error ({ude}). Duration/channels might be missing."
149
+ logging.warning(f"UnicodeDecodeError processing audio '{info['original_name']}': {ude}")
150
+ except FileNotFoundError as fnf:
151
+ info["error"] = str(fnf)
152
+ logging.error(f"FFmpeg not found: {fnf}")
153
  except Exception as e:
154
  info["error"] = f"Error reading audio metadata ({type(e).__name__})."
155
+ logging.warning(f"Error processing audio '{info['original_name']}': {e}", exc_info=False)
156
 
157
+ # Image Processing
158
  elif file_extension in (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".svg", ".webp"):
159
  info["type"] = "image"
160
  try:
 
162
  info["dimensions"] = f"{img.size[0]}x{img.size[1]}"
163
  except Exception as e:
164
  info["error"] = f"Error reading image metadata ({type(e).__name__})."
165
+ logging.warning(f"Error processing image '{info['original_name']}': {e}", exc_info=False)
166
 
167
  else:
168
  info["type"] = "unknown"
169
  info["error"] = "Unsupported file type."
170
+ logging.warning(f"Unsupported file type: {info['original_name']}")
171
 
172
  except OSError as ose:
173
  info["error"] = f"File system error: {ose}"
174
  logging.error(f"OSError accessing file {file_path}: {ose}", exc_info=True)
175
+ if "name" not in info: info["name"] = info["original_name"].replace(" ", "_") # Ensure sanitized name exists
176
+ except ValueError as ve: # Catch invalid sanitized name error
177
+ info["error"] = str(ve)
178
+ logging.error(f"Filename sanitization error for {info['original_name']}: {ve}")
179
+ if "name" not in info: info["name"] = f"invalid_name_{uuid.uuid4()}" # Provide a fallback name
180
  except Exception as e:
181
  info["error"] = f"Unexpected error processing file: {e}"
182
  logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
183
+ if "name" not in info: info["name"] = info["original_name"].replace(" ", "_")
184
 
185
  results.append(info)
186
 
 
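For reference, a successfully probed video with an audio track yields an info dict roughly like the one below (illustrative values only; duration, dimensions, or audio_channels may be absent when a metadata error is recorded instead).

```python
# Illustrative only: one entry of the list returned by get_files_infos().
example_info = {
    "error": None,
    "original_name": "My Clip.mp4",
    "name": "My_Clip.mp4",    # sanitized name, used in the generated ffmpeg command
    "size": 12_345_678,        # bytes
    "type": "video/audio",     # "video" when the clip has no audio track
    "duration": 42.5,          # seconds
    "dimensions": "1920x1080",
    "audio_channels": 2,
}
```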
188
 
189
 
190
  def get_completion(prompt, files_info, top_p, temperature, model_choice):
191
+ """Generates the FFMPEG command using the selected AI model."""
192
+ global client # Ensure we use the potentially updated client
193
 
194
  if client is None:
195
+ # This should ideally be caught earlier, but double-check
196
+ raise gr.Error("API Client not initialized. Cannot contact AI.")
197
 
198
+ if model_choice not in MODELS:
199
+ raise ValueError(f"Model '{model_choice}' is not found in configuration.")
 
200
 
201
+ model_config = MODELS[model_choice]
202
+ model_name_for_api = model_config["model_name_for_api"]
 
 
 
203
 
204
+ # --- Create files info table (Markdown for the AI) ---
205
+ files_info_string = "| Type | Name (for command) | Dimensions | Duration (s) | Audio Channels | Status |\n"
206
+ files_info_string += "|------|--------------------|------------|--------------|----------------|--------|\n"
207
+
208
+ valid_files_count = 0
209
+ for file_info in files_info:
210
+ name_for_command = file_info.get("name", "N/A") # Use sanitized name
211
  file_type = file_info.get("type", "N/A")
212
  dimensions = file_info.get("dimensions", "-")
213
+ duration_val = file_info.get('duration')
214
+ duration_str = f"{duration_val:.2f}" if duration_val is not None else "-"
215
+ audio_ch_val = file_info.get('audio_channels')
216
+ audio_ch_str = str(audio_ch_val) if audio_ch_val is not None else "-"
217
  status = "Error" if file_info.get("error") else "OK"
218
+ if not file_info.get("error"):
219
+ valid_files_count += 1
220
 
221
+ files_info_string += f"| {file_type} | `{name_for_command}` | {dimensions} | {duration_str} | {audio_ch_str} | {status} |\n"
222
  if file_info.get("error"):
223
+ # Provide error details clearly
224
+ files_info_string += f"| `Error Details` | `{file_info['error'][:100]}` | - | - | - | - |\n" # Truncate long errors
225
+
226
+ if valid_files_count == 0:
227
+ raise gr.Error("No valid media files could be processed. Please check the file formats or errors.")
228
+
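As an illustration, a single valid clip renders into the table handed to the model roughly as follows (hypothetical values):

| Type | Name (for command) | Dimensions | Duration (s) | Audio Channels | Status |
|------|--------------------|------------|--------------|----------------|--------|
| video/audio | `My_Clip.mp4` | 1920x1080 | 42.50 | 2 | OK |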
229
+ # --- Construct Messages for the AI ---
230
+ system_prompt = """You are a highly skilled FFMPEG expert simulating a command-line interface.
231
+ Given a list of media assets and a user's objective, generate the SIMPLEST POSSIBLE, SINGLE ffmpeg command to achieve the goal.
232
+
233
+ **Input Files:** Use the filenames provided in the 'Name (for command)' column of the asset list. These names have spaces replaced with underscores.
234
+ **Output File:** The final output MUST be named exactly `output.mp4`.
235
+ **Output Format:** The final output MUST be a video/mp4 container.
236
+
237
+ **Key Requirements:**
238
+ 1. **Single Command:** Output ONLY the ffmpeg command, on a single line. No explanations, no comments, no introductory text, no code blocks (like ```bash ... ```).
239
+ 2. **Simplicity:** Use the minimum required options. Avoid `-filter_complex` unless absolutely necessary. Prefer direct mapping, simple filters (`-vf`, `-af`), concatenation (`concat` demuxer), etc.
240
+ 3. **Correctness:** Ensure options, filter syntax, and stream mapping are correct.
241
+ 4. **Input Names:** Strictly use the provided sanitized input filenames (e.g., `My_Video.mp4`).
242
+ 5. **Output Name:** End the command with `-y output.mp4` (the `-y` allows overwriting).
243
+ 6. **Handle Errors:** If an asset has an 'Error' status, try to work around it if possible (e.g., ignore a faulty audio stream if only video is needed), or generate a command that likely fails gracefully if the task is impossible without that asset. Do NOT output error messages yourself, just the command.
244
+ 7. **Specific Tasks:**
245
+ * *Waveform:* If asked for waveform, use `showwaves` filter (e.g., `"[0:a]showwaves=s=1280x100:mode=line,format=pix_fmts=yuv420p[v]"`), map video and audio (`-map "[v]" -map 0:a?`), and consider making audio mono (`-ac 1`) unless stereo is requested. Use video dimensions if provided, otherwise default to something reasonable like 1280x100.
246
+ * *Image Sequence:* Use `-framerate` and pattern (`img%03d.png`) if applicable. For single images, use `-loop 1 -t duration`.
247
+ * *Text Overlay:* Use `drawtext` filter. Get position (e.g., `x=(w-text_w)/2:y=h-th-10`), font, size, color from user prompt if possible, otherwise use defaults.
248
+ * *Concatenation:* Prefer the `concat` demuxer (requires a temporary file list) over the `concat` filter if possible for simple cases without re-encoding. However, since you MUST output a single command, you might need to use the filter (`[0:v][1:v]concat=n=2:v=1[outv]`) if creating a temp file list isn't feasible within the single command constraint. Prioritize simplicity.
249
+
250
+ **Example Output:**
251
+ ffmpeg -i input_video.mp4 -vf "scale=1280:720" -c:a copy -y output.mp4
252
+
253
+ **DO NOT include ```bash or ``` anywhere in your response.** Just the raw command.
254
+ """
255
+ user_message_content = f"""Generate the single-line FFMPEG command based on the assets and objective.
256
+
257
+ **AVAILABLE ASSETS:**
 
 
258
 
259
  {files_info_string}
260
 
261
+ **OBJECTIVE:** {prompt}
 
262
 
263
+ **FFMPEG Command:**
264
+ """
 
 
265
 
266
+ messages = [
267
+ {"role": "system", "content": system_prompt},
268
+ {"role": "user", "content": user_message_content},
269
+ ]
 
270
 
271
+ try:
272
+ logging.info(f"Sending request to AI model: {model_name_for_api} at {client.base_url}")
273
+ # Optional: Log the prompt itself (can be very long)
274
+ # logging.debug(f"System Prompt:\n{system_prompt}")
275
+ # logging.debug(f"User Message:\n{user_message_content}")
276
 
277
  completion = client.chat.completions.create(
278
+ model=model_name_for_api,
279
  messages=messages,
280
  temperature=temperature,
281
  top_p=top_p,
282
+ max_tokens=1024, # Adjust token limit as needed
283
  )
284
+ content = completion.choices[0].message.content.strip()
285
+
286
+ logging.info(f"AI Raw Response: '{content}'")
287
+
288
+ # --- Command Validation and Cleaning ---
289
+ # Remove potential markdown code blocks manually if AI didn't follow instructions
290
+ if content.startswith("```") and content.endswith("```"):
291
+ content = re.sub(r"^```(?:bash|sh)?\s*", "", content)
292
+ content = re.sub(r"\s*```$", "", content)
293
+ content = content.strip()
294
+ logging.warning("AI included code blocks despite instructions, attempting cleanup.")
295
+
296
+ # Remove any leading text before "ffmpeg" if necessary
297
+ ffmpeg_index = content.lower().find("ffmpeg ")
298
+ if ffmpeg_index > 0:
299
+ logging.warning(f"AI included leading text, stripping: '{content[:ffmpeg_index]}'")
300
+ content = content[ffmpeg_index:]
301
+ elif ffmpeg_index == -1 and not content.lower().startswith("ffmpeg"):
302
+ logging.error(f"AI response does not contain 'ffmpeg': '{content}'")
303
+ raise ValueError("AI did not generate a valid ffmpeg command.")
304
+
305
+ # Ensure it ends with the expected output file pattern (flexible space before -y)
306
+ if not content.rstrip().endswith("-y output.mp4"):
307
+ logging.warning("AI response doesn't end with '-y output.mp4'. Appending it.")
308
+ # Append '-y output.mp4' if missing, trying to be robust
309
+ if content.rstrip().endswith("output.mp4"):
310
+ content = content.rstrip() + " -y output.mp4" # Add -y if only output.mp4 is there
311
+ elif not " output.mp4" in content: # Avoid adding if output.mp4 is elsewhere
312
+ content = content.rstrip() + " -y output.mp4"
313
+
314
+
315
+ # Remove potential extra newlines
316
+ command = content.replace('\n', ' ').replace('\r', '').strip()
317
+
318
+ if not command:
319
+ raise ValueError("AI generated an empty command string.")
320
+
321
+ logging.info(f"Cleaned AI Command: '{command}'")
322
  return command
323
 
324
  except Exception as e:
325
+ logging.error(f"Error during AI completion or processing: {e}", exc_info=True)
326
+ # Try to give a more specific error to the user
327
+ if "authentication" in str(e).lower():
328
+ raise gr.Error(f"AI API Authentication Error. Check your API key ({model_config['env_key']}). Error: {e}")
329
+ elif "rate limit" in str(e).lower():
330
+ raise gr.Error(f"AI API Rate Limit Exceeded. Please try again later. Error: {e}")
331
+ else:
332
+ raise gr.Error(f"Failed to get command from AI. Error: {e}")
333
 
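To make the cleanup above concrete, here is a standalone sketch of the same fence-stripping and ffmpeg-prefix checks applied to a hypothetical model reply (the real get_completion additionally enforces the trailing `-y output.mp4`; the fence markers are built programmatically here only to keep the example readable).

```python
# Standalone sketch of the response cleanup performed in get_completion().
# The raw reply below is hypothetical.
import re

fence = "`" * 3  # three backticks
raw = fence + "bash\nSure! ffmpeg -i My_Clip.mp4 -vf scale=1280:720 -c:a copy -y output.mp4\n" + fence

content = raw.strip()
if content.startswith(fence) and content.endswith(fence):
    content = re.sub("^" + fence + r"(?:bash|sh)?\s*", "", content)
    content = re.sub(r"\s*" + fence + "$", "", content).strip()

# Drop any chatty text before the actual command.
idx = content.lower().find("ffmpeg ")
if idx > 0:
    content = content[idx:]

command = content.replace("\n", " ").strip()
print(command)
# -> ffmpeg -i My_Clip.mp4 -vf scale=1280:720 -c:a copy -y output.mp4
```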
334
+ # --- Main Gradio Update Function ---
335
 
336
  def update(
337
  files,
338
  prompt,
339
  top_p=1,
340
  temperature=1,
341
+ model_choice=None, # Default to None, will use initial_model_choice
342
  ):
343
+ """Handles the main logic: file processing, AI call, FFMPEG execution."""
344
+ # *** Fix: Declare global client at the beginning ***
345
+ global client
346
+
347
+ # Use initial choice if none provided (e.g., from direct call)
348
+ if model_choice is None:
349
+ model_choice = initial_model_choice
350
+
351
+ # --- Input Validations ---
352
  if not files:
353
+ raise gr.Error("Please upload at least one media file.")
354
  if not prompt:
355
+ raise gr.Error("📝 Please enter editing instructions (prompt).")
356
+ if not model_choice or model_choice not in MODELS:
357
+ raise gr.Error(f"❓ Invalid model selected: {model_choice}. Please choose from the list.")
358
+
359
+ # --- Check FFMPEG Availability ---
360
+ if not shutil.which("ffmpeg"):
361
+ error_msg = "❌ FFMPEG command not found in system PATH. This application requires FFMPEG to be installed and accessible."
362
+ logging.error(error_msg)
363
+ raise gr.Error(error_msg)
364
+
365
+ # --- Check and potentially update API client ---
366
+ model_config = MODELS[model_choice]
367
+ api_key_env_var = model_config["env_key"]
368
+ api_key = os.environ.get(api_key_env_var)
369
+ effective_api_key = api_key if api_key and api_key.upper() != "NONE" else "required-but-not-used"
370
+
371
+ # Check if the key is missing entirely (setting the env var to "NONE" marks an intentionally keyless local model)
372
+ if not api_key:
373
+ raise gr.Error(f"🔑 API Key ({api_key_env_var}) for the selected model '{model_choice}' is missing. Please set it as an environment variable.")
374
+
375
+ # Initialize or update client if needed
376
+ if client is None:
377
+ logging.warning(f"Client was None, attempting re-initialization for model: {model_choice}")
378
+ try:
379
+ client = OpenAI(base_url=model_config["base_url"], api_key=effective_api_key)
380
  logging.info(f"API Client initialized/updated for model: {model_choice}")
381
+ except Exception as e:
382
+ logging.error(f"Failed to initialize API client: {e}", exc_info=True)
383
  raise gr.Error(f"Failed to initialize API client: {e}")
384
+ # If client exists, check if base_url or key needs update for the selected model
385
+ elif client.base_url != model_config["base_url"] or client.api_key != effective_api_key:
386
+ logging.info(f"Updating API client configuration for selected model: {model_choice}")
387
+ client.base_url = model_config["base_url"]
388
+ client.api_key = effective_api_key
389
 
390
 
391
+ # --- Get File Infos and Check for Errors ---
392
+ logging.info("Processing uploaded files...")
393
  files_info = get_files_infos(files)
394
+ file_errors = [f"- '{f.get('original_name', 'Unknown file')}': {f['error']}"
395
+ for f in files_info if f.get("error")]
396
  if file_errors:
397
+ error_message = "⚠️ Errors occurred while processing uploaded files:\n" + "\n".join(file_errors)
398
  logging.error(error_message)
399
+ # Allow proceeding if *some* files are okay, but warn the user.
400
+ # Let the AI decide how to handle the errored files based on the prompt.
401
+ # If *all* files have errors, then raise the error.
402
+ if len(file_errors) == len(files_info):
403
+ raise gr.Error(error_message + "\n\nCannot proceed as no files could be read.")
404
+ else:
405
+ gr.Warning(error_message + "\n\nAttempting to proceed with valid files. The AI will be informed about the errors.")
406
+
407
 
408
+ # --- Validate File Sizes and Durations (Optional limits) ---
409
  for file_info in files_info:
410
+ if not file_info.get("error"): # Only check valid files
411
+ if "size" in file_info and file_info["size"] > 1024 * 1024 * 1024: # 150MB limit
412
+ raise gr.Error(f"File '{file_info.get('original_name')}' ({file_info['size'] / (1024*1024):.1f}MB) exceeds the 150MB size limit.")
413
+ if file_info.get("type", "").startswith("video") and "duration" in file_info and file_info["duration"] > 300: # 5 minute limit for videos
414
+ raise gr.Error(f"Video '{file_info.get('original_name')}' ({file_info['duration']:.0f}s) exceeds the 5-minute duration limit.")
415
 
416
+ # --- Get FFMPEG Command from AI ---
417
  command_string = None
 
 
 
 
 
418
  try:
419
+ logging.info(f"Getting FFMPEG command from AI model: {model_choice}")
420
+ command_string = get_completion(
421
+ prompt, files_info, top_p, temperature, model_choice
422
+ )
423
+ except gr.Error as e:
424
+ raise e # Propagate Gradio errors directly
425
+ except Exception as e:
426
+ logging.error(f"Failed to get command from AI: {e}", exc_info=True)
427
+ raise gr.Error(f"Failed to get or process command from AI. Error: {e}")
428
+
429
+ if not command_string:
430
+ raise gr.Error("AI returned an empty command. Please try again or rephrase.")
431
+
432
+ # --- Prepare Temporary Directory and Execute FFMPEG ---
433
+ # Using 'with' ensures cleanup even if errors occur
434
+ with tempfile.TemporaryDirectory() as temp_dir:
435
+ logging.info(f"Created temporary directory: {temp_dir}")
436
+ final_output_location = None # Path to the final video outside temp dir
 
 
 
 
 
 
437
 
438
  try:
439
+ # Copy necessary files to temp dir using sanitized names
440
+ logging.info("Copying files to temporary directory...")
441
+ input_file_mapping = {} # Map sanitized name to original path if needed
442
+ for i, file_obj in enumerate(files):
443
+ file_info = files_info[i]
444
+ # Only copy files that were processed without error
445
+ if not file_info.get("error"):
446
+ original_path = Path(file_obj.name)
447
+ sanitized_name = file_info['name']
448
+ destination_path = Path(temp_dir) / sanitized_name
449
+ try:
450
+ shutil.copy(original_path, destination_path)
451
+ logging.info(f"Copied '{original_path.name}' -> '{destination_path}'")
452
+ input_file_mapping[sanitized_name] = original_path
453
+ except Exception as copy_err:
454
+ logging.error(f"Failed to copy file {original_path} to {destination_path}: {copy_err}")
455
+ # Raise error as ffmpeg will fail if inputs are missing
456
+ raise gr.Error(f"Failed to prepare input file: {original_path.name}. Error: {copy_err}")
457
+
458
+ # --- Parse and Validate FFMPEG Command ---
459
+ try:
460
+ # Split command string safely
461
+ args = shlex.split(command_string)
462
+ except ValueError as e:
463
+ logging.error(f"Command syntax error: {e}. Command: {command_string}")
464
+ raise gr.Error(f"Generated command has syntax errors (e.g., unbalanced quotes): {e}\nCommand: {command_string}")
465
+
466
+ if not args or args[0].lower() != "ffmpeg":
467
+ raise gr.Error(f"Generated command does not start with 'ffmpeg'. Command: {command_string}")
468
+
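For reference, shlex.split keeps a double-quoted filter argument together as a single token, which is what the parsing above relies on; a small sketch with a hypothetical command:

```python
# Sketch: how shlex.split tokenizes a quoted ffmpeg command string.
import shlex

cmd = 'ffmpeg -i My_Clip.mp4 -vf "drawtext=text=\'Hi\':x=10:y=10" -y output.mp4'
print(shlex.split(cmd))
# ['ffmpeg', '-i', 'My_Clip.mp4', '-vf', "drawtext=text='Hi':x=10:y=10", '-y', 'output.mp4']
```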
469
+ # --- Prepare Final Command Arguments ---
470
+ # Define the actual temporary output path *inside* the temp dir
471
+ temp_output_file_name = f"output_{uuid.uuid4()}.mp4"
472
+ temp_output_path = str(Path(temp_dir) / temp_output_file_name)
473
+
474
+ # Replace the placeholder 'output.mp4' with the actual temp output path
475
+ final_args = []
476
+ output_placeholder_found = False
477
+ for arg in args:
478
+ if arg == "output.mp4":
479
+ # Check if it's preceded by -y, if not, add -y
480
+ if final_args and final_args[-1] != "-y":
481
+ final_args.append("-y")
482
+ final_args.append(temp_output_path)
483
+ output_placeholder_found = True
484
+ else:
485
+ final_args.append(arg)
486
+
487
+ # If AI forgot output.mp4, add it (shouldn't happen with good prompting)
488
+ if not output_placeholder_found:
489
+ logging.warning("AI command did not include 'output.mp4'. Appending target output path.")
490
+ if final_args[-1] != "-y":
491
+ final_args.append("-y")
492
+ final_args.append(temp_output_path)
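In the common case where `-y` already precedes the placeholder, the rewrite above reduces to a plain substitution; a simplified sketch (the loop above additionally inserts `-y` when it is missing, and the temp path shown is hypothetical):

```python
# Simplified sketch of the output-placeholder substitution (assumes '-y' is already present).
args = ["ffmpeg", "-i", "My_Clip.mp4", "-vf", "scale=1280:720", "-y", "output.mp4"]
temp_output_path = "/tmp/tmpabc123/output_1234.mp4"  # hypothetical temp path

final_args = [temp_output_path if a == "output.mp4" else a for a in args]
# ['ffmpeg', '-i', 'My_Clip.mp4', '-vf', 'scale=1280:720', '-y', '/tmp/tmpabc123/output_1234.mp4']
```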
493
+
494
+
495
+ # --- Execute FFMPEG ---
496
+ logging.info(f"Executing FFMPEG: {' '.join(final_args)}")
497
+ try:
498
+ process = subprocess.run(
499
+ final_args,
500
+ cwd=temp_dir, # Execute in the directory with copied files
501
+ capture_output=True, # Captures stdout and stderr
502
+ text=True,
503
+ encoding='utf-8', errors='replace',
504
+ check=True, # Raise CalledProcessError if return code is non-zero
505
+ timeout=3000 # Timeout in seconds (3000s = 50 minutes) for long encodes
506
+ )
507
+ logging.info("FFMPEG command executed successfully.")
508
+ # Log stderr as it often contains useful info/warnings
509
+ if process.stderr: logging.info(f"FFMPEG stderr:\n{process.stderr}")
510
+ # Log stdout only if needed for debugging
511
+ if process.stdout: logging.debug(f"FFMPEG stdout:\n{process.stdout}")
512
+
513
+ except subprocess.CalledProcessError as e:
514
+ error_output = e.stderr or e.stdout or "No output captured."
515
+ logging.error(f"FFMPEG execution failed! Return code: {e.returncode}\nCommand: {' '.join(e.cmd)}\nOutput:\n{error_output}")
516
+ error_summary = error_output.strip().split('\n')[-1] # Get last line
517
+ raise gr.Error(f"❌ FFMPEG execution failed: {error_summary}\n(Check logs/console for full command and error details)")
518
+ except subprocess.TimeoutExpired as e:
519
+ logging.error(f"FFMPEG command timed out after {e.timeout} seconds.\nCommand: {' '.join(e.cmd)}")
520
+ raise gr.Error(f"⏳ FFMPEG command timed out after {e.timeout} seconds. The operation might be too complex or files too large.")
521
+ except FileNotFoundError as e:
522
+ # This should be caught earlier, but double-check
523
+ logging.error(f"FFMPEG command failed: {e}. Is ffmpeg installed and in PATH?")
524
+ raise gr.Error(f"❌ FFMPEG execution failed: '{e.filename}' not found. Ensure FFMPEG is installed and accessible.")
525
+
526
+ # --- Copy Result Out of Temp Directory ---
527
+ if Path(temp_output_path).exists() and os.path.getsize(temp_output_path) > 0:
528
+ # Create an output directory if it doesn't exist
529
+ output_dir = Path("./output_videos")
530
+ output_dir.mkdir(parents=True, exist_ok=True)
531
+ # Copy to a filename based on UUID to avoid collisions
532
+ final_output_location = shutil.copy(temp_output_path, output_dir / f"{Path(temp_output_path).stem}.mp4")
533
+ logging.info(f"Copied final output video to: {final_output_location}")
534
+ else:
535
+ logging.error(f"FFMPEG seemed to succeed, but output file '{temp_output_path}' is missing or empty.")
536
+ raise gr.Error("❌ FFMPEG finished, but the output file was not created or is empty. Check the generated command and logs.")
537
 
538
+ # --- Prepare Display Command (using original placeholder) ---
539
+ display_command_markdown = f"### Generated Command\n```bash\n{command_string}\n```"
540
+
541
+ # --- Return Results ---
542
+ return final_output_location, gr.update(value=display_command_markdown)
543
+
544
+ except Exception as e:
545
+ # Catch any other unexpected errors during setup or execution within the temp dir
546
+ logging.error(f"Error during processing: {e}", exc_info=True)
547
+ # No need to manually cleanup temp_dir, 'with' handles it
548
+ if isinstance(e, gr.Error): raise e # Re-raise Gradio errors
549
+ else: raise gr.Error(f"An unexpected error occurred: {e}")
550
 
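One design note on the `with tempfile.TemporaryDirectory()` block used above: everything inside it is deleted when the block exits, which is why the finished video is copied into `./output_videos` before returning. A minimal, self-contained illustration:

```python
# Sketch: files left inside a TemporaryDirectory vanish on exit; copies made
# outside of it survive.
import pathlib
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    src = pathlib.Path(tmp) / "demo.txt"
    src.write_text("hello")
    kept = shutil.copy(src, pathlib.Path("demo_copy.txt"))

print(pathlib.Path(tmp).exists())   # False: the temporary directory is gone
print(pathlib.Path(kept).exists())  # True: the copied file remains
```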
551
+ # --- Initialize Client on Startup ---
552
+ initialize_client()
553
+ if client is None and initial_model_choice:
554
+ logging.warning("Application starting without a functional AI client due to initialization errors or missing keys.")
555
+ # Consider showing a warning in the UI if possible, or rely on errors during `update`
556
 
557
 
558
+ # --- Gradio Interface Definition ---
559
+ with gr.Blocks(title="AI Video Editor - Edit with Natural Language", theme=gr.themes.Soft(primary_hue=gr.themes.colors.sky)) as demo:
560
  gr.Markdown(
561
  """
562
+ # 🏞️ AI Video Editor: Your Smart Editing Assistant 🎬
563
 
564
+ Welcome to the AI Video Editor! This tool uses AI models like **DeepSeek-V3** or **Qwen** to understand your editing needs in plain English.
565
+ Upload your media, describe the desired result, and the AI generates the **FFMPEG command** to create your video.
566
 
567
+ **No complex software needed!** Ideal for quick edits, learning FFMPEG, or automating simple video tasks. Trim, merge, add text, change speed, apply filters, combine media: just tell the AI!
568
 
569
+ **Get started:** Upload files, type instructions, click **"🚀 Run Edit"**!
570
+ *(Ensure FFMPEG is installed on the system running this app.)*
571
  """,
572
  elem_id="header",
573
  )
 
575
  with gr.Accordion("📋 Usage Instructions & Examples", open=False):
576
  gr.Markdown(
577
  """
578
+ ### How to Use
579
+ 1. **Upload Files**: Use the "Upload Media Files" area.
580
+ 2. **Write Instructions**: Describe the edit in the "Instructions" box.
581
+ 3. **(Optional) Adjust Parameters**: Select AI model, tweak Top-p/Temperature for creativity.
582
+ 4. **Generate**: Click **"🚀 Run Edit"**.
583
+ 5. **Review**: Watch the result in "Generated Video Output". The FFMPEG command used appears below.
 
 
 
584
 
585
  ### Example Instructions
586
+ * `Trim the video to keep only the segment from 10s to 25s.`
587
+ * `Concatenate video1.mp4 and video2.mp4.`
588
+ * `Add text "Hello World" at the bottom center, white font, size 24.`
589
+ * `Convert video to black and white.`
590
+ * `Create slideshow from image1.jpg, image2.png (5s each) with background.mp3.`
591
+ * `Resize video to 1280x720.`
592
+ * `Speed up video 2x.`
593
+ * `Generate waveform visualization for the audio file, 1280x120 pixels.`
594
+
595
+ ### Tips
596
+ * **Be Specific**: "remove first 5 seconds" is better than "make shorter".
597
+ * **Use Filenames**: Refer to files like `Combine intro.mp4 and main.mp4` (AI uses names with underscores).
598
+ * **Details Matter**: For text, specify position, color, size. For fades, mention duration.
599
+ * **Keep it Simple**: One main goal per instruction works best.
 
 
 
 
600
  """
601
  )
602
 
 
604
  with gr.Column(scale=1):
605
  user_files = gr.File(
606
  file_count="multiple",
607
+ label="📤 Upload Media Files",
608
  file_types=allowed_medias,
 
609
  )
610
  user_prompt = gr.Textbox(
611
  placeholder="e.g., 'Combine video1.mp4 and video2.mp4'",
612
+ label="📝 Instructions / Editing Objective",
613
  lines=3,
614
  )
615
+ with gr.Accordion("⚙️ Advanced Parameters", open=False):
616
+ # Ensure initial_model_choice is valid before setting value
617
+ valid_initial_model = initial_model_choice if initial_model_choice in MODELS else (list(MODELS.keys())[0] if MODELS else None)
618
+ model_choice_dropdown = gr.Dropdown( # Changed to Dropdown for better UI with many models
619
  choices=list(MODELS.keys()),
620
+ value=valid_initial_model,
621
+ label="🧠 Select AI Model",
622
+ )
623
+ top_p_slider = gr.Slider(
624
  minimum=0.0, maximum=1.0, value=0.7, step=0.05,
625
+ label="Top-p (Diversity)", info="Lower values = more focused, higher = more random."
626
+ )
627
+ temperature_slider = gr.Slider(
628
+ minimum=0.0, maximum=2.0, value=0.2, step=0.1, # Default lower temp for more predictable ffmpeg
629
+ label="Temperature (Randomness)", info="Lower values = more deterministic, higher = more creative/random."
630
+ )
631
+ run_button = gr.Button("🚀 Run Edit", variant="primary")
632
 
633
  with gr.Column(scale=1):
634
+ generated_video_output = gr.Video(
635
+ label="🎬 Generated Video Output",
636
+ interactive=False,
637
  include_audio=True,
638
  )
639
+ generated_command_output = gr.Markdown(label="💻 Generated FFMPEG Command")
640
 
641
+ # --- Event Handling ---
642
+ run_button.click(
643
  fn=update,
644
+ inputs=[user_files, user_prompt, top_p_slider, temperature_slider, model_choice_dropdown],
645
+ outputs=[generated_video_output, generated_command_output],
646
+ api_name="generate_edit"
647
  )
648
 
649
+ # --- Examples ---
650
+ # IMPORTANT: Update example file paths relative to where you run the script!
651
+ # Create an 'examples' folder or adjust paths.
652
+ example_list = [
653
+ [
654
+ ["./examples/video1.mp4"], # Make sure this path exists
655
+ "Add text 'Watermark' to the top right corner, white font, size 18, slightly transparent.",
656
+ 0.7, 0.2, list(MODELS.keys())[0] if MODELS else None,
 
 
 
 
657
  ],
658
+ [
659
+ ["./examples/video1.mp4"],
660
+ "Cut the video to keep only 10 seconds, starting from 00:00:15.",
661
+ 0.7, 0.2, list(MODELS.keys())[min(1, len(MODELS)-1)] if len(MODELS) > 1 else (list(MODELS.keys())[0] if MODELS else None),
662
+ ],
663
+ [
664
+ ["./examples/video2.mp4"], # Make sure this path exists
665
+ "Convert the video to grayscale (black and white).",
666
+ 0.7, 0.2, list(MODELS.keys())[0] if MODELS else None,
667
+ ],
668
+ [
669
+ ["./examples/image1.jpg", "./examples/image2.png", "./examples/audio.mp3"], # Make sure paths exist
670
+ "Create a slideshow: image1.jpg for 5s, then image2.png for 5s. Use audio.mp3 as background music. Output size 1920x1080.",
671
+ 0.7, 0.2, list(MODELS.keys())[0] if MODELS else None,
672
+ ],
673
+ ]
674
+ # Filter out examples if no models are configured
675
+ valid_examples = [ex for ex in example_list if ex[4] is not None]
676
+
677
+ if valid_examples:
678
+ gr.Examples(
679
+ examples=valid_examples,
680
+ inputs=[user_files, user_prompt, top_p_slider, temperature_slider, model_choice_dropdown],
681
+ outputs=[generated_video_output, generated_command_output],
682
+ fn=update,
683
+ cache_examples=True, # Caches example outputs at launch; set to False if example files change or processing is slow
684
+ label="✨ Example Use Cases (Click to Run)",
685
+ run_on_click=False,
686
+ )
687
+ else:
688
+ gr.Markdown("_(Examples disabled as no models seem to be configured with API keys)_")
689
 
690
+ # Footer removed as requested
691
 
692
  # --- Launch the App ---
693
+ if __name__ == "__main__":
694
+ # Set concurrency limit based on resources
695
+ demo.queue(default_concurrency_limit=20)
696
+ # Launch on 0.0.0.0 to make accessible on network if needed
697
+ # demo.launch(show_api=False, server_name="0.0.0.0")
698
+ demo.launch(show_api=False) # Default for local/Hugging Face Spaces