MultiAgent_System_for_Screenplay_Creation

Running

App Files Community

luke9705 committed 19 days ago

Commit

f1e2fa3

1 Parent(s): 56f7f57

Add tool for generating audio from a sample; update agent initialization and Gradio interface structure; swap back to Gemma for fast testing

Browse files

Files changed (1) hide show

app.py +33 -25

app.py CHANGED Viewed

@@ -44,7 +44,7 @@ def load_file(path: str) -> list | dict:
     if image is not None:
         return [image]
     elif ext.endswith(".mp3") or ext.endswith(".wav"):
-        return {"audio": text, "audio path": path}
     else:
         return {"raw document text": text, "file path": path}
@@ -157,7 +157,6 @@ def generate_audio(prompt: str, duration: int) -> gr.Component:
     Args:
         prompt: The text prompt to generate the audio from.
         duration: Duration of the generated audio in seconds. Max 30 seconds.
     Returns:
         gr.Component: The generated audio as a Gradio Audio component.
     """
@@ -167,18 +166,21 @@ def generate_audio(prompt: str, duration: int) -> gr.Component:
         name="Sound_Generator",
         description="Generate music or sound effects from a text prompt using MusicGen."
     )
-    sound = client(prompt, duration)
     return gr.Audio(value=sound)
 @tool
-def generate_audio_from_sample(prompt: str, duration: int, sample: list[int, np.ndarray] = None) -> gr.Component:
     """
     Generate audio from a text prompt + audio sample using MusicGen.
     Args:
         prompt: The text prompt to generate the audio from.
         duration: Duration of the generated audio in seconds. Max 30 seconds.
-        sample: Optional audio sample to guide generation.
     Returns:
         gr.Component: The generated audio as a Gradio Audio component.
@@ -189,21 +191,24 @@ def generate_audio_from_sample(prompt: str, duration: int, sample: list[int, np.
         name="Sound_Generator",
         description="Generate music or sound effects from a text prompt using MusicGen."
     )
-    sound = client(prompt, duration, sample)
     return gr.Audio(value=sound)
 ## agent definition
 class Agent:
     def __init__(self, ):
         #client = HfApiModel("deepseek-ai/DeepSeek-R1-0528", provider="nebius", api_key=os.getenv("NEBIUS_API_KEY"))
-        client = OpenAIServerModel(
             model_id="claude-opus-4-20250514",
             api_base="https://api.anthropic.com/v1/",
             api_key=os.environ["ANTHROPIC_API_KEY"],
-        )
         self.agent = CodeAgent(
             model=client,
             tools=[DuckDuckGoSearchTool(max_results=5),
@@ -271,23 +276,26 @@ def initialize_agent():
     return agent
 ## gradio interface
-with gr.Blocks() as demo:
-    global agent
-    agent = initialize_agent()
-    gr.ChatInterface(
-                        fn=respond,
-                        type='messages',
-                        multimodal=True,
-                        title='MultiAgent System for Screenplay Creation and Editing',
-                        show_progress='full',
-                        fill_height=True,
-                        fill_width=True,
-                        save_history=True,
-                        additional_inputs=[
                         gr.Checkbox(value=False, label="Web Search",
-                                    info="Enable web search to find information online. If disabled, the agent will only use the provided files and images.",
-                                    render=False),
-                    ])
 if __name__ == "__main__":

     if image is not None:
         return [image]
     elif ext.endswith(".mp3") or ext.endswith(".wav"):
+        return {"audio path": path}
     else:
         return {"raw document text": text, "file path": path}
     Args:
         prompt: The text prompt to generate the audio from.
         duration: Duration of the generated audio in seconds. Max 30 seconds.
     Returns:
         gr.Component: The generated audio as a Gradio Audio component.
     """
         name="Sound_Generator",
         description="Generate music or sound effects from a text prompt using MusicGen."
     )
+    if duration > 30:
+        sound = client(prompt, 30)
+    else:
+        sound = client(prompt, duration)
     return gr.Audio(value=sound)
 @tool
+def generate_audio_from_sample(prompt: str, duration: int, sample_path: str = None) -> gr.Component:
     """
     Generate audio from a text prompt + audio sample using MusicGen.
     Args:
         prompt: The text prompt to generate the audio from.
         duration: Duration of the generated audio in seconds. Max 30 seconds.
+        sample_path: audio sample path to guide generation.
     Returns:
         gr.Component: The generated audio as a Gradio Audio component.
         name="Sound_Generator",
         description="Generate music or sound effects from a text prompt using MusicGen."
     )
+    if duration > 30:
+        sound = client(prompt, 30, sample_path)
+    else:
+        sound = client(prompt, duration, sample_path)
     return gr.Audio(value=sound)
 ## agent definition
 class Agent:
     def __init__(self, ):
         #client = HfApiModel("deepseek-ai/DeepSeek-R1-0528", provider="nebius", api_key=os.getenv("NEBIUS_API_KEY"))
+        client = HfApiModel("google/gemma-3-27b-it", provider="nebius", api_key=os.getenv("NEBIUS_API_KEY"))
+        """client = OpenAIServerModel(
             model_id="claude-opus-4-20250514",
             api_base="https://api.anthropic.com/v1/",
             api_key=os.environ["ANTHROPIC_API_KEY"],
+        )"""
         self.agent = CodeAgent(
             model=client,
             tools=[DuckDuckGoSearchTool(max_results=5),
     return agent
 ## gradio interface
+global agent
+agent = initialize_agent()
+demo = gr.ChatInterface(
+                    fn=respond,
+                    type='messages',
+                    multimodal=True,
+                    title='MultiAgent System for Screenplay Creation and Editing',
+                    show_progress='full',
+                    fill_height=True,
+                    fill_width=True,
+                    save_history=True,
+                    autoscroll=True,
+                    additional_inputs=[
                         gr.Checkbox(value=False, label="Web Search",
+                                info="Enable web search to find information online. If disabled, the agent will only use the provided files and images.",
+                                render=False),
+                            ],
+                    additional_inputs_accordion=gr.Accordion(label="Tools available: ", open=True, render=False)
+                        )
 if __name__ == "__main__":