Spaces:
Runtime error
Runtime error
recorded query data, user data, and step data
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +2 -0
- app.py +195 -42
- demo_solver_cache/20250217_062225_8ce3e482/query_image.jpg +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_1.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_10.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_11.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_12.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_13.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_14.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_15.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_16.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_17.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_18.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_19.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_2.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_20.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_3.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_4.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_5.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_6.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_7.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_8.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_9.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/user_feedback.json +0 -22
- demo_solver_cache/20250217_063316_09285db1/query_image.jpg +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_1.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_10.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_11.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_12.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_13.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_14.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_15.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_16.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_17.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_18.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_19.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_2.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_20.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_3.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_4.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_5.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_6.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_7.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_8.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_9.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/user_feedback.json +0 -12
- demo_solver_cache/20250217_183323_b0e58b32/query_image.jpg +0 -0
- demo_solver_cache/20250217_183323_b0e58b32/user_feedback.json +0 -32
- feedback_dataset/feedback-20250217_212246.json +0 -0
- feedback_dataset/feedback-20250217_212401.json +0 -10
.gitignore
CHANGED
@@ -175,4 +175,6 @@ detected_objects/
|
|
175 |
|
176 |
# [Gradio]
|
177 |
demo_solver_cache/
|
|
|
178 |
backups/
|
|
|
|
175 |
|
176 |
# [Gradio]
|
177 |
demo_solver_cache/
|
178 |
+
solver_cache/
|
179 |
backups/
|
180 |
+
data/
|
app.py
CHANGED
@@ -22,41 +22,125 @@ from octotools.models.memory import Memory
|
|
22 |
from octotools.models.executor import Executor
|
23 |
from octotools.models.utils import make_json_serializable
|
24 |
|
25 |
-
from utils import save_feedback
|
26 |
|
27 |
-
|
28 |
-
########### Test Huggingface Dataset ###########
|
29 |
from pathlib import Path
|
30 |
from huggingface_hub import CommitScheduler
|
31 |
|
32 |
-
# Add these near the top of the file with other constants
|
33 |
-
DATASET_DIR = Path("feedback_dataset")
|
34 |
-
DATASET_DIR.mkdir(parents=True, exist_ok=True)
|
35 |
-
DATASET_PATH = DATASET_DIR / f"feedback-{time.strftime('%Y%m%d_%H%M%S')}.json"
|
36 |
-
|
37 |
# Get Huggingface token from environment variable
|
38 |
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
scheduler = CommitScheduler(
|
41 |
repo_id="lupantech/OctoTools-Gradio-Demo-User-Data",
|
42 |
repo_type="dataset",
|
43 |
folder_path=DATASET_DIR,
|
44 |
-
path_in_repo="
|
45 |
token=HF_TOKEN
|
46 |
)
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
########### End of Test Huggingface Dataset ###########
|
61 |
|
62 |
class Solver:
|
@@ -72,7 +156,7 @@ class Solver:
|
|
72 |
verbose: bool = True,
|
73 |
max_steps: int = 10,
|
74 |
max_time: int = 60,
|
75 |
-
|
76 |
):
|
77 |
self.planner = planner
|
78 |
self.memory = memory
|
@@ -83,7 +167,7 @@ class Solver:
|
|
83 |
self.verbose = verbose
|
84 |
self.max_steps = max_steps
|
85 |
self.max_time = max_time
|
86 |
-
self.
|
87 |
|
88 |
self.output_types = output_types.lower().split(',')
|
89 |
assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
|
@@ -109,14 +193,14 @@ class Solver:
|
|
109 |
# os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
|
110 |
# img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
|
111 |
|
112 |
-
img_path = os.path.join(self.
|
113 |
user_image.save(img_path)
|
114 |
else:
|
115 |
img_path = None
|
116 |
|
117 |
# Set tool cache directory
|
118 |
-
|
119 |
-
self.executor.set_query_cache_dir(
|
120 |
|
121 |
# Step 1: Display the received inputs
|
122 |
if user_image:
|
@@ -145,6 +229,13 @@ class Solver:
|
|
145 |
metadata={"title": "🔍 Query Analysis"}))
|
146 |
yield messages
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
# Step 5: Execution loop (similar to your step-by-step solver)
|
149 |
while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
|
150 |
step_count += 1
|
@@ -158,6 +249,14 @@ class Solver:
|
|
158 |
user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
|
159 |
)
|
160 |
context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
# Display the step information
|
163 |
messages.append(ChatMessage(
|
@@ -183,6 +282,21 @@ class Solver:
|
|
183 |
result = self.executor.execute_tool_command(tool_name, command)
|
184 |
result = make_json_serializable(result)
|
185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
messages.append(ChatMessage(
|
187 |
role="assistant",
|
188 |
content=f"{json.dumps(result, indent=4)}",
|
@@ -194,6 +308,14 @@ class Solver:
|
|
194 |
stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
|
195 |
conclusion = self.planner.extract_conclusion(stop_verification)
|
196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
messages.append(ChatMessage(
|
198 |
role="assistant",
|
199 |
content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
|
@@ -208,15 +330,29 @@ class Solver:
|
|
208 |
messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
|
209 |
yield messages
|
210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
if 'direct' in self.output_types:
|
212 |
direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
|
213 |
messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
|
214 |
yield messages
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
# Step 8: Completion Message
|
217 |
messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process completed."))
|
218 |
yield messages
|
219 |
-
|
220 |
|
221 |
def parse_arguments():
|
222 |
parser = argparse.ArgumentParser(description="Run the OctoTools demo with specified parameters.")
|
@@ -230,7 +366,8 @@ def parse_arguments():
|
|
230 |
help="Comma-separated list of required outputs (base,final,direct)"
|
231 |
)
|
232 |
parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
|
233 |
-
parser.add_argument("--root_cache_dir", default="
|
|
|
234 |
parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
|
235 |
|
236 |
# NOTE: Add new arguments
|
@@ -245,18 +382,28 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
|
|
245 |
Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
|
246 |
"""
|
247 |
|
248 |
-
# Generate
|
249 |
query_id = time.strftime("%Y%m%d_%H%M%S") + "_" + str(uuid.uuid4())[:8] # e.g, 20250217_062225_612f2474
|
250 |
print(f"Query ID: {query_id}")
|
251 |
|
|
|
|
|
|
|
|
|
252 |
# Create a directory for the query ID
|
253 |
-
|
254 |
-
os.makedirs(
|
255 |
-
args.root_cache_dir = query_dir
|
256 |
|
257 |
if api_key is None:
|
258 |
return [["assistant", "⚠️ Error: OpenAI API Key is required."]]
|
259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
# # Initialize Tools
|
261 |
# enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
|
262 |
|
@@ -284,7 +431,7 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
|
|
284 |
# Instantiate Executor
|
285 |
executor = Executor(
|
286 |
llm_engine_name=llm_model_engine,
|
287 |
-
|
288 |
enable_signal=False,
|
289 |
api_key=api_key
|
290 |
)
|
@@ -300,16 +447,23 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
|
|
300 |
verbose=args.verbose,
|
301 |
max_steps=max_steps,
|
302 |
max_time=max_time,
|
303 |
-
|
304 |
)
|
305 |
|
306 |
if solver is None:
|
307 |
return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
|
308 |
|
|
|
309 |
messages = [] # Initialize message list
|
310 |
for message_batch in solver.stream_solve_user_problem(user_query, user_image, api_key, messages):
|
311 |
yield [msg for msg in message_batch] # Ensure correct format for Gradio Chatbot
|
312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
313 |
|
314 |
def main(args):
|
315 |
#################### Gradio Interface ####################
|
@@ -325,8 +479,8 @@ def main(args):
|
|
325 |
|
326 |
[Website](https://octotools.github.io/) |
|
327 |
[Github](https://github.com/octotools/octotools) |
|
328 |
-
[arXiv](https://arxiv.org/abs/2502.
|
329 |
-
[Paper](https://arxiv.org/pdf/2502.
|
330 |
[Tool Cards](https://octotools.github.io/#tool-cards) |
|
331 |
[Example Visualizations](https://octotools.github.io/#visualization) |
|
332 |
[Discord](https://discord.gg/NMJx66DC)
|
@@ -424,20 +578,20 @@ def main(args):
|
|
424 |
|
425 |
# Update the button click handlers
|
426 |
upvote_btn.click(
|
427 |
-
fn=lambda: save_feedback(
|
428 |
inputs=[],
|
429 |
outputs=[]
|
430 |
)
|
431 |
|
432 |
downvote_btn.click(
|
433 |
-
fn=lambda: save_feedback(
|
434 |
inputs=[],
|
435 |
outputs=[]
|
436 |
)
|
437 |
|
438 |
# Add handler for comment submission
|
439 |
comment_textbox.submit(
|
440 |
-
fn=lambda comment: save_feedback(
|
441 |
inputs=[comment_textbox],
|
442 |
outputs=[]
|
443 |
)
|
@@ -481,9 +635,6 @@ def main(args):
|
|
481 |
if __name__ == "__main__":
|
482 |
args = parse_arguments()
|
483 |
|
484 |
-
# Manually set enabled tools
|
485 |
-
# args.enabled_tools = "Generalist_Solution_Generator_Tool"
|
486 |
-
|
487 |
# All tools
|
488 |
all_tools = [
|
489 |
"Generalist_Solution_Generator_Tool",
|
@@ -504,5 +655,7 @@ if __name__ == "__main__":
|
|
504 |
]
|
505 |
args.enabled_tools = ",".join(all_tools)
|
506 |
|
|
|
|
|
507 |
main(args)
|
508 |
|
|
|
22 |
from octotools.models.executor import Executor
|
23 |
from octotools.models.utils import make_json_serializable
|
24 |
|
|
|
25 |
|
|
|
|
|
26 |
from pathlib import Path
|
27 |
from huggingface_hub import CommitScheduler
|
28 |
|
|
|
|
|
|
|
|
|
|
|
29 |
# Get Huggingface token from environment variable
|
30 |
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
31 |
|
32 |
+
########### Test Huggingface Dataset ###########
# HuggingFace dataset constants.
# Local directory that receives all per-query data; the scheduler below is
# configured with this directory as its ``folder_path``.
DATASET_DIR = Path("solver_cache")
DATASET_DIR.mkdir(parents=True, exist_ok=True)

# ID of the most recent query. It is assigned in solve_problem_gradio and read
# by the save_* helpers and the feedback handlers; it stays None until the
# first query has been submitted.
# NOTE(review): this is a single module-level value, so concurrent sessions
# would overwrite each other's ID — confirm whether per-session state is needed.
# (A ``global`` statement at module scope has no effect, so none is used here.)
QUERY_ID = None

scheduler = CommitScheduler(
    repo_id="lupantech/OctoTools-Gradio-Demo-User-Data",
    repo_type="dataset",
    folder_path=DATASET_DIR,
    path_in_repo="solver_cache",  # Update path in repo
    token=HF_TOKEN
)
|
47 |
|
48 |
+
|
49 |
+
def save_query_data(query_id: str, query: str, image_path: str) -> None:
    """Write the metadata of one query to ``<DATASET_DIR>/<query_id>/query_metadata.json``.

    Args:
        query_id: Unique identifier of the query; also used as the name of the
            sub-directory under ``DATASET_DIR``.
        query: Text of the user query.
        image_path: Path of the query image, or a falsy value when the query
            has no image (stored as ``null``).
    """
    query_cache_dir = DATASET_DIR / query_id
    query_cache_dir.mkdir(parents=True, exist_ok=True)
    query_file = query_cache_dir / "query_metadata.json"

    query_metadata = {
        "query_id": query_id,
        "query_text": query,
        "datetime": time.strftime("%Y%m%d_%H%M%S"),
        "image_path": image_path or None,
    }

    print(f"Saving query metadata to {query_file}")
    # Hold the scheduler lock while writing so that the background commit
    # cannot pick up a half-written file from the watched folder.
    with scheduler.lock:
        with query_file.open("w", encoding="utf-8") as f:
            json.dump(query_metadata, f, indent=4)
|
66 |
+
|
67 |
+
# # NOTE: As we are using the same name for the query cache directory as the dataset directory,
|
68 |
+
# # NOTE: we don't need to copy the content from the query cache directory to the query directory.
|
69 |
+
# # Copy all content from root_cache_dir to query_dir
|
70 |
+
# import shutil
|
71 |
+
# shutil.copytree(args.root_cache_dir, query_data_dir, dirs_exist_ok=True)
|
72 |
+
|
73 |
+
|
74 |
+
def save_feedback(query_id: str, feedback_type: str, feedback_text: str = None) -> None:
    """
    Save user feedback to the query directory.

    The feedback is stored in ``<DATASET_DIR>/<query_id>/feedback.json``. The
    first entry is written as a single JSON object; once the file exists, its
    content is turned into a list and every new entry is appended to it.

    Args:
        query_id: Unique identifier for the query. When it is empty or None
            (no query has been run yet) nothing is saved.
        feedback_type: Type of feedback ('upvote', 'downvote', or 'comment')
        feedback_text: Optional text feedback from user
    """
    if not query_id:
        # The feedback handlers pass the module-level QUERY_ID, which is None
        # until a query has been submitted; building a path from it would raise.
        print("Warning: no query ID available; feedback was not saved.")
        return

    feedback_data_dir = DATASET_DIR / query_id
    feedback_data_dir.mkdir(parents=True, exist_ok=True)

    feedback_entry = {
        "query_id": query_id,
        "feedback_type": feedback_type,
        "feedback_text": feedback_text,
        "datetime": time.strftime("%Y%m%d_%H%M%S")
    }

    # Save feedback in the query directory
    feedback_file = feedback_data_dir / "feedback.json"
    print(f"Saving feedback to {feedback_file}")

    # The read-modify-write below runs under the scheduler lock so that the
    # background commit never uploads a partially written feedback file.
    with scheduler.lock:
        feedback_data = feedback_entry

        # If feedback file exists, update it
        if feedback_file.exists():
            try:
                with feedback_file.open("r", encoding="utf-8") as f:
                    existing_feedback = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # A truncated or corrupt file must not block new feedback.
                print(f"Warning: ignoring unreadable feedback file {feedback_file}: {e}")
                existing_feedback = []
            # Convert to list if it's a single feedback entry
            if not isinstance(existing_feedback, list):
                existing_feedback = [existing_feedback]
            existing_feedback.append(feedback_entry)
            feedback_data = existing_feedback

        # Write feedback data
        with feedback_file.open("w", encoding="utf-8") as f:
            json.dump(feedback_data, f, indent=4)
|
111 |
+
|
112 |
+
|
113 |
+
def save_steps_data(query_id: str, memory: Memory) -> None:
    """Save the actions held in ``memory`` to ``<DATASET_DIR>/<query_id>/all_steps.json``.

    Args:
        query_id: Unique identifier of the query. When it is empty or None
            nothing is saved.
        memory: Memory object whose ``get_actions()`` result is stored after
            being passed through ``make_json_serializable``.
    """
    if not query_id:
        print("Warning: no query ID available; steps data was not saved.")
        return

    steps_file = DATASET_DIR / query_id / "all_steps.json"
    # Do not rely on another code path having created the query directory.
    steps_file.parent.mkdir(parents=True, exist_ok=True)

    memory_actions = memory.get_actions()
    memory_actions = make_json_serializable(memory_actions)  # NOTE: make the memory actions serializable
    print("Memory actions: ", memory_actions)

    # Write under the scheduler lock so the background commit cannot upload a
    # half-written file.
    with scheduler.lock:
        with steps_file.open("w", encoding="utf-8") as f:
            json.dump(memory_actions, f, indent=4)
|
123 |
+
|
124 |
+
|
125 |
+
def save_module_data(query_id: str, key: str, value: Any) -> None:
    """Save one piece of module data to ``<DATASET_DIR>/<query_id>/<key>.json``.

    This is best-effort logging: failures are printed, never raised. If the
    value cannot be written as JSON, its ``str()`` form is appended to
    ``<key>.txt`` in the same directory instead.

    Args:
        query_id: Unique identifier of the query. When it is empty or None
            nothing is saved.
        key: Name of the data item; spaces are replaced by underscores and the
            result is lower-cased to form the file name.
        value: Data to store; passed through ``make_json_serializable`` first.
    """
    if not query_id:
        print(f"Warning: no query ID available; module data '{key}' was not saved.")
        return

    module_dir = DATASET_DIR / str(query_id)
    try:
        key = key.replace(" ", "_").lower()
        module_dir.mkdir(parents=True, exist_ok=True)
        module_file = module_dir / f"{key}.json"
        value = make_json_serializable(value)  # NOTE: make the value serializable
        # Serialize before opening the file so that a serialization error does
        # not leave a truncated JSON file behind.
        payload = json.dumps(value, indent=4)
        # "w" rather than "a": appending a second document would make the file
        # invalid JSON. The lock keeps the background commit from uploading a
        # half-written file.
        with scheduler.lock:
            with module_file.open("w", encoding="utf-8") as f:
                f.write(payload)
    except Exception as e:  # deliberate catch-all: saving must not break the solver
        print(f"Warning: Failed to save as JSON: {e}")
        # Fallback to saving as text file
        text_file = module_dir / f"{key}.txt"
        try:
            with scheduler.lock:
                with text_file.open("a", encoding="utf-8") as f:
                    f.write(str(value) + "\n")
            print(f"Successfully saved as text file: {text_file}")
        except Exception as e:
            print(f"Error: Failed to save as text file: {e}")
|
143 |
+
|
144 |
########### End of Test Huggingface Dataset ###########
|
145 |
|
146 |
class Solver:
|
|
|
156 |
verbose: bool = True,
|
157 |
max_steps: int = 10,
|
158 |
max_time: int = 60,
|
159 |
+
query_cache_dir: str = "solver_cache"
|
160 |
):
|
161 |
self.planner = planner
|
162 |
self.memory = memory
|
|
|
167 |
self.verbose = verbose
|
168 |
self.max_steps = max_steps
|
169 |
self.max_time = max_time
|
170 |
+
self.query_cache_dir = query_cache_dir
|
171 |
|
172 |
self.output_types = output_types.lower().split(',')
|
173 |
assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
|
|
|
193 |
# os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
|
194 |
# img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
|
195 |
|
196 |
+
img_path = os.path.join(self.query_cache_dir, 'query_image.jpg')
|
197 |
user_image.save(img_path)
|
198 |
else:
|
199 |
img_path = None
|
200 |
|
201 |
# Set tool cache directory
|
202 |
+
_tool_cache_dir = os.path.join(self.query_cache_dir, "tool_cache") # NOTE: This is the directory for tool cache
|
203 |
+
self.executor.set_query_cache_dir(_tool_cache_dir) # NOTE: set query cache directory
|
204 |
|
205 |
# Step 1: Display the received inputs
|
206 |
if user_image:
|
|
|
229 |
metadata={"title": "🔍 Query Analysis"}))
|
230 |
yield messages
|
231 |
|
232 |
+
# Save the query analysis data
|
233 |
+
query_analysis_data = {
|
234 |
+
"query_analysis": query_analysis,
|
235 |
+
"time": round(time.time() - start_time, 5)
|
236 |
+
}
|
237 |
+
save_module_data(QUERY_ID, "step_0_query_analysis", query_analysis_data)
|
238 |
+
|
239 |
# Step 5: Execution loop (similar to your step-by-step solver)
|
240 |
while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
|
241 |
step_count += 1
|
|
|
249 |
user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
|
250 |
)
|
251 |
context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
|
252 |
+
step_data = {
|
253 |
+
"step_count": step_count,
|
254 |
+
"context": context,
|
255 |
+
"sub_goal": sub_goal,
|
256 |
+
"tool_name": tool_name,
|
257 |
+
"time": round(time.time() - start_time, 5)
|
258 |
+
}
|
259 |
+
save_module_data(QUERY_ID, f"step_{step_count}_action_prediction", step_data)
|
260 |
|
261 |
# Display the step information
|
262 |
messages.append(ChatMessage(
|
|
|
282 |
result = self.executor.execute_tool_command(tool_name, command)
|
283 |
result = make_json_serializable(result)
|
284 |
|
285 |
+
# Save the command generation data
|
286 |
+
command_generation_data = {
|
287 |
+
"explanation": explanation,
|
288 |
+
"command": command,
|
289 |
+
"time": round(time.time() - start_time, 5)
|
290 |
+
}
|
291 |
+
save_module_data(QUERY_ID, f"step_{step_count}_command_generation", command_generation_data)
|
292 |
+
|
293 |
+
# Save the command execution data
|
294 |
+
command_execution_data = {
|
295 |
+
"result": result,
|
296 |
+
"time": round(time.time() - start_time, 5)
|
297 |
+
}
|
298 |
+
save_module_data(QUERY_ID, f"step_{step_count}_command_execution", command_execution_data)
|
299 |
+
|
300 |
messages.append(ChatMessage(
|
301 |
role="assistant",
|
302 |
content=f"{json.dumps(result, indent=4)}",
|
|
|
308 |
stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
|
309 |
conclusion = self.planner.extract_conclusion(stop_verification)
|
310 |
|
311 |
+
# Save the context verification data
|
312 |
+
context_verification_data = {
|
313 |
+
"stop_verification": stop_verification,
|
314 |
+
"conclusion": conclusion,
|
315 |
+
"time": round(time.time() - start_time, 5)
|
316 |
+
}
|
317 |
+
save_module_data(QUERY_ID, f"step_{step_count}_context_verification", context_verification_data)
|
318 |
+
|
319 |
messages.append(ChatMessage(
|
320 |
role="assistant",
|
321 |
content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
|
|
|
330 |
messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
|
331 |
yield messages
|
332 |
|
333 |
+
# Save the final output data
|
334 |
+
final_output_data = {
|
335 |
+
"final_output": final_output,
|
336 |
+
"time": round(time.time() - start_time, 5)
|
337 |
+
}
|
338 |
+
save_module_data(QUERY_ID, "final_output", final_output_data)
|
339 |
+
|
340 |
if 'direct' in self.output_types:
|
341 |
direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
|
342 |
messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
|
343 |
yield messages
|
344 |
|
345 |
+
# Save the direct output data
|
346 |
+
direct_output_data = {
|
347 |
+
"direct_output": direct_output,
|
348 |
+
"time": round(time.time() - start_time, 5)
|
349 |
+
}
|
350 |
+
save_module_data(QUERY_ID, "direct_output", direct_output_data)
|
351 |
+
|
352 |
# Step 8: Completion Message
|
353 |
messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process completed."))
|
354 |
yield messages
|
355 |
+
|
356 |
|
357 |
def parse_arguments():
|
358 |
parser = argparse.ArgumentParser(description="Run the OctoTools demo with specified parameters.")
|
|
|
366 |
help="Comma-separated list of required outputs (base,final,direct)"
|
367 |
)
|
368 |
parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
|
369 |
+
parser.add_argument("--root_cache_dir", default="solver_cache", help="Path to solver cache directory.")
|
370 |
+
parser.add_argument("--query_id", default=None, help="Query ID.")
|
371 |
parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
|
372 |
|
373 |
# NOTE: Add new arguments
|
|
|
382 |
Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
|
383 |
"""
|
384 |
|
385 |
+
# Generate Unique Query ID (Date and first 8 characters of UUID)
|
386 |
query_id = time.strftime("%Y%m%d_%H%M%S") + "_" + str(uuid.uuid4())[:8] # e.g, 20250217_062225_612f2474
|
387 |
print(f"Query ID: {query_id}")
|
388 |
|
389 |
+
# NOTE: update the global variable to save the query ID
|
390 |
+
global QUERY_ID
|
391 |
+
QUERY_ID = query_id
|
392 |
+
|
393 |
# Create a directory for the query ID
|
394 |
+
query_cache_dir = os.path.join(DATASET_DIR.name, query_id) # NOTE
|
395 |
+
os.makedirs(query_cache_dir, exist_ok=True)
|
|
|
396 |
|
397 |
if api_key is None:
|
398 |
return [["assistant", "⚠️ Error: OpenAI API Key is required."]]
|
399 |
|
400 |
+
# Save the query data
|
401 |
+
save_query_data(
|
402 |
+
query_id=query_id,
|
403 |
+
query=user_query,
|
404 |
+
image_path=os.path.join(query_cache_dir, 'query_image.jpg') if user_image else None
|
405 |
+
)
|
406 |
+
|
407 |
# # Initialize Tools
|
408 |
# enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
|
409 |
|
|
|
431 |
# Instantiate Executor
|
432 |
executor = Executor(
|
433 |
llm_engine_name=llm_model_engine,
|
434 |
+
query_cache_dir=query_cache_dir, # NOTE
|
435 |
enable_signal=False,
|
436 |
api_key=api_key
|
437 |
)
|
|
|
447 |
verbose=args.verbose,
|
448 |
max_steps=max_steps,
|
449 |
max_time=max_time,
|
450 |
+
query_cache_dir=query_cache_dir # NOTE
|
451 |
)
|
452 |
|
453 |
if solver is None:
|
454 |
return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
|
455 |
|
456 |
+
|
457 |
messages = [] # Initialize message list
|
458 |
for message_batch in solver.stream_solve_user_problem(user_query, user_image, api_key, messages):
|
459 |
yield [msg for msg in message_batch] # Ensure correct format for Gradio Chatbot
|
460 |
|
461 |
+
# Save steps
|
462 |
+
save_steps_data(
|
463 |
+
query_id=query_id,
|
464 |
+
memory=memory
|
465 |
+
)
|
466 |
+
|
467 |
|
468 |
def main(args):
|
469 |
#################### Gradio Interface ####################
|
|
|
479 |
|
480 |
[Website](https://octotools.github.io/) |
|
481 |
[Github](https://github.com/octotools/octotools) |
|
482 |
+
[arXiv](https://arxiv.org/abs/2502.11271) |
|
483 |
+
[Paper](https://arxiv.org/pdf/2502.11271) |
|
484 |
[Tool Cards](https://octotools.github.io/#tool-cards) |
|
485 |
[Example Visualizations](https://octotools.github.io/#visualization) |
|
486 |
[Discord](https://discord.gg/NMJx66DC)
|
|
|
578 |
|
579 |
# Update the button click handlers
|
580 |
upvote_btn.click(
|
581 |
+
fn=lambda: save_feedback(QUERY_ID, "upvote"),
|
582 |
inputs=[],
|
583 |
outputs=[]
|
584 |
)
|
585 |
|
586 |
downvote_btn.click(
|
587 |
+
fn=lambda: save_feedback(QUERY_ID, "downvote"),
|
588 |
inputs=[],
|
589 |
outputs=[]
|
590 |
)
|
591 |
|
592 |
# Add handler for comment submission
|
593 |
comment_textbox.submit(
|
594 |
+
fn=lambda comment: save_feedback(QUERY_ID, "comment", comment),
|
595 |
inputs=[comment_textbox],
|
596 |
outputs=[]
|
597 |
)
|
|
|
635 |
if __name__ == "__main__":
|
636 |
args = parse_arguments()
|
637 |
|
|
|
|
|
|
|
638 |
# All tools
|
639 |
all_tools = [
|
640 |
"Generalist_Solution_Generator_Tool",
|
|
|
655 |
]
|
656 |
args.enabled_tools = ",".join(all_tools)
|
657 |
|
658 |
+
# NOTE: Use the same name for the query cache directory as the dataset directory
|
659 |
+
args.root_cache_dir = DATASET_DIR.name
|
660 |
main(args)
|
661 |
|
demo_solver_cache/20250217_062225_8ce3e482/query_image.jpg
DELETED
Binary file (42 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_1.png
DELETED
Binary file (7.75 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_10.png
DELETED
Binary file (7.6 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_11.png
DELETED
Binary file (7.77 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_12.png
DELETED
Binary file (7.71 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_13.png
DELETED
Binary file (7.6 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_14.png
DELETED
Binary file (7.47 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_15.png
DELETED
Binary file (8.05 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_16.png
DELETED
Binary file (7.86 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_17.png
DELETED
Binary file (7.88 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_18.png
DELETED
Binary file (7.76 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_19.png
DELETED
Binary file (8.02 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_2.png
DELETED
Binary file (7.65 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_20.png
DELETED
Binary file (8.03 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_3.png
DELETED
Binary file (7.92 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_4.png
DELETED
Binary file (7.71 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_5.png
DELETED
Binary file (7.6 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_6.png
DELETED
Binary file (7.82 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_7.png
DELETED
Binary file (7.53 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_8.png
DELETED
Binary file (7.67 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_9.png
DELETED
Binary file (7.41 kB)
|
|
demo_solver_cache/20250217_062225_8ce3e482/user_feedback.json
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"timestamp": "20250217_062307",
|
4 |
-
"feedback_type": "upvote",
|
5 |
-
"comment": null
|
6 |
-
},
|
7 |
-
{
|
8 |
-
"timestamp": "20250217_062315",
|
9 |
-
"feedback_type": "downvote",
|
10 |
-
"comment": null
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"timestamp": "20250217_062322",
|
14 |
-
"feedback_type": "upvote",
|
15 |
-
"comment": null
|
16 |
-
},
|
17 |
-
{
|
18 |
-
"timestamp": "20250217_062333",
|
19 |
-
"feedback_type": "It is helpful!",
|
20 |
-
"comment": null
|
21 |
-
}
|
22 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
demo_solver_cache/20250217_063316_09285db1/query_image.jpg
DELETED
Binary file (42 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_1.png
DELETED
Binary file (7.75 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_10.png
DELETED
Binary file (7.6 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_11.png
DELETED
Binary file (7.77 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_12.png
DELETED
Binary file (7.71 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_13.png
DELETED
Binary file (7.6 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_14.png
DELETED
Binary file (7.47 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_15.png
DELETED
Binary file (8.05 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_16.png
DELETED
Binary file (7.86 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_17.png
DELETED
Binary file (7.88 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_18.png
DELETED
Binary file (7.76 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_19.png
DELETED
Binary file (8.02 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_2.png
DELETED
Binary file (7.65 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_20.png
DELETED
Binary file (8.03 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_3.png
DELETED
Binary file (7.92 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_4.png
DELETED
Binary file (7.71 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_5.png
DELETED
Binary file (7.6 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_6.png
DELETED
Binary file (7.82 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_7.png
DELETED
Binary file (7.53 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_8.png
DELETED
Binary file (7.67 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_9.png
DELETED
Binary file (7.41 kB)
|
|
demo_solver_cache/20250217_063316_09285db1/user_feedback.json
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"timestamp": "20250217_063350",
|
4 |
-
"feedback_type": "upvote",
|
5 |
-
"comment": null
|
6 |
-
},
|
7 |
-
{
|
8 |
-
"timestamp": "20250217_063359",
|
9 |
-
"feedback_type": "Thanks! It is interesting!",
|
10 |
-
"comment": null
|
11 |
-
}
|
12 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
demo_solver_cache/20250217_183323_b0e58b32/query_image.jpg
DELETED
Binary file (42 kB)
|
|
demo_solver_cache/20250217_183323_b0e58b32/user_feedback.json
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"timestamp": "20250217_190313",
|
4 |
-
"feedback_type": "upvote",
|
5 |
-
"comment": null
|
6 |
-
},
|
7 |
-
{
|
8 |
-
"timestamp": "20250217_190319",
|
9 |
-
"feedback_type": "downvote",
|
10 |
-
"comment": null
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"timestamp": "20250217_190321",
|
14 |
-
"feedback_type": "upvote",
|
15 |
-
"comment": null
|
16 |
-
},
|
17 |
-
{
|
18 |
-
"timestamp": "20250217_190322",
|
19 |
-
"feedback_type": "downvote",
|
20 |
-
"comment": null
|
21 |
-
},
|
22 |
-
{
|
23 |
-
"timestamp": "20250217_190338",
|
24 |
-
"feedback_type": "Thanks! It is interesting!",
|
25 |
-
"comment": null
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"timestamp": "20250217_190341",
|
29 |
-
"feedback_type": "Thanks! It is interesting!",
|
30 |
-
"comment": null
|
31 |
-
}
|
32 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
feedback_dataset/feedback-20250217_212246.json
DELETED
File without changes
|
feedback_dataset/feedback-20250217_212401.json
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212450"}
|
2 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212452"}
|
3 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "It is good!", "comment": null, "datetime": "20250217_212459"}
|
4 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212523"}
|
5 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212524"}
|
6 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212524"}
|
7 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "downvote", "comment": null, "datetime": "20250217_212524"}
|
8 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "It is good!", "comment": null, "datetime": "20250217_212526"}
|
9 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212619"}
|
10 |
-
{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "It is good!", "comment": null, "datetime": "20250217_212650"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|