dung-vpt-uney committed
Commit 9c4a163 · Parent(s): c3e1463

Deploy latest CoRGI Gradio demo

PROGRESS_LOG.md CHANGED
@@ -14,7 +14,7 @@
 - Introduced structured logging for the app (`app.py`) and pipeline execution to trace model loads, cache hits, and Gradio lifecycle events on Spaces.
 - Reworked the Gradio UI to show per-step panels with annotated evidence galleries, giving each CoRGI reasoning step its own window alongside the final synthesized answer.
 - Preloaded the default Qwen3-VL model/tokenizer at import so Spaces load the GPU weights before serving requests.
-- Switched inference to bfloat16, tightened defaults (max steps/regions = 3), and moved the @spaces.GPU decorator down to the raw `_chat` call so each generation stays within the 120 s ZeroGPU budget.
+- Switched inference to bfloat16, tightened defaults (max steps/regions = 3), added per-stage timers, and moved the @spaces.GPU decorator down to the raw `_chat` call so each generation stays within the 120 s ZeroGPU budget.

 ## 2024-10-21
 - Updated default checkpoints to `Qwen/Qwen3-VL-8B-Thinking` and verified CLI/Gradio/test coverage.
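
The decorator placement noted in the log entry is the crux of the ZeroGPU fix: `@spaces.GPU` should wrap only the raw generation call, so prompt building, parsing, and UI work never eat into the 120 s window. A minimal sketch of the pattern (the `_chat` signature here is illustrative, not the app's actual one):

```python
import spaces  # Hugging Face Spaces SDK; provides the ZeroGPU decorator


@spaces.GPU  # a GPU slot is held only while this function executes
def _chat(model, inputs, max_new_tokens: int = 512):
    # Only raw generation counts against the ~120 s ZeroGPU budget;
    # prompt construction and decoding happen outside the decorated call.
    return model.generate(**inputs, max_new_tokens=max_new_tokens)
```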
README.md CHANGED
@@ -47,3 +47,4 @@ python app.py
 - **ROI Extraction**: Shows the source image with every grounded bounding box plus per-evidence crops, and lists the prompts used for each verification step.
 - **Evidence Descriptions**: Summarises each grounded region (bbox, description, confidence) with the associated ROI prompts.
 - **Answer Synthesis**: Highlights the final answer, supporting context, and the synthesis prompt/response pair.
+- **Performance**: Reports per-stage timings (reasoning, ROI extraction, synthesis) plus overall latency so you can monitor ZeroGPU runtime limits.
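
If you want these numbers programmatically rather than in the tab, a sketch of reading them from the serialized result (the `timings` key is added in the `corgi/pipeline.py` change below; `result` is an assumed `PipelineResult` instance):

```python
# Sketch: print per-stage timings from a PipelineResult JSON payload.
payload = result.to_json()
for entry in payload["timings"]:
    step = f" (step {entry['step_index']})" if "step_index" in entry else ""
    print(f"{entry['name']}{step}: {entry['duration_ms'] / 1000:.2f} s")
```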
corgi/__pycache__/gradio_app.cpython-313.pyc CHANGED
Binary files a/corgi/__pycache__/gradio_app.cpython-313.pyc and b/corgi/__pycache__/gradio_app.cpython-313.pyc differ
 
corgi/__pycache__/pipeline.cpython-313.pyc CHANGED
Binary files a/corgi/__pycache__/pipeline.cpython-313.pyc and b/corgi/__pycache__/pipeline.cpython-313.pyc differ
 
corgi/__pycache__/types.cpython-313.pyc CHANGED
Binary files a/corgi/__pycache__/types.cpython-313.pyc and b/corgi/__pycache__/types.cpython-313.pyc differ
 
corgi/gradio_app.py CHANGED
@@ -158,6 +158,7 @@ def _empty_ui_payload(message: str) -> Dict[str, object]:
         "evidence_prompt": placeholder_prompt,
         "answer_process_markdown": message,
         "answer_prompt": placeholder_prompt,
+        "timing_markdown": message,
     }


@@ -270,6 +271,20 @@ def _prepare_ui_payload(
     ]
     answer_process_markdown = "\n".join(answer_process_lines)

+    timing_lines: List[str] = []
+    if result.timings:
+        total_entry = next((t for t in result.timings if t.name == "total_pipeline"), None)
+        if total_entry:
+            timing_lines.append(f"**Total pipeline:** {total_entry.duration_ms / 1000:.2f} s")
+        for timing in result.timings:
+            if timing.name == "total_pipeline":
+                continue
+            label = timing.name.replace("_", " ")
+            if timing.step_index is not None:
+                label += f" (step {timing.step_index})"
+            timing_lines.append(f"- {label}: {timing.duration_ms / 1000:.2f} s")
+    timing_markdown = "\n".join(timing_lines) if timing_lines else "_No timing data available._"
+
     return {
         "answer_markdown": answer_text,
         "chain_markdown": chain_markdown,
@@ -281,6 +296,7 @@
         "evidence_prompt": evidence_prompt_md,
         "answer_process_markdown": answer_process_markdown,
         "answer_prompt": answer_prompt_md,
+        "timing_markdown": timing_markdown,
     }


@@ -456,6 +472,8 @@ def build_demo(
            with gr.Tab("Answer Synthesis"):
                answer_process_markdown = gr.Markdown("_No answer generated yet._")
                answer_prompt_markdown = gr.Markdown("```text\nAwaiting answer prompt...\n```")
+           with gr.Tab("Performance"):
+               timing_markdown = gr.Markdown("_No timing data available._")

        def _on_submit(state_data, image, question, model_id, max_steps, max_regions):
            pipeline_state = state_data if isinstance(state_data, PipelineState) else None
@@ -479,6 +497,7 @@
                payload["evidence_prompt"],
                payload["answer_process_markdown"],
                payload["answer_prompt"],
+               payload["timing_markdown"],
            ]

        output_components = [
@@ -493,6 +512,7 @@
            evidence_prompt_markdown,
            answer_process_markdown,
            answer_prompt_markdown,
+           timing_markdown,
        ]

        run_button.click(
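
To make the new Performance tab concrete, here is what the `_prepare_ui_payload` loop above renders for a hypothetical run (durations invented for illustration):

```python
from corgi.types import StageTiming

timings = [  # invented values; a real run supplies these via result.timings
    StageTiming(name="total_pipeline", duration_ms=19700.0),
    StageTiming(name="structured_reasoning", duration_ms=8200.0),
    StageTiming(name="roi_step_1", duration_ms=5400.0, step_index=1),
    StageTiming(name="answer_synthesis", duration_ms=6100.0),
]
# The loop emits:
# **Total pipeline:** 19.70 s
# - structured reasoning: 8.20 s
# - roi step 1 (step 1): 5.40 s
# - answer synthesis: 6.10 s
```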
corgi/pipeline.py CHANGED
@@ -3,14 +3,18 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import List, Optional, Protocol

+import time
+
 from PIL import Image

 from .types import (
     GroundedEvidence,
     PromptLog,
     ReasoningStep,
+    StageTiming,
     evidences_to_serializable,
     prompt_logs_to_serializable,
+    stage_timings_to_serializable,
     steps_to_serializable,
 )

@@ -58,6 +62,8 @@ class PipelineResult:
     reasoning_log: Optional[PromptLog] = None
     grounding_logs: List[PromptLog] = field(default_factory=list)
     answer_log: Optional[PromptLog] = None
+    timings: List[StageTiming] = field(default_factory=list)
+    total_duration_ms: float = 0.0

     def to_json(self) -> dict:
         payload = {
@@ -65,6 +71,7 @@
             "steps": steps_to_serializable(self.steps),
             "evidence": evidences_to_serializable(self.evidence),
             "answer": self.answer,
+            "total_duration_ms": self.total_duration_ms,
         }
         reasoning_entries = (
             prompt_logs_to_serializable([self.reasoning_log]) if self.reasoning_log else []
@@ -73,6 +80,7 @@
             payload["reasoning_log"] = reasoning_entries[0]

         payload["grounding_logs"] = prompt_logs_to_serializable(self.grounding_logs)
+        payload["timings"] = stage_timings_to_serializable(self.timings)

         answer_entries = prompt_logs_to_serializable([self.answer_log]) if self.answer_log else []
         if answer_entries:
@@ -97,21 +105,37 @@ class CoRGIPipeline:
         max_regions: int = 3,
     ) -> PipelineResult:
         self._vlm.reset_logs()
+        timings: List[StageTiming] = []
+        total_start = time.monotonic()
+
+        reasoning_start = time.monotonic()
         steps = self._vlm.structured_reasoning(image=image, question=question, max_steps=max_steps)
+        reasoning_duration = (time.monotonic() - reasoning_start) * 1000.0
+        timings.append(StageTiming(name="structured_reasoning", duration_ms=reasoning_duration))
+
         evidences: List[GroundedEvidence] = []
         for step in steps:
             if not step.needs_vision:
                 continue
+            stage_name = f"roi_step_{step.index}"
+            grounding_start = time.monotonic()
             step_evs = self._vlm.extract_step_evidence(
                 image=image,
                 question=question,
                 step=step,
                 max_regions=max_regions,
             )
+            grounding_duration = (time.monotonic() - grounding_start) * 1000.0
+            timings.append(StageTiming(name=stage_name, duration_ms=grounding_duration, step_index=step.index))
             if not step_evs:
                 continue
             evidences.extend(step_evs[:max_regions])
+        answer_start = time.monotonic()
         answer = self._vlm.synthesize_answer(image=image, question=question, steps=steps, evidences=evidences)
+        answer_duration = (time.monotonic() - answer_start) * 1000.0
+        timings.append(StageTiming(name="answer_synthesis", duration_ms=answer_duration))
+        total_duration = (time.monotonic() - total_start) * 1000.0
+        timings.append(StageTiming(name="total_pipeline", duration_ms=total_duration))
         return PipelineResult(
             question=question,
             steps=steps,
@@ -120,6 +144,8 @@
             reasoning_log=self._vlm.reasoning_log,
             grounding_logs=list(self._vlm.grounding_logs),
             answer_log=self._vlm.answer_log,
+            timings=timings,
+            total_duration_ms=total_duration,
         )
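
Using `time.monotonic()` rather than `time.time()` is the right call for durations, since the monotonic clock cannot jump backwards under NTP adjustments. A usage sketch, assuming `pipeline` is a constructed `CoRGIPipeline`; the entry point's name is not visible in this hunk, so `run` below is a placeholder:

```python
from PIL import Image

image = Image.open("example.jpg")  # any test image
result = pipeline.run(image=image, question="What colour is the mug?")  # `run` is assumed

print(f"total: {result.total_duration_ms / 1000:.2f} s")
for timing in result.timings:
    suffix = f" (step {timing.step_index})" if timing.step_index is not None else ""
    print(f"{timing.name}{suffix}: {timing.duration_ms / 1000:.2f} s")
```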
corgi/types.py CHANGED
@@ -38,6 +38,15 @@ class PromptLog:
     stage: Optional[str] = None


+@dataclass(frozen=True)
+class StageTiming:
+    """Timing metadata for a pipeline stage or sub-step."""
+
+    name: str
+    duration_ms: float
+    step_index: Optional[int] = None
+
+
 def steps_to_serializable(steps: List[ReasoningStep]) -> List[Dict[str, object]]:
     """Helper to convert steps into JSON-friendly dictionaries."""

@@ -85,3 +94,16 @@ def prompt_logs_to_serializable(logs: List[PromptLog]) -> List[Dict[str, object]]:
         item["stage"] = log.stage
         serializable.append(item)
     return serializable
+
+
+def stage_timings_to_serializable(timings: List[StageTiming]) -> List[Dict[str, object]]:
+    serializable: List[Dict[str, object]] = []
+    for timing in timings:
+        item: Dict[str, object] = {
+            "name": timing.name,
+            "duration_ms": timing.duration_ms,
+        }
+        if timing.step_index is not None:
+            item["step_index"] = timing.step_index
+        serializable.append(item)
+    return serializable
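
A quick check of the serializer's output shape (values invented; note that `step_index` is omitted entirely, not emitted as null, when unset):

```python
from corgi.types import StageTiming, stage_timings_to_serializable

print(stage_timings_to_serializable([
    StageTiming(name="structured_reasoning", duration_ms=8200.0),
    StageTiming(name="roi_step_1", duration_ms=5400.0, step_index=1),
]))
# [{'name': 'structured_reasoning', 'duration_ms': 8200.0},
#  {'name': 'roi_step_1', 'duration_ms': 5400.0, 'step_index': 1}]
```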