Spaces:
Running
Running
add truth data viewer
Browse files
- app.py +4 -3
- data_utils.py +4 -1
- eval_utils.py +1 -1
app.py
CHANGED
|
@@ -104,7 +104,8 @@ def sample_explore_item(model_name, size_H, size_W):
|
|
| 104 |
puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
|
| 105 |
cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
|
| 106 |
model_eval_md = f"### π Evaluation:\n\n **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
|
| 107 |
-
|
|
|
|
| 108 |
|
| 109 |
|
| 110 |
def _tab_explore():
|
|
@@ -124,11 +125,11 @@ def _tab_explore():
|
|
| 124 |
puzzle_md = gr.Markdown("### π¦ Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
|
| 125 |
model_reasoning_md = gr.Markdown("### π€ Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
|
| 126 |
model_prediction_md = gr.Markdown("### π¬ Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
|
|
|
|
| 127 |
model_eval_md = gr.Markdown("### π Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
|
| 128 |
-
|
| 129 |
explore_button.click(fn=sample_explore_item,
|
| 130 |
inputs=[model_selection, size_H_selection, size_W_selection],
|
| 131 |
-
outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md])
|
| 132 |
|
| 133 |
|
| 134 |
|
|
|
|
| 104 |
puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
|
| 105 |
cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
|
| 106 |
model_eval_md = f"### π Evaluation:\n\n **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
|
| 107 |
+
turht_solution_md = f"### ✅ Truth Solution:\n\n{explore_item['truth_solution_table']}"
|
| 108 |
+
return puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md
|
| 109 |
|
| 110 |
|
| 111 |
def _tab_explore():
|
|
|
|
| 125 |
puzzle_md = gr.Markdown("### π¦ Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
|
| 126 |
model_reasoning_md = gr.Markdown("### π€ Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
|
| 127 |
model_prediction_md = gr.Markdown("### π¬ Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
|
| 128 |
+
turht_solution_md = gr.Markdown("### ✅ Truth Solution: \n\nTo be loaded", elem_id="truth-solution-md", elem_classes="box_md")
|
| 129 |
model_eval_md = gr.Markdown("### π Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
|
|
|
|
| 130 |
explore_button.click(fn=sample_explore_item,
|
| 131 |
inputs=[model_selection, size_H_selection, size_W_selection],
|
| 132 |
+
outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md])
|
| 133 |
|
| 134 |
|
| 135 |
|
data_utils.py
CHANGED
|
@@ -92,6 +92,8 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
|
|
| 92 |
continue
|
| 93 |
if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
|
| 94 |
continue
|
|
|
|
|
|
|
| 95 |
prediction_reasoning = prediction_json.get("reasoning", "")
|
| 96 |
prediction_table = prediction_json["solution"]
|
| 97 |
if prediction_table is not None:
|
|
@@ -120,10 +122,11 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
|
|
| 120 |
table_md = tabulate(rows, headers=headers, tablefmt="github")
|
| 121 |
explore_item["solution_table_md"] = table_md
|
| 122 |
|
| 123 |
-
this_total_cells, this_correct_cells = eval_each_puzzle(explore_item["id"], prediction_table)
|
| 124 |
# print(table_md)
|
| 125 |
explore_item["correct_cells"] = this_correct_cells
|
| 126 |
explore_item["total_cells"] = this_total_cells
|
|
|
|
| 127 |
return explore_item
|
| 128 |
|
| 129 |
|
|
|
|
| 92 |
continue
|
| 93 |
if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
|
| 94 |
continue
|
| 95 |
+
if "loves the spaghetti eater" in item["puzzle"].lower():
|
| 96 |
+
continue
|
| 97 |
prediction_reasoning = prediction_json.get("reasoning", "")
|
| 98 |
prediction_table = prediction_json["solution"]
|
| 99 |
if prediction_table is not None:
|
|
|
|
| 122 |
table_md = tabulate(rows, headers=headers, tablefmt="github")
|
| 123 |
explore_item["solution_table_md"] = table_md
|
| 124 |
|
| 125 |
+
this_total_cells, this_correct_cells, truth_solution_table = eval_each_puzzle(explore_item["id"], prediction_table)
|
| 126 |
# print(table_md)
|
| 127 |
explore_item["correct_cells"] = this_correct_cells
|
| 128 |
explore_item["total_cells"] = this_total_cells
|
| 129 |
+
explore_item["truth_solution_table"] = tabulate(truth_solution_table["rows"], headers=truth_solution_table["header"], tablefmt="github")
|
| 130 |
return explore_item
|
| 131 |
|
| 132 |
|
eval_utils.py
CHANGED
|
@@ -83,7 +83,7 @@ def eval_each_puzzle(id, prediction_table):
|
|
| 83 |
predicted_cell = prediction_table[house][column].lower().strip()
|
| 84 |
if truth_cell == predicted_cell:
|
| 85 |
this_correct_cells += 1
|
| 86 |
-
return this_total_cells, this_correct_cells
|
| 87 |
|
| 88 |
def eval_model(model, filepath):
|
| 89 |
global private_solutions
|
|
|
|
| 83 |
predicted_cell = prediction_table[house][column].lower().strip()
|
| 84 |
if truth_cell == predicted_cell:
|
| 85 |
this_correct_cells += 1
|
| 86 |
+
return this_total_cells, this_correct_cells, private_solutions[id]
|
| 87 |
|
| 88 |
def eval_model(model, filepath):
|
| 89 |
global private_solutions
|