Commit
·
3b1cf79
1
Parent(s):
4544df4
update latest data
Browse files- data/data_viewer.jsonl +2 -2
- data/fact_results/{doubao-deepresearch → doubao_deep_research_20250630}/fact_result.txt +0 -0
- data/leaderboard.csv +2 -2
- data/raw_data/kimi-researcher.jsonl +2 -2
- data/raw_results/kimi-researcher/race_result.txt +5 -5
- data/raw_results/kimi-researcher/raw_results.jsonl +2 -2
data/data_viewer.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffd1a662c13226e50fe9e690d43e4575ba524e6e73f77d4195d4f012adcee642
|
3 |
+
size 42227460
|
data/fact_results/{doubao-deepresearch → doubao_deep_research_20250630}/fact_result.txt
RENAMED
File without changes
|
data/leaderboard.csv
CHANGED
@@ -2,8 +2,8 @@ model,overall_score,comprehensiveness,insight,instruction_following,readability,
|
|
2 |
gemini-2.5-pro-deepresearch,48.92,48.45,48.30,49.29,49.77,78.30,165.34
|
3 |
openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
|
4 |
claude-research,45.00,45.34,42.79,47.58,44.66,-,-
|
5 |
-
|
6 |
-
|
7 |
perplexity-Research,40.46,39.10,35.65,46.11,43.08,82.63,31.20
|
8 |
grok-deeper-search,38.22,36.08,30.89,46.59,42.17,73.08,8.58
|
9 |
sonar-reasoning-pro,37.76,34.96,31.65,44.93,42.42,45.19,9.39
|
|
|
2 |
gemini-2.5-pro-deepresearch,48.92,48.45,48.30,49.29,49.77,78.30,165.34
|
3 |
openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
|
4 |
claude-research,45.00,45.34,42.79,47.58,44.66,-,-
|
5 |
+
kimi-researcher,44.64,44.96,41.97,47.14,45.59,-,-
|
6 |
+
doubao-deepresearch,44.34,44.84,40.56,47.95,44.69,-,-
|
7 |
perplexity-Research,40.46,39.10,35.65,46.11,43.08,82.63,31.20
|
8 |
grok-deeper-search,38.22,36.08,30.89,46.59,42.17,73.08,8.58
|
9 |
sonar-reasoning-pro,37.76,34.96,31.65,44.93,42.42,45.19,9.39
|
data/raw_data/kimi-researcher.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d3d49b2ef4a8aaa41e48a2e1d5b19dbf270eea29f4f06b8c01ced7c8b8f9830
|
3 |
+
size 5166726
|
data/raw_results/kimi-researcher/race_result.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
Comprehensiveness: 0.
|
2 |
-
Insight: 0.
|
3 |
-
Instruction Following: 0.
|
4 |
-
Readability: 0.
|
5 |
-
Overall Score: 0.
|
|
|
1 |
+
Comprehensiveness: 0.4496
|
2 |
+
Insight: 0.4197
|
3 |
+
Instruction Following: 0.4714
|
4 |
+
Readability: 0.4559
|
5 |
+
Overall Score: 0.4464
|
data/raw_results/kimi-researcher/raw_results.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:008a4e5d05c8524174eaf672316936a94c39554c55d3df106622d1a28f344e2c
|
3 |
+
size 52426
|