Ayanami0730 committed on
Commit
3b1cf79
·
1 Parent(s): 4544df4

update latest data

Browse files
data/data_viewer.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:647067a9eec626525fa41f257123b5b35f9daf6e9862467e9dc259f987ce621f
3
- size 40834049
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd1a662c13226e50fe9e690d43e4575ba524e6e73f77d4195d4f012adcee642
3
+ size 42227460
data/fact_results/{doubao-deepresearch → doubao_deep_research_20250630}/fact_result.txt RENAMED
File without changes
data/leaderboard.csv CHANGED
@@ -2,8 +2,8 @@ model,overall_score,comprehensiveness,insight,instruction_following,readability,
2
  gemini-2.5-pro-deepresearch,48.92,48.45,48.30,49.29,49.77,78.30,165.34
3
  openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
4
  claude-research,45.00,45.34,42.79,47.58,44.66,-,-
5
- doubao-deepresearch,44.34,44.84,40.56,47.95,44.69,52.86,52.62
6
- kimi-researcher,42.69,42.82,39.40,45.30,44.68,-,-
7
  perplexity-Research,40.46,39.10,35.65,46.11,43.08,82.63,31.20
8
  grok-deeper-search,38.22,36.08,30.89,46.59,42.17,73.08,8.58
9
  sonar-reasoning-pro,37.76,34.96,31.65,44.93,42.42,45.19,9.39
 
2
  gemini-2.5-pro-deepresearch,48.92,48.45,48.30,49.29,49.77,78.30,165.34
3
  openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
4
  claude-research,45.00,45.34,42.79,47.58,44.66,-,-
5
+ kimi-researcher,44.64,44.96,41.97,47.14,45.59,-,-
6
+ doubao-deepresearch,44.34,44.84,40.56,47.95,44.69,-,-
7
  perplexity-Research,40.46,39.10,35.65,46.11,43.08,82.63,31.20
8
  grok-deeper-search,38.22,36.08,30.89,46.59,42.17,73.08,8.58
9
  sonar-reasoning-pro,37.76,34.96,31.65,44.93,42.42,45.19,9.39
data/raw_data/kimi-researcher.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:868ac817d88b63a7a253ecf4439b85205ad1c49f2879f4b46f1a9a34d6cf804f
3
- size 3773315
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3d49b2ef4a8aaa41e48a2e1d5b19dbf270eea29f4f06b8c01ced7c8b8f9830
3
+ size 5166726
data/raw_results/kimi-researcher/race_result.txt CHANGED
@@ -1,5 +1,5 @@
1
- Comprehensiveness: 0.4282
2
- Insight: 0.3940
3
- Instruction Following: 0.4530
4
- Readability: 0.4468
5
- Overall Score: 0.4269
 
1
+ Comprehensiveness: 0.4496
2
+ Insight: 0.4197
3
+ Instruction Following: 0.4714
4
+ Readability: 0.4559
5
+ Overall Score: 0.4464
data/raw_results/kimi-researcher/raw_results.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2117bd7eb0cc91e705d64cb3013b2bf89cffce190f57b0fde5638a2efd6f027d
3
- size 52510
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008a4e5d05c8524174eaf672316936a94c39554c55d3df106622d1a28f344e2c
3
+ size 52426