ggcristian commited on
Commit
1df4c13
·
1 Parent(s): b410573

Release TuRTLe V2

Browse files
app.py CHANGED
@@ -191,19 +191,16 @@ with gr.Blocks(
191
 
192
  gr.HTML(
193
  """
194
- <p align="center" style="margin-bottom: -10px;">
195
- <img src='/gradio_api/file=logo.png' alt='TuRTLe Logo' width='220'/> <br/>
196
- </p>
197
  """
198
  )
199
  gr.HTML(
200
  """
201
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
202
  <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
203
- <div style="text-align: center; margin-bottom: 15px;">
204
- <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
205
- Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
206
- Use the filters below to explore different RTL benchmarks and models.</p>
207
  <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
208
  <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
209
  GitHub Repo
@@ -221,9 +218,19 @@ with gr.Blocks(
221
  How to submit
222
  </button>
223
  </a>
224
- <p style="margin-top: 15px;">If you have any inquiries or wish to collaborate:
225
- <a href="mailto:hpai@bsc.es">hpai@bsc.es</a>
226
- </p>
 
 
 
 
 
 
 
 
 
 
227
  </div>
228
  """
229
  )
@@ -353,7 +360,7 @@ with gr.Blocks(
353
  citation_button = gr.Textbox(
354
  value=CITATION_BUTTON_TEXT,
355
  label=CITATION_BUTTON_LABEL,
356
- lines=20,
357
  elem_id="citation-button",
358
  show_copy_button=True,
359
  )
 
191
 
192
  gr.HTML(
193
  """
194
+ <div align="center">
195
+ <img src='/gradio_api/file=logo.png' alt='TuRTLe Logo' width='220'/>
196
+ </div>
197
  """
198
  )
199
  gr.HTML(
200
  """
201
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
202
  <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
203
+ <div style="text-align: center; margin-bottom: 0px; margin-top: 0px;">
 
 
 
204
  <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
205
  <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
206
  GitHub Repo
 
218
  How to submit
219
  </button>
220
  </a>
221
+ <p style="margin-top: 15px;">If you have any inquiries or wish to collaborate:
222
+ <a href="mailto:hpai@bsc.es">hpai@bsc.es</a>
223
+ </p>
224
+ </div>
225
+ """
226
+ )
227
+ gr.HTML(
228
+ """
229
+ <div style=" margin-top:-10px !important;">
230
+ <p style="margin-bottom: 15px; text-align: start !important;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
231
+ Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
232
+ Use the filters below to explore different RTL benchmarks and models.</p>
233
+ <p style="margin-top: 15px; text-align: start !important; "><span style="font-variant: small-caps; font-weight: bold;">NEW UPDATE (JUNE 2025)</span>: We make our framework open-source on GitHub, and add 6 new recent models! For a total of 27 models and 5 RTL benchmarks.</p>
234
  </div>
235
  """
236
  )
 
360
  citation_button = gr.Textbox(
361
  value=CITATION_BUTTON_TEXT,
362
  label=CITATION_BUTTON_LABEL,
363
+ lines=10,
364
  elem_id="citation-button",
365
  show_copy_button=True,
366
  )
results/aggregated_scores.csv CHANGED
@@ -1,22 +1,28 @@
1
- Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
2
- DeepSeek R1,74.84,75.51,77.01,77.81,68.06,54.4
3
- Llama 3.1 405B,49.72,42.8,53.98,42.92,36.43,41.67
4
- Llama 3.(1-3) 70B,39.0,38.49,38.64,37.45,40.12,48.05
5
- Qwen2.5 72B,49.23,48.82,49.17,51.22,49.45,26.75
6
- Qwen2.5 32B,50.58,40.73,50.53,41.85,50.71,30.46
7
- StarChat2 15B v0.1,39.04,38.9,37.45,37.69,44.0,49.99
8
- DeepSeek R1 Distill Qwen 14B,22.98,23.61,23.21,23.47,22.27,24.91
9
- CodeLlama 70B,31.46,31.29,34.17,29.8,22.99,44.96
10
- QwenCoder 2.5 32B,42.53,43.71,42.27,43.96,43.33,41.4
11
- DeepSeek Coder 33B,25.71,36.47,19.49,37.25,45.11,29.29
12
- QwenCoder 2.5 14B,36.75,38.49,35.61,39.03,40.33,33.55
13
- OpenCoder 8B,31.13,34.76,27.12,34.55,43.63,36.67
14
- QwenCoder 2.5 7B,13.86,32.31,6.31,31.75,37.41,37.47
15
- "DeepSeek Coder 6,7B",31.6,30.03,28.69,30.41,40.67,26.61
16
- RTLCoder Mistral,21.86,27.2,22.73,26.21,19.15,36.3
17
- RTLCoder DeepSeek,32.21,37.6,31.75,37.47,33.64,38.81
18
- OriGen,37.22,41.29,46.0,41.97,9.82,35.07
19
- HaVen-CodeQwen,41.66,46.09,42.97,46.57,37.55,41.74
20
- CodeV-CL-7B,28.19,35.7,25.75,35.39,35.79,38.53
21
- CodeV-QW-7B,20.79,47.26,18.73,50.28,27.23,19.55
22
- CodeV-DS-6.7B,18.19,44.1,14.28,47.05,30.39,17.03
 
 
 
 
 
 
 
1
+ Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
2
+ DeepSeek R1,75.53,72.96,77.67,77.55,68.49,57.82
3
+ Llama 3.1 405B,53.23,53.88,56.55,54.35,42.26,52.35
4
+ Qwen3 236B A22B,69.82,61.71,74.83,68.36,53.31,39.8
5
+ Llama 3.(1-3) 70B,39.48,43.29,39.47,40.83,39.53,51.42
6
+ Qwen2.5 72B,49.36,47.23,50.22,50.74,46.51,35.65
7
+ QwQ 32B,62.6,39.46,65.02,38.68,54.6,42.03
8
+ Qwen2.5 32B,50.39,38.93,50.86,41.01,48.86,32.09
9
+ StarChat2 15B v0.1,38.76,38.98,36.68,35.58,45.61,50.2
10
+ DeepSeek R1 Distill Qwen 14B,23.14,23.3,24.94,24.3,17.22,20.01
11
+ CodeLlama 70B,33.04,32.86,32.2,32.27,35.81,34.8
12
+ DeepSeek Coder 33B,27.03,36.31,22.65,37.64,41.47,31.91
13
+ QwenCoder 2.5 32B,44.02,43.75,43.68,44.05,45.15,42.76
14
+ QwenCoder 2.5 14B,37.69,38.97,35.32,40.26,45.5,34.72
15
+ DeepCoder 14B,26.4,30.75,27.32,33.18,23.38,22.75
16
+ OpenCoder 8B,30.06,35.86,26.8,34.57,40.83,40.1
17
+ SeedCoder 8B,50.89,34.05,51.71,36.52,48.2,25.89
18
+ SeedCoder 8B Reasoning,43.75,47.1,47.85,46.76,30.22,48.23
19
+ QwenCoder 2.5 7B,14.15,32.86,6.57,33.0,39.16,32.4
20
+ "DeepSeek Coder 6,7B",31.87,27.89,28.88,28.99,41.75,24.27
21
+ RTLCoder Mistral,21.82,28.65,23.71,26.34,15.58,36.27
22
+ RTLCoder DeepSeek,37.22,36.64,38.33,36.13,33.58,38.33
23
+ OriGen,52.88,51.89,53.0,50.0,52.47,58.12
24
+ CodeV R1 Distill Qwen 7B,36.12,26.84,32.35,20.56,48.57,47.55
25
+ HaVen-CodeQwen,43.58,47.13,44.67,47.23,39.98,46.8
26
+ CodeV-CL-7B,14.73,33.73,12.71,30.93,21.38,42.97
27
+ CodeV-QW-7B,20.37,50.11,18.82,50.64,25.48,48.38
28
+ CodeV-DS-6.7B,19.62,47.1,14.8,46.08,35.52,50.46
results/parse.py CHANGED
@@ -1,96 +1,159 @@
1
  import csv
2
  import json
3
  import locale
 
4
  from typing import Dict, Union
5
 
6
  import pandas as pd
7
 
8
  model_details = {
9
- "DeepSeek R1": ("https://huggingface.co/deepseek-ai/DeepSeek-R1", 685, "General"),
 
 
 
 
 
10
  "Llama 3.1 405B": (
11
- "https://huggingface.co/meta-llama/Llama-3.1-405B",
12
  406,
13
  "General",
 
 
 
 
 
 
 
14
  ),
15
  "Llama 3.(1-3) 70B": (
16
  "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
17
  70.6,
18
  "General",
 
19
  ),
20
  "Qwen2.5 72B": (
21
  "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
22
  72.7,
23
  "General",
 
24
  ),
25
- "Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General"),
 
26
  "StarChat2 15B v0.1": (
27
  "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
28
  16,
29
  "General",
 
30
  ),
31
  "DeepSeek R1 Distill Qwen 14B": (
32
  "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
33
  14.8,
34
  "General",
 
35
  ),
36
  "CodeLlama 70B": (
37
  "https://huggingface.co/codellama/CodeLlama-70b-hf",
38
  69,
39
  "Coding",
 
40
  ),
41
  "QwenCoder 2.5 32B": (
42
  "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
43
  32.5,
44
  "Coding",
 
45
  ),
46
  "DeepSeek Coder 33B": (
47
  "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
48
  33.3,
49
  "Coding",
 
50
  ),
51
  "QwenCoder 2.5 14B": (
52
  "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
53
  14.7,
54
  "Coding",
 
 
 
 
 
 
 
55
  ),
56
  "OpenCoder 8B": (
57
  "https://huggingface.co/infly/OpenCoder-8B-Instruct",
58
  7.77,
59
  "Coding",
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  ),
61
  "QwenCoder 2.5 7B": (
62
  "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
63
  7.61,
64
  "Coding",
 
65
  ),
66
  "DeepSeek Coder 6,7B": (
67
  "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
68
  6.74,
69
  "Coding",
 
70
  ),
71
  "HaVen-CodeQwen": (
72
  "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
73
  7.25,
74
  "RTL-Specific",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  ),
76
- "CodeV-CL-7B": ("https://huggingface.co/yang-z/CodeV-CL-7B", 6.74, "RTL-Specific"),
77
- "CodeV-QW-7B": ("https://huggingface.co/yang-z/CodeV-QW-7B", 7.25, "RTL-Specific"),
78
  "CodeV-DS-6.7B": (
79
  "https://huggingface.co/yang-z/CodeV-DS-6.7B",
80
  6.74,
81
  "RTL-Specific",
 
82
  ),
83
  "RTLCoder Mistral": (
84
  "https://huggingface.co/ishorn5/RTLCoder-v1.1",
85
  7.24,
86
  "RTL-Specific",
 
87
  ),
88
  "RTLCoder DeepSeek": (
89
  "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
90
  6.74,
91
  "RTL-Specific",
 
92
  ),
93
- "OriGen": ("https://huggingface.co/henryen/OriGen_Fix", 6.74, "RTL-Specific"),
94
  }
95
 
96
 
@@ -107,13 +170,14 @@ def get_headers(reader, agg=False) -> Union[list, list]:
107
  return metrics, benchs
108
 
109
 
110
- def get_model_params_and_url(model) -> Union[str, str, float]:
111
  if model not in model_details:
112
  return "-", "-", "-"
113
  url = model_details[model][0]
114
  params = model_details[model][1]
115
  type = model_details[model][2]
116
- return url, params, type
 
117
 
118
 
119
  def parse_results(csv_path: str) -> list[dict]:
@@ -123,12 +187,12 @@ def parse_results(csv_path: str) -> list[dict]:
123
  """
124
  dataset = []
125
  models = []
126
- with open(csv_path, newline="") as csvfile:
127
  reader = csv.reader(csvfile, delimiter=",")
128
  metrics, benchs = get_headers(reader)
129
  for i, row in enumerate(reader):
130
  model = row[0]
131
- url, params, type = get_model_params_and_url(model)
132
  models.append(model)
133
  row = row[1:]
134
  ctr = 0
@@ -143,6 +207,7 @@ def parse_results(csv_path: str) -> list[dict]:
143
  record["Result"] = float(row[ctr].replace(",", "."))
144
  record["Model URL"] = url
145
  record["Params"] = params
 
146
  dataset.append(record)
147
  ctr += 1
148
  print(models)
 
1
  import csv
2
  import json
3
  import locale
4
+ import os
5
  from typing import Dict, Union
6
 
7
  import pandas as pd
8
 
9
  model_details = {
10
+ "DeepSeek R1": (
11
+ "https://huggingface.co/deepseek-ai/DeepSeek-R1",
12
+ 685,
13
+ "General",
14
+ "V1",
15
+ ),
16
  "Llama 3.1 405B": (
17
+ "https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
18
  406,
19
  "General",
20
+ "V1",
21
+ ),
22
+ "Qwen3 236B A22B": (
23
+ "https://huggingface.co/Qwen/Qwen3-235B-A22B",
24
+ 235,
25
+ "General",
26
+ "V2",
27
  ),
28
  "Llama 3.(1-3) 70B": (
29
  "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
30
  70.6,
31
  "General",
32
+ "V1",
33
  ),
34
  "Qwen2.5 72B": (
35
  "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
36
  72.7,
37
  "General",
38
+ "V1",
39
  ),
40
+ "QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"),
41
+ "Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"),
42
  "StarChat2 15B v0.1": (
43
  "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
44
  16,
45
  "General",
46
+ "V1",
47
  ),
48
  "DeepSeek R1 Distill Qwen 14B": (
49
  "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
50
  14.8,
51
  "General",
52
+ "V1",
53
  ),
54
  "CodeLlama 70B": (
55
  "https://huggingface.co/codellama/CodeLlama-70b-hf",
56
  69,
57
  "Coding",
58
+ "V1",
59
  ),
60
  "QwenCoder 2.5 32B": (
61
  "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
62
  32.5,
63
  "Coding",
64
+ "V1",
65
  ),
66
  "DeepSeek Coder 33B": (
67
  "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
68
  33.3,
69
  "Coding",
70
+ "V1",
71
  ),
72
  "QwenCoder 2.5 14B": (
73
  "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
74
  14.7,
75
  "Coding",
76
+ "V1",
77
+ ),
78
+ "DeepCoder 14B": (
79
+ "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
80
+ 14.8,
81
+ "Coding",
82
+ "V2",
83
  ),
84
  "OpenCoder 8B": (
85
  "https://huggingface.co/infly/OpenCoder-8B-Instruct",
86
  7.77,
87
  "Coding",
88
+ "V1",
89
+ ),
90
+ "SeedCoder 8B": (
91
+ "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
92
+ 8.25,
93
+ "Coding",
94
+ "V2",
95
+ ),
96
+ "SeedCoder 8B Reasoning": (
97
+ "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
98
+ 8.25,
99
+ "Coding",
100
+ "V2",
101
  ),
102
  "QwenCoder 2.5 7B": (
103
  "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
104
  7.61,
105
  "Coding",
106
+ "V1",
107
  ),
108
  "DeepSeek Coder 6,7B": (
109
  "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
110
  6.74,
111
  "Coding",
112
+ "V1",
113
  ),
114
  "HaVen-CodeQwen": (
115
  "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
116
  7.25,
117
  "RTL-Specific",
118
+ "V1",
119
+ ),
120
+ "CodeV R1 Distill Qwen 7B": (
121
+ "https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
122
+ 7.62,
123
+ "RTL-Specific",
124
+ "V2",
125
+ ),
126
+ "CodeV-CL-7B": (
127
+ "https://huggingface.co/yang-z/CodeV-CL-7B",
128
+ 6.74,
129
+ "RTL-Specific",
130
+ "V1",
131
+ ),
132
+ "CodeV-QW-7B": (
133
+ "https://huggingface.co/yang-z/CodeV-QW-7B",
134
+ 7.25,
135
+ "RTL-Specific",
136
+ "V1",
137
  ),
 
 
138
  "CodeV-DS-6.7B": (
139
  "https://huggingface.co/yang-z/CodeV-DS-6.7B",
140
  6.74,
141
  "RTL-Specific",
142
+ "V1",
143
  ),
144
  "RTLCoder Mistral": (
145
  "https://huggingface.co/ishorn5/RTLCoder-v1.1",
146
  7.24,
147
  "RTL-Specific",
148
+ "V1",
149
  ),
150
  "RTLCoder DeepSeek": (
151
  "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
152
  6.74,
153
  "RTL-Specific",
154
+ "V1",
155
  ),
156
+ "OriGen": ("https://huggingface.co/henryen/OriGen_Fix", 6.74, "RTL-Specific", "V1"),
157
  }
158
 
159
 
 
170
  return metrics, benchs
171
 
172
 
173
+ def get_model_params_and_url(model) -> Union[str, str, float, str]:
174
  if model not in model_details:
175
  return "-", "-", "-"
176
  url = model_details[model][0]
177
  params = model_details[model][1]
178
  type = model_details[model][2]
179
+ release = model_details[model][3]
180
+ return url, params, type, release
181
 
182
 
183
  def parse_results(csv_path: str) -> list[dict]:
 
187
  """
188
  dataset = []
189
  models = []
190
+ with open(os.path.join("results", csv_path), newline="") as csvfile:
191
  reader = csv.reader(csvfile, delimiter=",")
192
  metrics, benchs = get_headers(reader)
193
  for i, row in enumerate(reader):
194
  model = row[0]
195
+ url, params, type, release = get_model_params_and_url(model)
196
  models.append(model)
197
  row = row[1:]
198
  ctr = 0
 
207
  record["Result"] = float(row[ctr].replace(",", "."))
208
  record["Model URL"] = url
209
  record["Params"] = params
210
+ record["Release"] = release
211
  dataset.append(record)
212
  ctr += 1
213
  print(models)
results/results.csv CHANGED
@@ -1,23 +1,29 @@
1
  ,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area,EM,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area
2
  ,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,RTL-Repo,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen
3
- DeepSeek R1,"96,54","91,43","79,74","67,76","78,97","63,27","38,94","35,64","37,82","31,95","38,76","34,5","33,02","97,95","94,12","80,26",60,"79,62","54,12","39,35","26,62","38,16","26,97","39,21","28,01"
4
- Llama 3.1 405B,"89,1","65,71","57,05","37,55","56,67","35,92","27,18","19,7",27,"16,04","26,79","18,91","33,29","91,41","72,94","44,74","45,88","44,1","44,71","21,98","19,24","20,74","22,19","21,66","21,08"
5
- Llama 3.(1-3) 70B,"67,69","73,88",40,"43,27","39,87","39,18","19,76","20,58","18,69","19,32","19,51","20,28","28,62","70,51","78,82","38,59","48,24","37,95","48,24","18,97","23,22","18,35","24,15","18,86","24,71"
6
- Qwen2.5 72B,"81,15","82,04","51,15","47,35","50,38","46,53","25,4","24,83","23,83","23,88","24,52","25,46","37,19","81,67","70,59","53,08","27,06","52,56","27,06","26,08","12,74","24,92","13,5","25,83","13,89"
7
- Qwen2.5 32B,"89,36","86,94","52,95","50,61","51,67","46,94","26,02","25,9","24,46","23,87","25,32","26,29","28,67","93,46","67,06","43,08","32,94","42,31","30,59","21,14",15,"20,48","15,38","21,15","15,31"
8
- StarChat2 15B v0.1,"86,54","85,71","38,72","42,45","38,59","42,45","19,18","22,44","17,99","21,03",19,"22,53","13,24","81,54","92,94","39,36","50,59","38,59","50,59","19,21","24,02","18,28","25,23","19,05","25,73"
9
- DeepSeek R1 Distill Qwen 14B,"41,28","40,82","23,85","20,82","23,72","20,41","11,81","12,46","11,25","10,24","11,76","10,7","20,65","42,18","62,35","24,36","25,88","24,1","25,88","11,96","11,54","11,38","12,93","11,86","12,9"
10
- CodeLlama 70B,"72,05","41,63","35,51","23,27","35,38","22,86","17,32","11,92","16,74","10,85","17,2","11,71","24,58","89,36","89,41","30,9","45,88","30,9","45,88","15,3","21,74","14,19","22,88","15,21","22,82"
11
- QwenCoder 2.5 32B,"87,69","79,59","45,64","43,27","43,33","42,04","21,51","22,02","20,72","20,95","21,17","22,03","30,44","84,87","72,94","45,51","41,18","44,87","41,18","22,26","20,56","21,48","20,67","22,2","20,87"
12
- DeepSeek Coder 33B,"57,82","83,67","19,87","43,67","19,87","42,86","9,94","23,28","9,83","21,19","9,47","23,2","30,58","78,72","83,53","39,49","29,41","38,33","29,41","18,92","14,52","18,2","14,74","18,76","14,67"
13
- QwenCoder 2.5 14B,"79,74","78,37","37,82","41,63","37,05","40,41","18,03","20,14","17,6","20,1","17,78","20,25","37,16","79,36","67,06","40,26","34,12","39,49","34,12","19,74","16,5","19,07","17,07","19,73","16,75"
14
- OpenCoder 8B,"75,77","75,1","28,59","46,53","28,21","42,86","13,81","22,24","13,16","21,47","13,71","21,73","16,63","79,87","92,94","36,03","43,53","35,51","37,65","17,57","17,19","16,74","18,76","17,52","19,06"
15
- QwenCoder 2.5 7B,"19,62","77,96","6,41","37,96","6,41","35,51","3,12","19,26","3,18","17,98","3,16","18,87","28,45","75,9","71,76","32,44","37,65","32,44","37,65","16,2","18,38","15,26","18,91","16,16","18,92"
16
- "DeepSeek Coder 6,7B","80,12","78,37","29,87","40,41","29,36","37,96","14,71","20,72","13,69","19,25","14,64","21,03","24,57","68,85","81,18","32,82","27,06","31,15","27,06","15,53","12,94","14,62","13,39","15,46","13,58"
17
- RTLCoder Mistral,"52,05","38,78","23,59","19,18","23,59","19,18","11,67","10,08","10,87","8,7","11,56","9,95","14,97","63,59","85,88","26,92","35,29","26,92","35,29","13,43","18,49","12,53","17,61","13,36","18,35"
18
- RTLCoder DeepSeek,"75,26","68,57","33,33","37,14","32,95","33,06","16,02","17,29","15,71","16,35","15,9","16,82","19,76","84,1","84,71","39,23","38,82","38,59","38,82","19,08","19,1","18,31","19,35","18,82","19,76"
19
- OriGen,"91,02","23,67","46,54","12,65","46,92","10,61","23,38","5,33","22,18","4,61","23,44","4,79","19,45","79,35","87,06","43,07","35,29","42,95","35,29","21,5","16,55","20,13","17,7","21,33","18,35"
20
- HaVen-CodeQwen,"90,26","82,45","45,9","40,41","44,36","38,37","21,77","19,1","21,23","18,31","21,46","18,92","25,38","93,33","97,65",50,"48,24","48,72","42,35","23,37","20,21","23,39","21,15","23,09","21,25"
21
- CodeV-CL-7B,"55,38","69,8","27,05","37,14","26,79","35,1","13,2","18,92","12,39","16,88","13,03","17,89","12,39","91,92","98,82","36,79","44,71","36,41","38,82","18,15","19,06","16,88","19,38","18,05","19,35"
22
- CodeV-QW-7B,"41,79","71,02","19,1","35,51","18,72","27,76","9,36","14,85","9,36","12,21","9,38","13,78","20,56","93,85","57,65","52,56","25,88","51,15",20,"25,64","9,39","24,22","9,99","25,56","9,94"
23
- CodeV-DS-6.7B,"30,77","62,45","14,87","33,88","14,62","30,61","7,3","15,49","6,9","14,75","7,22","15,35","21,06","95,13","58,82","48,85","23,53","48,33","17,65","24,02","8,26","22,82","8,81","23,73","8,47"
 
 
 
 
 
 
 
1
  ,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area,EM,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area
2
  ,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,RTL-Repo,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen
3
+ DeepSeek R1,97.18,89.80,79.74,65.71,79.62,63.27,78.33,71.34,76.49,64.06,78.19,70.08,-1,97.44,96.47,79.49,60.00,79.49,60.00,78.27,50.25,76.43,60.15,77.96,63.07
4
+ Llama 3.1 405B,87.44,77.14,58.97,45.71,58.85,41.63,57.58,50.88,55.93,32.44,56.13,43.45,34.62,88.59,95.29,56.15,52.94,55.90,52.94,55.13,49.22,53.45,52.52,54.48,55.31
5
+ Qwen3 236B A22B,91.28,80.41,76.92,53.06,76.79,51.43,75.25,57.77,73.56,49.20,75.67,52.95,41.94,82.18,91.76,69.62,40.00,69.62,40.00,69.04,39.09,66.89,40.16,69.15,40.14
6
+ Llama 3.(1-3) 70B,66.15,73.88,40.64,42.45,40.64,39.18,40.46,40.81,38.08,38.14,39.86,39.65,28.72,84.74,89.41,41.67,51.76,41.67,51.76,41.38,50.61,39.75,51.76,41.36,51.88
7
+ Qwen2.5 72B,82.18,79.59,52.44,45.31,51.92,44.08,51.83,46.47,48.75,45.40,50.09,47.65,37.44,80.90,84.71,52.95,35.29,52.69,35.29,51.66,35.82,49.37,35.20,51.18,35.94
8
+ QwQ 32B,87.95,82.45,66.41,56.73,66.41,52.24,66.15,55.83,63.80,51.91,65.12,56.07,-1,58.97,68.24,40.00,42.35,39.62,42.35,39.40,40.90,37.53,42.31,39.10,42.87
9
+ Qwen2.5 32B,88.59,84.08,52.56,50.20,52.18,46.12,52.32,49.73,49.43,46.43,50.82,50.43,28.93,93.21,85.88,41.54,32.94,41.54,32.94,41.31,30.65,40.48,33.11,41.23,32.50
10
+ StarChat2 15B v0.1,88.46,84.90,37.95,44.49,37.95,44.08,37.56,46.95,35.30,43.22,37.19,46.65,13.42,79.74,92.94,36.41,51.76,36.03,51.76,36.08,46.30,34.91,51.49,35.76,52.80
11
+ DeepSeek R1 Distill Qwen 14B,42.18,34.69,25.51,18.37,25.51,16.33,25.36,17.86,24.19,16.48,25.27,17.33,-1,45.00,44.71,25.64,21.18,25.26,21.18,24.79,17.65,23.48,21.08,24.63,21.29
12
+ CodeLlama 70B,67.05,69.80,33.08,36.33,33.08,34.29,32.69,37.19,31.46,34.29,32.44,35.95,24.33,90.77,88.24,33.33,35.29,33.33,35.29,33.02,34.03,30.80,35.15,32.99,35.21
13
+ DeepSeek Coder 33B,62.82,83.67,23.33,42.45,23.08,42.04,22.86,42.29,22.81,39.42,22.29,42.71,24.58,75.26,88.24,39.62,31.76,39.36,31.76,38.23,32.16,36.79,31.46,37.90,32.12
14
+ QwenCoder 2.5 32B,87.18,77.96,45.00,43.27,44.87,43.27,44.25,46.82,43.03,43.20,43.76,45.42,31.07,83.72,87.06,45.64,42.35,45.13,42.35,44.59,42.79,43.01,42.24,44.55,43.25
15
+ QwenCoder 2.5 14B,78.97,81.63,37.82,46.12,37.44,45.31,35.94,45.82,34.83,44.64,35.18,46.05,37.53,80.00,83.53,41.67,35.29,41.15,35.29,40.74,34.17,39.20,35.32,40.83,34.67
16
+ DeepCoder 14B,43.85,39.59,28.08,23.67,28.08,22.04,27.94,25.00,26.26,22.00,27.77,23.15,-1,61.92,48.24,34.10,23.53,33.72,23.53,33.70,21.18,32.17,23.43,33.67,23.65
17
+ OpenCoder 8B,78.21,75.92,28.46,42.86,27.82,40.82,27.34,41.36,25.95,39.77,27.11,41.36,16.17,80.00,95.29,35.64,41.18,35.38,41.18,35.12,37.69,33.47,41.05,35.13,41.55
18
+ SeedCoder 8B,91.41,85.31,53.46,47.35,53.33,46.53,52.86,49.42,50.62,45.60,51.65,49.59,28.23,77.44,94.12,37.31,30.59,37.31,27.06,37.32,23.53,35.35,26.92,36.89,27.23
19
+ SeedCoder 8B Reasoning,67.82,53.47,49.23,30.20,49.23,29.39,48.92,32.04,46.76,28.64,47.87,29.99,-1,83.33,78.82,48.21,50.59,48.08,50.59,47.78,41.74,45.44,50.02,47.06,52.92
20
+ QwenCoder 2.5 7B,20.13,76.33,6.92,38.78,6.67,37.14,6.51,40.65,6.63,37.25,6.56,39.58,28.33,74.10,90.59,33.72,32.94,33.72,32.94,33.59,30.67,31.78,33.01,33.62,33.51
21
+ "DeepSeek Coder 6,7B",82.05,78.78,29.62,41.22,29.49,38.78,29.51,42.62,27.73,39.33,29.41,43.30,24.63,67.18,84.71,31.67,24.71,29.87,24.71,29.78,23.53,27.98,24.50,29.21,24.79
22
+ RTLCoder Mistral,54.87,32.24,24.62,16.33,24.62,15.92,24.28,16.03,22.78,14.71,24.06,16.00,14.77,60.51,85.88,27.05,36.47,27.05,36.47,26.94,34.63,25.22,36.55,26.87,37.64
23
+ RTLCoder DeepSeek,84.62,73.06,39.49,37.14,39.49,34.69,38.91,34.30,37.52,32.76,38.55,33.69,19.35,77.31,85.88,36.92,40.00,36.79,40.00,36.94,35.57,34.84,39.83,36.62,39.60
24
+ OriGen,96.15,81.63,54.23,50.61,54.23,50.61,54.29,53.10,51.57,50.86,53.15,53.44,17.07,92.44,98.82,50.77,58.82,50.77,58.82,50.95,54.14,48.53,58.81,50.51,61.40
25
+ CodeV R1 Distill Qwen 7B,56.92,73.06,33.33,49.80,33.33,47.35,32.58,49.25,32.01,47.45,32.45,49.01,-1,92.69,89.41,21.28,49.41,21.28,49.41,21.04,43.68,19.59,49.06,21.05,49.91
26
+ HaVen-CodeQwen,93.33,80.41,47.31,42.86,46.15,41.22,45.08,40.59,44.26,38.83,44.68,40.53,25.14,93.59,100.00,50.13,47.06,49.49,47.06,47.55,46.60,47.05,47.14,47.09,46.67
27
+ CodeV-CL-7B,32.18,48.16,13.08,24.49,12.95,21.63,12.80,22.25,12.51,20.59,12.82,21.29,12.27,92.05,98.82,31.79,43.53,31.79,43.53,31.74,42.25,29.45,43.46,31.61,43.20
28
+ CodeV-QW-7B,45.38,68.16,19.62,34.29,18.97,26.53,18.91,28.14,18.71,21.80,18.85,26.50,20.94,93.33,100.00,52.31,48.24,51.54,48.24,51.69,48.14,48.79,48.18,51.45,48.81
29
+ CodeV-DS-6.7B,33.59,67.35,15.00,38.78,15.00,37.14,15.10,35.56,14.46,35.13,14.85,35.88,21.26,95.51,100.00,47.05,50.59,47.05,50.59,47.37,50.47,44.35,50.54,46.52,50.36
results/results.json CHANGED
The diff for this file is too large to render. See raw diff
 
results/v1/aggregated_scores.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
2
+ DeepSeek R1,74.84,75.51,77.01,77.81,68.06,54.4
3
+ Llama 3.1 405B,49.72,42.8,53.98,42.92,36.43,41.67
4
+ Llama 3.(1-3) 70B,39.0,38.49,38.64,37.45,40.12,48.05
5
+ Qwen2.5 72B,49.23,48.82,49.17,51.22,49.45,26.75
6
+ Qwen2.5 32B,50.58,40.73,50.53,41.85,50.71,30.46
7
+ StarChat2 15B v0.1,39.04,38.9,37.45,37.69,44.0,49.99
8
+ DeepSeek R1 Distill Qwen 14B,22.98,23.61,23.21,23.47,22.27,24.91
9
+ CodeLlama 70B,31.46,31.29,34.17,29.8,22.99,44.96
10
+ QwenCoder 2.5 32B,42.53,43.71,42.27,43.96,43.33,41.4
11
+ DeepSeek Coder 33B,25.71,36.47,19.49,37.25,45.11,29.29
12
+ QwenCoder 2.5 14B,36.75,38.49,35.61,39.03,40.33,33.55
13
+ OpenCoder 8B,31.13,34.76,27.12,34.55,43.63,36.67
14
+ QwenCoder 2.5 7B,13.86,32.31,6.31,31.75,37.41,37.47
15
+ "DeepSeek Coder 6,7B",31.6,30.03,28.69,30.41,40.67,26.61
16
+ RTLCoder Mistral,21.86,27.2,22.73,26.21,19.15,36.3
17
+ RTLCoder DeepSeek,32.21,37.6,31.75,37.47,33.64,38.81
18
+ OriGen,37.22,41.29,46.0,41.97,9.82,35.07
19
+ HaVen-CodeQwen,41.66,46.09,42.97,46.57,37.55,41.74
20
+ CodeV-CL-7B,28.19,35.7,25.75,35.39,35.79,38.53
21
+ CodeV-QW-7B,20.79,47.26,18.73,50.28,27.23,19.55
22
+ CodeV-DS-6.7B,18.19,44.1,14.28,47.05,30.39,17.03
results/v1/results.csv ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area,EM,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area
2
+ ,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,RTL-Repo,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen
3
+ DeepSeek R1,"96,54","91,43","79,74","67,76","78,97","63,27","38,94","35,64","37,82","31,95","38,76","34,5","33,02","97,95","94,12","80,26",60,"79,62","54,12","39,35","26,62","38,16","26,97","39,21","28,01"
4
+ Llama 3.1 405B,"89,1","65,71","57,05","37,55","56,67","35,92","27,18","19,7",27,"16,04","26,79","18,91","33,29","91,41","72,94","44,74","45,88","44,1","44,71","21,98","19,24","20,74","22,19","21,66","21,08"
5
+ Llama 3.(1-3) 70B,"67,69","73,88",40,"43,27","39,87","39,18","19,76","20,58","18,69","19,32","19,51","20,28","28,62","70,51","78,82","38,59","48,24","37,95","48,24","18,97","23,22","18,35","24,15","18,86","24,71"
6
+ Qwen2.5 72B,"81,15","82,04","51,15","47,35","50,38","46,53","25,4","24,83","23,83","23,88","24,52","25,46","37,19","81,67","70,59","53,08","27,06","52,56","27,06","26,08","12,74","24,92","13,5","25,83","13,89"
7
+ Qwen2.5 32B,"89,36","86,94","52,95","50,61","51,67","46,94","26,02","25,9","24,46","23,87","25,32","26,29","28,67","93,46","67,06","43,08","32,94","42,31","30,59","21,14",15,"20,48","15,38","21,15","15,31"
8
+ StarChat2 15B v0.1,"86,54","85,71","38,72","42,45","38,59","42,45","19,18","22,44","17,99","21,03",19,"22,53","13,24","81,54","92,94","39,36","50,59","38,59","50,59","19,21","24,02","18,28","25,23","19,05","25,73"
9
+ DeepSeek R1 Distill Qwen 14B,"41,28","40,82","23,85","20,82","23,72","20,41","11,81","12,46","11,25","10,24","11,76","10,7","20,65","42,18","62,35","24,36","25,88","24,1","25,88","11,96","11,54","11,38","12,93","11,86","12,9"
10
+ CodeLlama 70B,"72,05","41,63","35,51","23,27","35,38","22,86","17,32","11,92","16,74","10,85","17,2","11,71","24,58","89,36","89,41","30,9","45,88","30,9","45,88","15,3","21,74","14,19","22,88","15,21","22,82"
11
+ QwenCoder 2.5 32B,"87,69","79,59","45,64","43,27","43,33","42,04","21,51","22,02","20,72","20,95","21,17","22,03","30,44","84,87","72,94","45,51","41,18","44,87","41,18","22,26","20,56","21,48","20,67","22,2","20,87"
12
+ DeepSeek Coder 33B,"57,82","83,67","19,87","43,67","19,87","42,86","9,94","23,28","9,83","21,19","9,47","23,2","30,58","78,72","83,53","39,49","29,41","38,33","29,41","18,92","14,52","18,2","14,74","18,76","14,67"
13
+ QwenCoder 2.5 14B,"79,74","78,37","37,82","41,63","37,05","40,41","18,03","20,14","17,6","20,1","17,78","20,25","37,16","79,36","67,06","40,26","34,12","39,49","34,12","19,74","16,5","19,07","17,07","19,73","16,75"
14
+ OpenCoder 8B,"75,77","75,1","28,59","46,53","28,21","42,86","13,81","22,24","13,16","21,47","13,71","21,73","16,63","79,87","92,94","36,03","43,53","35,51","37,65","17,57","17,19","16,74","18,76","17,52","19,06"
15
+ QwenCoder 2.5 7B,"19,62","77,96","6,41","37,96","6,41","35,51","3,12","19,26","3,18","17,98","3,16","18,87","28,45","75,9","71,76","32,44","37,65","32,44","37,65","16,2","18,38","15,26","18,91","16,16","18,92"
16
+ "DeepSeek Coder 6,7B","80,12","78,37","29,87","40,41","29,36","37,96","14,71","20,72","13,69","19,25","14,64","21,03","24,57","68,85","81,18","32,82","27,06","31,15","27,06","15,53","12,94","14,62","13,39","15,46","13,58"
17
+ RTLCoder Mistral,"52,05","38,78","23,59","19,18","23,59","19,18","11,67","10,08","10,87","8,7","11,56","9,95","14,97","63,59","85,88","26,92","35,29","26,92","35,29","13,43","18,49","12,53","17,61","13,36","18,35"
18
+ RTLCoder DeepSeek,"75,26","68,57","33,33","37,14","32,95","33,06","16,02","17,29","15,71","16,35","15,9","16,82","19,76","84,1","84,71","39,23","38,82","38,59","38,82","19,08","19,1","18,31","19,35","18,82","19,76"
19
+ OriGen,"91,02","23,67","46,54","12,65","46,92","10,61","23,38","5,33","22,18","4,61","23,44","4,79","19,45","79,35","87,06","43,07","35,29","42,95","35,29","21,5","16,55","20,13","17,7","21,33","18,35"
20
+ HaVen-CodeQwen,"90,26","82,45","45,9","40,41","44,36","38,37","21,77","19,1","21,23","18,31","21,46","18,92","25,38","93,33","97,65",50,"48,24","48,72","42,35","23,37","20,21","23,39","21,15","23,09","21,25"
21
+ CodeV-CL-7B,"55,38","69,8","27,05","37,14","26,79","35,1","13,2","18,92","12,39","16,88","13,03","17,89","12,39","91,92","98,82","36,79","44,71","36,41","38,82","18,15","19,06","16,88","19,38","18,05","19,35"
22
+ CodeV-QW-7B,"41,79","71,02","19,1","35,51","18,72","27,76","9,36","14,85","9,36","12,21","9,38","13,78","20,56","93,85","57,65","52,56","25,88","51,15",20,"25,64","9,39","24,22","9,99","25,56","9,94"
23
+ CodeV-DS-6.7B,"30,77","62,45","14,87","33,88","14,62","30,61","7,3","15,49","6,9","14,75","7,22","15,35","21,06","95,13","58,82","48,85","23,53","48,33","17,65","24,02","8,26","22,82","8,81","23,73","8,47"
results/v1/results.json ADDED
The diff for this file is too large to render. See raw diff
 
utils.py CHANGED
@@ -1,89 +1,216 @@
1
- import pandas as pd
 
2
  import gradio as gr
3
- import plotly.graph_objects as go
4
- import plotly.express as px
5
  import numpy as np
 
 
 
6
 
7
- type_emoji = {
8
- "RTL-Specific": "🔴",
9
- "General": "🟢",
10
- "Coding": "🔵"
11
- }
 
 
 
12
 
13
- def model_hyperlink(link, model_name):
14
- return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
15
 
16
  def handle_special_cases(benchmark, metric):
17
- if metric == 'Exact Matching (EM)':
18
- benchmark = 'RTL-Repo'
19
- elif benchmark == 'RTL-Repo':
20
- metric = 'Exact Matching (EM)'
21
  return benchmark, metric
22
 
 
23
  def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
24
- details = subset[['Model', 'Model URL', 'Model Type', 'Params']].drop_duplicates('Model')
25
- filtered_df = subset[['Model', 'Score']].rename(columns={'Score': 'Exact Matching (EM)'})
26
- filtered_df = pd.merge(filtered_df, details, on='Model', how='left')
27
- filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
28
- filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
29
- filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
30
- filtered_df = filtered_df.sort_values(by='Exact Matching (EM)', ascending=False).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
31
  return filtered_df
32
 
 
33
  def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
34
- details = subset[['Model', 'Model URL', 'Model Type', 'Params']].drop_duplicates('Model')
35
- pivot_df = subset.pivot_table(index='Model', columns='Metric', values='Score', aggfunc='mean').reset_index()
36
-
 
 
 
 
37
  if df_agg is not None and agg_column is not None and agg_column in df_agg.columns:
38
- agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
39
- pivot_df = pd.merge(pivot_df, agg_data, on='Model', how='left')
40
- else:# fallback
41
- pivot_df['Aggregated ⬆️'] = pivot_df.mean(axis=1, numeric_only=True).round(2)
42
-
43
- pivot_df = pd.merge(pivot_df, details, on='Model', how='left')
44
- pivot_df['Model'] = pivot_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
45
- pivot_df['Type'] = pivot_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
46
- pivot_df.rename(columns={'Syntax (STX)': 'STX', 'Functionality (FNC)': 'FNC', 'Synthesis (SYN)': 'SYN', 'Performance': 'Perf'}, inplace=True)
47
-
48
- columns_order = ['Type', 'Model', 'Params', 'Aggregated ⬆️', 'STX', 'FNC', 'SYN', 'Power', 'Perf', 'Area']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  pivot_df = pivot_df[[col for col in columns_order if col in pivot_df.columns]]
50
- pivot_df = pivot_df.sort_values(by='Aggregated ⬆️', ascending=False).reset_index(drop=True)
 
 
51
  return pivot_df
52
 
53
- def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
54
- details = subset[['Model', 'Model URL', 'Model Type', 'Params']].drop_duplicates('Model')
55
- pivot_df = subset.pivot_table(index='Model', columns='Metric', values='Score', aggfunc='mean').reset_index().round(2)
56
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  if df_agg is not None:
58
  if agg_column is not None and agg_column in df_agg.columns:
59
- agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
60
- pivot_df = pd.merge(pivot_df, agg_data, on='Model', how='left')
 
 
61
  else:
62
- agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
63
  if agg_columns:
64
- df_agg['Average_Agg'] = df_agg[agg_columns].mean(axis=1)
65
- agg_data = df_agg[['Model', 'Average_Agg']].rename(columns={'Average_Agg': 'Aggregated ⬆️'})
66
- pivot_df = pd.merge(pivot_df, agg_data, on='Model', how='left')
67
- else: # fallback
68
- pivot_df['Aggregated ⬆️'] = pivot_df.mean(axis=1, numeric_only=True).round(2)
69
- else: # fallback
70
- pivot_df['Aggregated ⬆️'] = pivot_df.mean(axis=1, numeric_only=True).round(2)
71
-
72
- pivot_df = pd.merge(pivot_df, details, on='Model', how='left')
73
- pivot_df['Model'] = pivot_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
74
- pivot_df['Type'] = pivot_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
75
-
76
- pivot_df.rename(columns={
77
- 'Exact Matching (EM)': 'EM',
78
- 'Syntax (STX)': 'Avg STX',
79
- 'Functionality (FNC)': 'Avg FNC',
80
- 'Synthesis (SYN)': 'Avg SYN',
81
- 'Power': 'Avg Power',
82
- 'Performance': 'Avg Perf',
83
- 'Area': 'Avg Area',
84
- }, inplace=True)
85
-
86
- columns_order = ['Type', 'Model', 'Params', 'Aggregated ⬆️', 'Avg STX', 'Avg FNC', 'Avg SYN', 'Avg Power', 'Avg Perf', 'Avg Area']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  pivot_df = pivot_df[[col for col in columns_order if col in pivot_df.columns]]
88
- pivot_df = pivot_df.sort_values(by='Aggregated ⬆️', ascending=False).reset_index(drop=True)
 
 
89
  return pivot_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+
3
  import gradio as gr
 
 
4
  import numpy as np
5
+ import pandas as pd
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
 
9
# Colour badge shown in the leaderboard "Type" column, keyed by model family.
type_emoji = {
    "RTL-Specific": "🔴",
    "General": "🟢",
    "Coding": "🔵",
}
10
+
11
+
12
def model_hyperlink(link, model_name, release):
    """Render *model_name* as an HTML anchor to *link*.

    Models from any release other than "V1" get a small-caps "new" badge
    appended so they stand out in the leaderboard.
    """
    anchor = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
    badge = ' <span style="font-variant: all-small-caps; font-weight: 600">new</span>'
    return anchor if release == "V1" else anchor + badge
17
 
 
 
18
 
19
def handle_special_cases(benchmark, metric):
    """Keep the benchmark/metric pair consistent for RTL-Repo.

    RTL-Repo is only scored by Exact Matching, so selecting either the
    EM metric or the RTL-Repo benchmark forces the other one.
    Returns the (possibly adjusted) ``(benchmark, metric)`` pair.
    """
    em = "Exact Matching (EM)"
    if metric == em:
        return "RTL-Repo", metric
    if benchmark == "RTL-Repo":
        return benchmark, em
    return benchmark, metric
25
 
26
+
27
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
    """Build the RTL-Repo leaderboard view, one row per model.

    Rows with a negative ``Score`` (used as a "not evaluated" marker —
    TODO confirm against the data loader) are dropped; the remaining rows
    are linked, badged, and sorted by Exact Matching score, descending.
    """
    kept = subset.drop(subset[subset.Score < 0.0].index)
    meta = kept[
        ["Model", "Model URL", "Model Type", "Params", "Release"]
    ].drop_duplicates("Model")
    table = kept[["Model", "Score"]].rename(columns={"Score": "Exact Matching (EM)"})
    table = table.merge(meta, on="Model", how="left")
    # Replace the plain model name with an HTML link (plus a "new" badge
    # for post-V1 releases).
    table["Model"] = [
        model_hyperlink(url, name, rel)
        for url, name, rel in zip(table["Model URL"], table["Model"], table["Release"])
    ]
    table["Type"] = table["Model Type"].map(lambda kind: type_emoji.get(kind, ""))
    table = table[["Type", "Model", "Params", "Exact Matching (EM)"]]
    return table.sort_values(by="Exact Matching (EM)", ascending=False).reset_index(
        drop=True
    )
46
 
47
+
48
def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
    """Build a single-benchmark leaderboard table from long-format score rows.

    *subset* holds one row per (Model, Metric) with a ``Score``; it is
    pivoted to one column per metric. The "Aggregated ⬆️" column is taken
    from ``df_agg[agg_column]`` when both are supplied; otherwise it falls
    back to the row-wise mean of the pivoted metric columns.
    """
    meta = subset[
        ["Model", "Model URL", "Model Type", "Params", "Release"]
    ].drop_duplicates("Model")
    table = subset.pivot_table(
        index="Model", columns="Metric", values="Score", aggfunc="mean"
    ).reset_index()

    has_precomputed = (
        df_agg is not None and agg_column is not None and agg_column in df_agg.columns
    )
    if has_precomputed:
        agg = df_agg[["Model", agg_column]].rename(
            columns={agg_column: "Aggregated ⬆️"}
        )
        table = table.merge(agg, on="Model", how="left")
    else:  # fallback: plain mean over the numeric metric columns
        table["Aggregated ⬆️"] = table.mean(axis=1, numeric_only=True).round(2)

    table = table.merge(meta, on="Model", how="left")
    table["Model"] = [
        model_hyperlink(url, name, rel)
        for url, name, rel in zip(table["Model URL"], table["Model"], table["Release"])
    ]
    table["Type"] = table["Model Type"].map(lambda kind: type_emoji.get(kind, ""))
    table = table.rename(
        columns={
            "Syntax (STX)": "STX",
            "Functionality (FNC)": "FNC",
            "Synthesis (SYN)": "SYN",
            "Performance": "Perf",
        }
    )

    wanted = [
        "Type",
        "Model",
        "Params",
        "Aggregated ⬆️",
        "STX",
        "FNC",
        "SYN",
        "Power",
        "Perf",
        "Area",
    ]
    table = table[[col for col in wanted if col in table.columns]]
    return table.sort_values(by="Aggregated ⬆️", ascending=False).reset_index(
        drop=True
    )
97
 
98
+
99
def custom_agg_s2r(vals):
    """Weighted mean of the two Spec-to-RTL benchmark scores.

    Weights are the benchmark problem counts: 155 for the first value
    (presumably VerilogEval S2R) and 47 for the second (presumably RTLLM)
    — the pairing relies on row order in the pivoted subset, TODO confirm.
    Returns the result rounded to two decimals.
    """
    # Guard: a model evaluated on only one of the two benchmarks would
    # otherwise raise IndexError on vals.iloc[1]; with a single score
    # there is nothing to weight.
    if len(vals) < 2:
        return round(vals.iloc[0], 2)
    s2r_val = vals.iloc[0]
    rtllm_val = vals.iloc[1]
    w1 = 155
    w2 = 47
    result = (w1 * s2r_val + w2 * rtllm_val) / (w1 + w2)
    return round(result, 2)
106
+
107
+
108
def custom_agg_cc(vals):
    """Weighted mean of the two code-completion benchmark scores.

    Weights are the benchmark problem counts: 155 for the first value
    (presumably VerilogEval CC) and 17 for the second (presumably VeriGen)
    — the pairing relies on row order in the pivoted subset, TODO confirm.
    Returns the result rounded to two decimals.
    """
    # Guard: a model evaluated on only one of the two benchmarks would
    # otherwise raise IndexError on vals.iloc[1]; with a single score
    # there is nothing to weight.
    if len(vals) < 2:
        return round(vals.iloc[0], 2)
    veval_val = vals.iloc[0]
    vgen_val = vals.iloc[1]
    w1 = 155
    w2 = 17
    result = (w1 * veval_val + w2 * vgen_val) / (w1 + w2)
    return round(result, 2)
115
+
116
+
117
def filter_bench_all(
    subset: pd.DataFrame, df_agg=None, agg_column=None
) -> pd.DataFrame:
    """Build the cross-benchmark ("All") leaderboard table.

    Scores are pivoted per metric using a problem-count-weighted
    aggregation: the Spec-to-RTL weighting when RTLLM rows are present in
    *subset*, otherwise the code-completion weighting. The "Aggregated ⬆️"
    column comes from *df_agg* when supplied (either a named column or the
    mean of all "Agg "-prefixed columns) and falls back to a row-wise mean.
    """
    details = subset[
        ["Model", "Model URL", "Model Type", "Params", "Release"]
    ].drop_duplicates("Model")
    # Pick the weighting that matches the benchmark pair in this subset.
    if "RTLLM" in subset["Benchmark"].unique():
        agg_func = custom_agg_s2r
    else:
        agg_func = custom_agg_cc
    pivot_df = (
        subset.pivot_table(
            index="Model", columns="Metric", values="Score", aggfunc=agg_func
        )
        .reset_index()
        .round(2)
    )

    if df_agg is not None:
        if agg_column is not None and agg_column in df_agg.columns:
            agg_data = df_agg[["Model", agg_column]].rename(
                columns={agg_column: "Aggregated ⬆️"}
            )
            pivot_df = pd.merge(pivot_df, agg_data, on="Model", how="left")
        else:
            agg_columns = [col for col in df_agg.columns if col.startswith("Agg ")]
            if agg_columns:
                # Build the average without writing a column back into
                # df_agg (the original mutated the caller's frame).
                agg_data = pd.DataFrame(
                    {
                        "Model": df_agg["Model"],
                        "Aggregated ⬆️": df_agg[agg_columns].mean(axis=1),
                    }
                )
                pivot_df = pd.merge(pivot_df, agg_data, on="Model", how="left")
            else:  # fallback
                pivot_df["Aggregated ⬆️"] = pivot_df.mean(
                    axis=1, numeric_only=True
                ).round(2)
    else:  # fallback: no aggregate table supplied
        pivot_df["Aggregated ⬆️"] = pivot_df.mean(axis=1, numeric_only=True).round(2)

    pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
    pivot_df["Model"] = pivot_df.apply(
        lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"]),
        axis=1,
    )
    pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))

    pivot_df.rename(
        columns={
            "Exact Matching (EM)": "EM",
            "Syntax (STX)": "Agg STX",
            "Functionality (FNC)": "Agg FNC",
            "Synthesis (SYN)": "Agg SYN",
            "Power": "Agg Power",
            "Performance": "Agg Perf",
            "Area": "Agg Area",
        },
        inplace=True,
    )

    columns_order = [
        "Type",
        "Model",
        "Params",
        "Aggregated ⬆️",
        "Agg STX",
        "Agg FNC",
        "Agg SYN",
        "Agg Power",
        "Agg Perf",
        "Agg Area",
    ]
    pivot_df = pivot_df[[col for col in columns_order if col in pivot_df.columns]]
    pivot_df = pivot_df.sort_values(by="Aggregated ⬆️", ascending=False).reset_index(
        drop=True
    )
    return pivot_df
199
+
200
+
201
def agg_S2R_metrics(verilog_eval_rtl, rtllm):
    """Problem-count-weighted mean of the two Spec-to-RTL scores.

    Weights: VerilogEval S2R counts 155, RTLLM counts 47. Returns the
    weighted average rounded to two decimals, or None when either score
    is missing.
    """
    # Use `is None` rather than truthiness: a legitimate score of 0.0
    # must not be treated as a missing value.
    if verilog_eval_rtl is None or rtllm is None:
        return None
    w1 = 155
    w2 = 47
    result = (w1 * verilog_eval_rtl + w2 * rtllm) / (w1 + w2)
    return round(result, 2)
208
+
209
+
210
def agg_MC_metrics(verilog_eval_cc, verigen):
    """Problem-count-weighted mean of the two code-completion scores.

    Weights: VerilogEval CC counts 155, VeriGen counts 17. Returns the
    weighted average rounded to two decimals, or None when either score
    is missing.
    """
    # Use `is None` rather than truthiness: a legitimate score of 0.0
    # must not be treated as a missing value.
    if verilog_eval_cc is None or verigen is None:
        return None
    w1 = 155
    w2 = 17
    result = (w1 * verilog_eval_cc + w2 * verigen) / (w1 + w2)
    return round(result, 2)