Commit
·
7ce5aba
1
Parent(s):
2139f5c
add the missing models
Browse files
app.py
CHANGED
@@ -77,6 +77,20 @@ def filter_and_search_models(
|
|
77 |
architecture_mask |= filtered_df["Model Name"].str.contains(
|
78 |
"google", case=False, na=False
|
79 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
filtered_df = filtered_df[architecture_mask]
|
82 |
|
@@ -136,6 +150,10 @@ def create_html_table(df):
|
|
136 |
row_class = "qwen-row"
|
137 |
elif "google" in model_name:
|
138 |
row_class = "google-row"
|
|
|
|
|
|
|
|
|
139 |
|
140 |
html += f'<tr class="{row_class}">'
|
141 |
for i, col in enumerate(df.columns):
|
@@ -149,7 +167,14 @@ def create_html_table(df):
|
|
149 |
else: # Score columns
|
150 |
cell_class = "score-cell"
|
151 |
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
html += "</tr>"
|
154 |
html += "</tbody>"
|
155 |
html += "</table>"
|
@@ -205,8 +230,10 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
205 |
("🐧 Qwen", "qwen"),
|
206 |
("🦙 Llama", "llama"),
|
207 |
("🔷 Gemma", "google"),
|
|
|
|
|
208 |
],
|
209 |
-
value=["llama", "deepseek", "qwen", "google"],
|
210 |
label="",
|
211 |
elem_classes="architecture-filter",
|
212 |
container=False,
|
@@ -232,7 +259,7 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
232 |
"",
|
233 |
["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
|
234 |
"Combined Score",
|
235 |
-
["llama", "deepseek", "qwen", "google"],
|
236 |
)
|
237 |
),
|
238 |
elem_id="leaderboard-table",
|
@@ -382,6 +409,23 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
382 |
word-wrap: break-word;
|
383 |
}
|
384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
.size-cell {
|
386 |
text-align: center;
|
387 |
font-weight: 500;
|
@@ -428,6 +472,22 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
428 |
background-color: #ffe6f0;
|
429 |
}
|
430 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
.size-filter {
|
432 |
margin-top: 10px;
|
433 |
}
|
@@ -569,6 +629,34 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
569 |
box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
|
570 |
}
|
571 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
572 |
/* Search and Filter Section Styling */
|
573 |
.search-input input {
|
574 |
border: 2px solid #e9ecef !important;
|
@@ -652,6 +740,117 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
652 |
.size-filter {
|
653 |
margin-top: 10px;
|
654 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
655 |
"""
|
656 |
|
657 |
# Launch the app
|
|
|
77 |
architecture_mask |= filtered_df["Model Name"].str.contains(
|
78 |
"google", case=False, na=False
|
79 |
)
|
80 |
+
elif arch == "mistral":
|
81 |
+
architecture_mask |= filtered_df["Model Name"].str.contains(
|
82 |
+
"mistralai", case=False, na=False
|
83 |
+
)
|
84 |
+
elif arch == "others":
|
85 |
+
# Include models that don't match any of the main categories
|
86 |
+
others_mask = ~(
|
87 |
+
filtered_df["Model Name"].str.contains("meta-llama", case=False, na=False) |
|
88 |
+
filtered_df["Model Name"].str.contains("deepseek", case=False, na=False) |
|
89 |
+
filtered_df["Model Name"].str.contains("Qwen", case=False, na=False) |
|
90 |
+
filtered_df["Model Name"].str.contains("google", case=False, na=False) |
|
91 |
+
filtered_df["Model Name"].str.contains("mistralai", case=False, na=False)
|
92 |
+
)
|
93 |
+
architecture_mask |= others_mask
|
94 |
|
95 |
filtered_df = filtered_df[architecture_mask]
|
96 |
|
|
|
150 |
row_class = "qwen-row"
|
151 |
elif "google" in model_name:
|
152 |
row_class = "google-row"
|
153 |
+
elif "mistralai" in model_name:
|
154 |
+
row_class = "mistral-row"
|
155 |
+
else:
|
156 |
+
row_class = "others-row"
|
157 |
|
158 |
html += f'<tr class="{row_class}">'
|
159 |
for i, col in enumerate(df.columns):
|
|
|
167 |
else: # Score columns
|
168 |
cell_class = "score-cell"
|
169 |
|
170 |
+
# Create Hugging Face link for model name
|
171 |
+
if col == "Model Name":
|
172 |
+
hf_url = f"https://huggingface.co/{model_name}"
|
173 |
+
cell_content = f'<a href="{hf_url}" target="_blank" class="model-link">{model_name}</a>'
|
174 |
+
else:
|
175 |
+
cell_content = str(row[col])
|
176 |
+
|
177 |
+
html += f'<td class="{cell_class}">{cell_content}</td>'
|
178 |
html += "</tr>"
|
179 |
html += "</tbody>"
|
180 |
html += "</table>"
|
|
|
230 |
("🐧 Qwen", "qwen"),
|
231 |
("🦙 Llama", "llama"),
|
232 |
("🔷 Gemma", "google"),
|
233 |
+
("🌟 Mistral", "mistral"),
|
234 |
+
("🔧 Others", "others"),
|
235 |
],
|
236 |
+
value=["llama", "deepseek", "qwen", "google", "mistral", "others"],
|
237 |
label="",
|
238 |
elem_classes="architecture-filter",
|
239 |
container=False,
|
|
|
259 |
"",
|
260 |
["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
|
261 |
"Combined Score",
|
262 |
+
["llama", "deepseek", "qwen", "google", "mistral", "others"],
|
263 |
)
|
264 |
),
|
265 |
elem_id="leaderboard-table",
|
|
|
409 |
word-wrap: break-word;
|
410 |
}
|
411 |
|
412 |
+
.model-link {
|
413 |
+
color: #0066cc !important;
|
414 |
+
text-decoration: none !important;
|
415 |
+
font-weight: 500 !important;
|
416 |
+
transition: all 0.2s ease !important;
|
417 |
+
border-bottom: 1px solid transparent !important;
|
418 |
+
}
|
419 |
+
|
420 |
+
.model-link:hover {
|
421 |
+
color: #0052a3 !important;
|
422 |
+
border-bottom: 1px solid #0066cc !important;
|
423 |
+
background-color: rgba(0, 102, 204, 0.05) !important;
|
424 |
+
padding: 2px 4px !important;
|
425 |
+
border-radius: 4px !important;
|
426 |
+
margin: -2px -4px !important;
|
427 |
+
}
|
428 |
+
|
429 |
.size-cell {
|
430 |
text-align: center;
|
431 |
font-weight: 500;
|
|
|
472 |
background-color: #ffe6f0;
|
473 |
}
|
474 |
|
475 |
+
.mistral-row {
|
476 |
+
background-color: #faf5ff;
|
477 |
+
}
|
478 |
+
|
479 |
+
.mistral-row:hover {
|
480 |
+
background-color: #f3e8ff;
|
481 |
+
}
|
482 |
+
|
483 |
+
.others-row {
|
484 |
+
background-color: #f8fafc;
|
485 |
+
}
|
486 |
+
|
487 |
+
.others-row:hover {
|
488 |
+
background-color: #f1f5f9;
|
489 |
+
}
|
490 |
+
|
491 |
.size-filter {
|
492 |
margin-top: 10px;
|
493 |
}
|
|
|
629 |
box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
|
630 |
}
|
631 |
|
632 |
+
/* Mistral styling */
|
633 |
+
.architecture-filter label:nth-child(5) {
|
634 |
+
background: #faf5ff !important;
|
635 |
+
border-color: #d8b4fe !important;
|
636 |
+
color: #7c3aed !important;
|
637 |
+
}
|
638 |
+
|
639 |
+
.architecture-filter label:nth-child(5):has(input[type="checkbox"]:checked) {
|
640 |
+
background: #8b5cf6 !important;
|
641 |
+
border-color: #8b5cf6 !important;
|
642 |
+
color: white !important;
|
643 |
+
box-shadow: 0 2px 4px rgba(139, 92, 246, 0.3) !important;
|
644 |
+
}
|
645 |
+
|
646 |
+
/* Others styling */
|
647 |
+
.architecture-filter label:nth-child(6) {
|
648 |
+
background: #f8fafc !important;
|
649 |
+
border-color: #cbd5e1 !important;
|
650 |
+
color: #475569 !important;
|
651 |
+
}
|
652 |
+
|
653 |
+
.architecture-filter label:nth-child(6):has(input[type="checkbox"]:checked) {
|
654 |
+
background: #64748b !important;
|
655 |
+
border-color: #64748b !important;
|
656 |
+
color: white !important;
|
657 |
+
box-shadow: 0 2px 4px rgba(100, 116, 139, 0.3) !important;
|
658 |
+
}
|
659 |
+
|
660 |
/* Search and Filter Section Styling */
|
661 |
.search-input input {
|
662 |
border: 2px solid #e9ecef !important;
|
|
|
740 |
.size-filter {
|
741 |
margin-top: 10px;
|
742 |
}
|
743 |
+
|
744 |
+
/* Dark Mode Specific Styles */
|
745 |
+
@media (prefers-color-scheme: dark) {
|
746 |
+
.leaderboard-table {
|
747 |
+
background: #1f2937 !important;
|
748 |
+
color: #f9fafb !important;
|
749 |
+
}
|
750 |
+
|
751 |
+
.leaderboard-table th {
|
752 |
+
background-color: #374151 !important;
|
753 |
+
color: #f9fafb !important;
|
754 |
+
border-bottom: 2px solid #4b5563 !important;
|
755 |
+
}
|
756 |
+
|
757 |
+
.leaderboard-table td {
|
758 |
+
color: #f9fafb !important;
|
759 |
+
border-bottom: 1px solid #374151 !important;
|
760 |
+
}
|
761 |
+
|
762 |
+
.leaderboard-table tbody tr:hover {
|
763 |
+
background-color: #374151 !important;
|
764 |
+
}
|
765 |
+
|
766 |
+
.rank-cell {
|
767 |
+
background-color: #374151 !important;
|
768 |
+
color: #f9fafb !important;
|
769 |
+
}
|
770 |
+
|
771 |
+
.model-cell {
|
772 |
+
color: #f9fafb !important;
|
773 |
+
}
|
774 |
+
|
775 |
+
.size-cell {
|
776 |
+
color: #d1d5db !important;
|
777 |
+
}
|
778 |
+
|
779 |
+
.score-cell {
|
780 |
+
color: #f9fafb !important;
|
781 |
+
}
|
782 |
+
|
783 |
+
/* Dark mode row colors with better contrast */
|
784 |
+
.llama-row {
|
785 |
+
background-color: rgba(245, 158, 11, 0.1) !important;
|
786 |
+
}
|
787 |
+
|
788 |
+
.llama-row:hover {
|
789 |
+
background-color: rgba(245, 158, 11, 0.2) !important;
|
790 |
+
}
|
791 |
+
|
792 |
+
.deepseek-row {
|
793 |
+
background-color: rgba(59, 130, 246, 0.1) !important;
|
794 |
+
}
|
795 |
+
|
796 |
+
.deepseek-row:hover {
|
797 |
+
background-color: rgba(59, 130, 246, 0.2) !important;
|
798 |
+
}
|
799 |
+
|
800 |
+
.qwen-row {
|
801 |
+
background-color: rgba(34, 197, 94, 0.1) !important;
|
802 |
+
}
|
803 |
+
|
804 |
+
.qwen-row:hover {
|
805 |
+
background-color: rgba(34, 197, 94, 0.2) !important;
|
806 |
+
}
|
807 |
+
|
808 |
+
.google-row {
|
809 |
+
background-color: rgba(236, 72, 153, 0.2) !important;
|
810 |
+
}
|
811 |
+
|
812 |
+
.google-row:hover {
|
813 |
+
background-color: rgba(236, 72, 153, 0.2) !important;
|
814 |
+
}
|
815 |
+
|
816 |
+
.mistral-row {
|
817 |
+
background-color: rgba(139, 92, 246, 0.1) !important;
|
818 |
+
}
|
819 |
+
|
820 |
+
.mistral-row:hover {
|
821 |
+
background-color: rgba(139, 92, 246, 0.2) !important;
|
822 |
+
}
|
823 |
+
|
824 |
+
.others-row {
|
825 |
+
background-color: rgba(107, 114, 128, 0.1) !important;
|
826 |
+
}
|
827 |
+
|
828 |
+
.others-row:hover {
|
829 |
+
background-color: rgba(107, 114, 128, 0.2) !important;
|
830 |
+
}
|
831 |
+
|
832 |
+
.leaderboard-container {
|
833 |
+
border: 1px solid #4b5563 !important;
|
834 |
+
}
|
835 |
+
|
836 |
+
.model-cell {
|
837 |
+
color: #f9fafb !important;
|
838 |
+
}
|
839 |
+
|
840 |
+
.model-link {
|
841 |
+
color: #60a5fa !important;
|
842 |
+
}
|
843 |
+
|
844 |
+
.model-link:hover {
|
845 |
+
color: #93c5fd !important;
|
846 |
+
border-bottom: 1px solid #60a5fa !important;
|
847 |
+
background-color: rgba(96, 165, 250, 0.1) !important;
|
848 |
+
}
|
849 |
+
|
850 |
+
.size-cell {
|
851 |
+
color: #d1d5db !important;
|
852 |
+
}
|
853 |
+
}
|
854 |
"""
|
855 |
|
856 |
# Launch the app
|