Upload index.html
Browse files- index.html +34 -16
index.html
CHANGED
|
@@ -51,10 +51,20 @@
|
|
| 51 |
}
|
| 52 |
|
| 53 |
const gguf_quants = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
"Q2_K": 3.35,
|
| 55 |
"Q3_K_S": 3.5,
|
| 56 |
"Q3_K_M": 3.91,
|
| 57 |
"Q3_K_L": 4.27,
|
|
|
|
| 58 |
"Q4_0": 4.55,
|
| 59 |
"Q4_K_S": 4.58,
|
| 60 |
"Q4_K_M": 4.85,
|
|
@@ -186,9 +196,6 @@
|
|
| 186 |
layer_size_el.innerText = layer_size.toFixed(2)
|
| 187 |
|
| 188 |
const context_dealloc = context_loc === "vram" ? (context_size / 2**30) : 0;
|
| 189 |
-
|
| 190 |
-
console.log(context_loc)
|
| 191 |
-
|
| 192 |
const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size)
|
| 193 |
|
| 194 |
const layers_offload_el = document.getElementById("layersoffload");
|
|
@@ -209,7 +216,7 @@
|
|
| 209 |
GGUF Model, Can I split it?
|
| 210 |
</h1>
|
| 211 |
<h3 class="font-semibold leading-6 text-gray-900">
|
| 212 |
-
Based on NyxKrage's LLM VRAM calculator
|
| 213 |
</h3>
|
| 214 |
</div>
|
| 215 |
<div class="flex flex-col gap-10">
|
|
@@ -434,18 +441,29 @@
|
|
| 434 |
<!-- Panel -->
|
| 435 |
<div
|
| 436 |
x-data="{ quants: [
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
x-ref="panel"
|
| 450 |
x-show="open"
|
| 451 |
x-transition.origin.top.left
|
|
|
|
| 51 |
}
|
| 52 |
|
| 53 |
const gguf_quants = {
|
| 54 |
+
"IQ1_S": 1.56,
|
| 55 |
+
"IQ1_M": 1.75,
|
| 56 |
+
"IQ2_XXS": 2.06,
|
| 57 |
+
"IQ2_XS": 2.31,
|
| 58 |
+
"IQ2_S": 2.5,
|
| 59 |
+
"IQ3_XXS": 3.06,
|
| 60 |
+
"IQ3_XS": 3.3,
|
| 61 |
+
"IQ3_S": 3.44,
|
| 62 |
+
"IQ3_M": 3.66,
|
| 63 |
"Q2_K": 3.35,
|
| 64 |
"Q3_K_S": 3.5,
|
| 65 |
"Q3_K_M": 3.91,
|
| 66 |
"Q3_K_L": 4.27,
|
| 67 |
+
"IQ4_XS": 4.25,
|
| 68 |
"Q4_0": 4.55,
|
| 69 |
"Q4_K_S": 4.58,
|
| 70 |
"Q4_K_M": 4.85,
|
|
|
|
| 196 |
layer_size_el.innerText = layer_size.toFixed(2)
|
| 197 |
|
| 198 |
const context_dealloc = context_loc === "vram" ? (context_size / 2**30) : 0;
|
|
|
|
|
|
|
|
|
|
| 199 |
const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size)
|
| 200 |
|
| 201 |
const layers_offload_el = document.getElementById("layersoffload");
|
|
|
|
| 216 |
GGUF Model, Can I split it?
|
| 217 |
</h1>
|
| 218 |
<h3 class="font-semibold leading-6 text-gray-900">
|
| 219 |
+
Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM calculator</a>
|
| 220 |
</h3>
|
| 221 |
</div>
|
| 222 |
<div class="flex flex-col gap-10">
|
|
|
|
| 441 |
<!-- Panel -->
|
| 442 |
<div
|
| 443 |
x-data="{ quants: [
|
| 444 |
+
'IQ1_S',
|
| 445 |
+
'IQ1_M',
|
| 446 |
+
'IQ2_XXS',
|
| 447 |
+
'IQ2_XS',
|
| 448 |
+
'IQ2_S',
|
| 449 |
+
'IQ3_XXS',
|
| 450 |
+
'IQ3_XS',
|
| 451 |
+
'IQ3_S',
|
| 452 |
+
'IQ3_M',
|
| 453 |
+
'Q2_K',
|
| 454 |
+
'Q3_K_S',
|
| 455 |
+
'Q3_K_M',
|
| 456 |
+
'Q3_K_L',
|
| 457 |
+
'IQ4_XS',
|
| 458 |
+
'Q4_0',
|
| 459 |
+
'Q4_K_S',
|
| 460 |
+
'Q4_K_M',
|
| 461 |
+
'Q5_0',
|
| 462 |
+
'Q5_K_S',
|
| 463 |
+
'Q5_K_M',
|
| 464 |
+
'Q6_K',
|
| 465 |
+
'Q8_0'
|
| 466 |
+
]}"
|
| 467 |
x-ref="panel"
|
| 468 |
x-show="open"
|
| 469 |
x-transition.origin.top.left
|