oobabooga committed on
Commit
3fd6ae9
·
verified ·
1 Parent(s): b09c07f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -20
app.py CHANGED
@@ -1,10 +1,12 @@
1
- import gradio as gr
2
- from math import exp
3
  import re
4
  import struct
5
- import requests
6
- import io
7
  from enum import IntEnum
 
 
 
 
 
8
 
9
 
10
  class GGUFValueType(IntEnum):
@@ -274,28 +276,27 @@ def estimate_vram(metadata, gpu_layers, ctx_size, cache_type):
274
  gpu_layers = n_layers
275
 
276
  # Convert cache_type to numeric
277
- cache_type_map = {'fp16': 16, 'q8_0': 8, 'q4_0': 4}
278
- cache_type_numeric = cache_type_map.get(cache_type, 16)
 
 
 
 
279
 
280
  # Derived features
281
  size_per_layer = size_in_mb / max(n_layers, 1e-6)
282
- context_per_layer = context_length / max(n_layers, 1e-6)
283
- ffn_per_embedding = feed_forward_dim / max(embedding_dim, 1e-6)
284
- kv_cache_factor = n_kv_heads * cache_type_numeric * ctx_size
285
-
286
- # Helper function for smaller
287
- def smaller(x, y):
288
- return 1 if x < y else 0
289
 
290
  # Calculate VRAM using the model
 
291
  vram = (
292
- (size_per_layer - 21.19195204848197)
293
- * exp(0.0001047328491557063 * size_in_mb * smaller(ffn_per_embedding, 2.671096993407845))
294
- + 0.0006621544775632052 * context_per_layer
295
- + 3.34664386576376e-05 * kv_cache_factor
296
- ) * (1.363306170123392 + gpu_layers) + 1255.163594536052
297
 
298
- return max(0, vram) # Ensure non-negative result
299
 
300
  except Exception as e:
301
  print(f"Error in VRAM calculation: {e}")
@@ -310,7 +311,7 @@ def estimate_vram_wrapper(model_metadata, gpu_layers, ctx_size, cache_type):
310
  # Use cache_type directly (it's already a string from the radio button)
311
  try:
312
  result = estimate_vram(model_metadata, gpu_layers, ctx_size, cache_type)
313
- conservative = result + 906
314
  return f"""<div id="vram-info">
315
  <div>Expected VRAM usage: <span class="value">{result:.0f} MiB</span></div>
316
  <div>Safe estimate: <span class="value">{conservative:.0f} MiB</span> - 95% chance the VRAM is at most this.</div>
 
1
+ import io
 
2
  import re
3
  import struct
 
 
4
  from enum import IntEnum
5
+ from math import floor
6
+
7
+ import requests
8
+
9
+ import gradio as gr
10
 
11
 
12
  class GGUFValueType(IntEnum):
 
276
  gpu_layers = n_layers
277
 
278
  # Convert cache_type to numeric
279
+ if cache_type == 'q4_0':
280
+ cache_type = 4
281
+ elif cache_type == 'q8_0':
282
+ cache_type = 8
283
+ else:
284
+ cache_type = 16
285
 
286
  # Derived features
287
  size_per_layer = size_in_mb / max(n_layers, 1e-6)
288
+ kv_cache_factor = n_kv_heads * cache_type * ctx_size
289
+ embedding_per_context = embedding_dim / ctx_size
 
 
 
 
 
290
 
291
  # Calculate VRAM using the model
292
+ # Details: https://oobabooga.github.io/blog/posts/gguf-vram-formula/
293
  vram = (
294
+ (size_per_layer - 17.99552795246051 + 3.148552680382576e-05 * kv_cache_factor)
295
+ * (gpu_layers + max(0.9690636483914102, cache_type - (floor(50.77817218646521 * embedding_per_context) + 9.987899908205632)))
296
+ + 1516.522943869404
297
+ )
 
298
 
299
+ return vram
300
 
301
  except Exception as e:
302
  print(f"Error in VRAM calculation: {e}")
 
311
  # Use cache_type directly (it's already a string from the radio button)
312
  try:
313
  result = estimate_vram(model_metadata, gpu_layers, ctx_size, cache_type)
314
+ conservative = result + 577
315
  return f"""<div id="vram-info">
316
  <div>Expected VRAM usage: <span class="value">{result:.0f} MiB</span></div>
317
  <div>Safe estimate: <span class="value">{conservative:.0f} MiB</span> - 95% chance the VRAM is at most this.</div>