Update app.py
app.py CHANGED
@@ -5,13 +5,13 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama   # GGUF inference on CPU

 # ---------- model loading (done once at startup) ----------
-MODEL_REPO = "TheBloke/
-MODEL_FILE = "
+MODEL_REPO = "TheBloke/gemma-2b-it-GGUF"  # fully open 2.7 B model
+MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf"    # 4-bit, 3.5 GB RAM
 CTX_SIZE = 2048               # ample for prompt+answer

-
-
-n_ctx=
+llm = Llama(model_path=hf_hub_download(repo_id=MODEL_REPO,
+                                       filename=MODEL_FILE),
+            n_ctx=1024,       # 512-1024 is sufficient
             n_threads=os.cpu_count() or 2)  # use all CPUs

 # ---------- analysis + generation ----------
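The `llm` handle created above feeds the "analysis + generation" section that follows in app.py. A minimal sketch of what such a call might look like, using llama-cpp-python's plain completion interface; the prompt text, max_tokens, temperature, and stop values below are illustrative assumptions, not part of this commit:

    # Hypothetical usage sketch -- not taken from the commit above.
    # llama-cpp-python's Llama object is callable for raw text completion.
    out = llm(
        "Question: What does n_ctx control?\nAnswer:",  # illustrative prompt
        max_tokens=256,      # cap the reply so prompt + answer fit in n_ctx=1024
        temperature=0.7,     # sampling temperature (assumed value)
        stop=["Question:"],  # stop before the model starts a new turn
    )
    answer = out["choices"][0]["text"].strip()
    print(answer)

Note the design trade-off in the diff itself: the context window passed to Llama drops from the CTX_SIZE constant (2048) to a hard-coded n_ctx=1024, which roughly halves the KV-cache memory the model needs at runtime, a sensible economy on the small CPU hardware that free Spaces run on.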