astonn committed
Commit 47431b1 · verified
1 Parent(s): 36bd0db

Update app.py

Files changed (1)
  1. app.py +5 -5
app.py CHANGED
@@ -5,13 +5,13 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama                               # GGUF inference on CPU
 
 # ---------- model loading (done once at startup) ----------
-MODEL_REPO = "TheBloke/phi-2-GGUF"                         # fully open 2.7 B model
-MODEL_FILE = "phi-2.Q4_K_M.gguf"                           # 4-bit, 3.5 GB RAM
+MODEL_REPO = "TheBloke/gemma-2b-it-GGUF"                   # fully open 2.7 B model
+MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf"                     # 4-bit, 3.5 GB RAM
 CTX_SIZE = 2048                                            # ample for prompt+answer
 
-model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
-llm = Llama(model_path=model_path,
-            n_ctx=CTX_SIZE,
+llm = Llama(model_path=hf_hub_download(repo_id=MODEL_REPO,
+                                       filename=MODEL_FILE),
+            n_ctx=1024,                                    # 512-1024 is enough
             n_threads=os.cpu_count() or 2)                 # use all CPUs
 
 # ---------- analysis + generation ----------
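
For context, below is a minimal sketch of the model-loading block as it stands after this commit, followed by a hypothetical downstream call. The repo name, file name, n_ctx and n_threads values are taken from the + lines of the diff; the prompt, max_tokens and temperature values are illustrative assumptions, since the "analysis + generation" code is outside this hunk.

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama                    # GGUF inference on CPU

MODEL_REPO = "TheBloke/gemma-2b-it-GGUF"       # values taken from the diff above
MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf"

# Download (cached after the first run) and load the 4-bit GGUF model on CPU.
llm = Llama(model_path=hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE),
            n_ctx=1024,                        # context window for prompt + answer
            n_threads=os.cpu_count() or 2)     # use all available CPU cores

# Hypothetical call (not part of the commit): llama-cpp-python exposes the model
# as a callable that returns an OpenAI-style completion dict.
result = llm("Summarize the input in one sentence:", max_tokens=128, temperature=0.2)
print(result["choices"][0]["text"])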