astonn committed on
Commit
a63b1ed
·
verified ·
1 Parent(s): 47431b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -5,14 +5,14 @@ from huggingface_hub import hf_hub_download
5
  from llama_cpp import Llama # GGUF inference on CPU
6
 
7
  # ---------- model loading (done once at startup) ----------
8
- MODEL_REPO = "TheBloke/gemma-2b-it-GGUF" # fully open 2.7 B model
9
  MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf" # 4‑bit, 3.5 GB RAM
10
- CTX_SIZE = 2048 # ample for prompt+answer
11
 
12
- llm = Llama(model_path=hf_hub_download(repo_id=MODEL_REPO,
13
- filename=MODEL_FILE),
14
- n_ctx=1024, # 512‑1024 is sufficient
15
- n_threads=os.cpu_count() or 2) # use all CPUs
16
 
17
  # ---------- analysis + generation ----------
18
  def analyze_ads(file):
 
5
  from llama_cpp import Llama # GGUF inference on CPU
6
 
7
  # ---------- model loading (done once at startup) ----------
8
+ MODEL_REPO = "MaziyarPanahi/gemma-2b-it-GGUF" # fully open 2.7 B model
9
  MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf" # 4‑bit, 3.5 GB RAM
10
+ CTX_SIZE = 1024 # ample for prompt+answer
11
 
12
+ model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
13
+ llm = Llama(model_path=model_path,
14
+ n_ctx=CTX_SIZE,
15
+ n_threads=os.cpu_count() or 2)
16
 
17
  # ---------- analysis + generation ----------
18
  def analyze_ads(file):