Update app.py
app.py CHANGED
@@ -5,13 +5,13 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama   # GGUF inference on CPU

 # ---------- model loading (done once at startup) ----------
-MODEL_REPO = "TheBloke/
-MODEL_FILE = "
+MODEL_REPO = "TheBloke/gemma-2b-it-GGUF"  # fully open 2.7 B model
+MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf"    # 4-bit, 3.5 GB RAM
 CTX_SIZE = 2048               # ample for prompt+answer

-
-
-n_ctx=
+llm = Llama(model_path=hf_hub_download(repo_id=MODEL_REPO,
+                                       filename=MODEL_FILE),
+            n_ctx=1024,       # 512-1024 is sufficient
             n_threads=os.cpu_count() or 2)  # use all CPUs

 # ---------- analysis + generation ----------
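The `llm` handle created above feeds the "analysis + generation" section that follows in app.py. A minimal sketch of what such a call might look like, using llama-cpp-python's plain completion interface; the prompt text, max_tokens, temperature, and stop values below are illustrative assumptions, not part of this commit:

    # Hypothetical usage sketch -- not taken from the commit above.
    # llama-cpp-python's Llama object is callable for raw text completion.
    out = llm(
        "Question: What does n_ctx control?\nAnswer:",  # illustrative prompt
        max_tokens=256,      # cap the reply so prompt + answer fit in n_ctx=1024
        temperature=0.7,     # sampling temperature (assumed value)
        stop=["Question:"],  # stop before the model starts a new turn
    )
    answer = out["choices"][0]["text"].strip()
    print(answer)

Note the design trade-off in the diff itself: the context window passed to Llama drops from the CTX_SIZE constant (2048) to a hard-coded n_ctx=1024, which roughly halves the KV-cache memory the model needs at runtime, a sensible economy on the small CPU hardware that free Spaces run on.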