TobDeBer committed (verified)
Commit 1dfbab6 · Parent: 31c9425

t3 binaries

Files changed (1): app.py (+3 -10)
app.py CHANGED
@@ -18,7 +18,7 @@ SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
 TITLE = "IBM Granite 4 Micro served from local GGUF server"
 DESCRIPTION = """
-<p>Granite 4 Micro is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
+<p>Granite 4 Micro is an open-source LLM supporting a 1M context window. This demo uses only 2K context and max 1K output tokens.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
@@ -42,13 +42,6 @@ except FileNotFoundError:
 
 print(f"Detected platform {platform}")
 
-# login to HF with space secret and download gguf and executable
-#hf_token = os.getenv("HF_TOKEN")  # Set this in your environment before running
-#if hf_token:
-#    login(token=hf_token)
-#else:
-#    raise ValueError("Hugging Face token not found. Please set HF_TOKEN environment variable.")
-
 gguf_name = "granite-4.0-h-micro-UD-Q2_K_XL.gguf"
 gguf_path = hf_hub_download(
     repo_id="unsloth/granite-4.0-h-micro-GGUF",
@@ -57,7 +50,7 @@ gguf_path = hf_hub_download(
 )
 
 # set exe_name depending on platform
-exe_name = "llama-server-6343-cuda" if platform == "CUDA" else "llama-server-6343-blas"
+exe_name = "llama-server-t3-6266-cuda" if platform == "CUDA" else "llama-server-t3-6268-blas"
 exe_path = hf_hub_download(
     repo_id="TobDeBer/Skipper",
     filename=exe_name,
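The platform variable that selects between the t3 binaries is set earlier in app.py, outside this diff; the except FileNotFoundError: context in the second hunk suggests it probes for a CUDA tool on PATH. A hypothetical re-creation of that check — the actual detection code may differ:

import subprocess

# Hypothetical: running nvidia-smi raises FileNotFoundError when no
# NVIDIA tooling is installed, matching the except clause visible in
# the hunk header above.
try:
    subprocess.run(["nvidia-smi"], capture_output=True)
    platform = "CUDA"
except FileNotFoundError:
    platform = "BLAS"
print(f"Detected platform {platform}")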
@@ -66,7 +59,7 @@ exe_path = hf_hub_download(
 
 # start llama-server
 subprocess.run(["chmod", "+x", exe_name])
-command = ["./" + exe_name, "-m", gguf_name, "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
+command = ["./" + exe_name, "-m", gguf_name, "-c", "2048", "--port", "8081"]
 process = subprocess.Popen(command)
 print(f"Llama-server process started with PID {process.pid}")