Spaces:

TobDeBer
/

Granite4MicroCPU

Running

App Files Community

TobDeBer committed 30 days ago

Commit

1dfbab6

verified ·

1 Parent(s): 31c9425

t3 binaries

Browse files

Files changed (1) hide show

app.py +3 -10

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ SYS_PROMPT = f"""Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
 TITLE = "IBM Granite 4 Micro served from local GGUF server"
 DESCRIPTION = """
-<p>Granite 4 Micro is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
@@ -42,13 +42,6 @@ except FileNotFoundError:
 print(f"Detected platform {platform}")
-# login to HF with space secret and download gguf and executable
-#hf_token = os.getenv("HF_TOKEN")  # Set this in your environment before running
-#if hf_token:
-#    login(token=hf_token)
-#else:
-#    raise ValueError("Hugging Face token not found. Please set HF_TOKEN environment variable.")
 gguf_name = "granite-4.0-h-micro-UD-Q2_K_XL.gguf"
 gguf_path = hf_hub_download(
             repo_id="unsloth/granite-4.0-h-micro-GGUF",
@@ -57,7 +50,7 @@ gguf_path = hf_hub_download(
 )
 # set exe_name depending on platform
-exe_name = "llama-server-6343-cuda" if platform == "CUDA" else "llama-server-6343-blas"
 exe_path = hf_hub_download(
             repo_id="TobDeBer/Skipper",
             filename=exe_name,
@@ -66,7 +59,7 @@ exe_path = hf_hub_download(
 # start llama-server
 subprocess.run(["chmod", "+x", exe_name])
-command = ["./" + exe_name, "-m", gguf_name, "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
 process = subprocess.Popen(command)
 print(f"Llama-server process started with PID {process.pid}")

 You are Granite, developed by IBM. You are a helpful AI assistant"""
 TITLE = "IBM Granite 4 Micro served from local GGUF server"
 DESCRIPTION = """
+<p>Granite 4 Micro is an open-source LLM supporting a 1M context window. This demo uses only 2K context and max 1K output tokens.
 <span class="gr_docs_link">
 <a href="https://www.ibm.com/granite/docs/">View Documentation <i class="fa fa-external-link"></i></a>
 </span>
 print(f"Detected platform {platform}")
 gguf_name = "granite-4.0-h-micro-UD-Q2_K_XL.gguf"
 gguf_path = hf_hub_download(
             repo_id="unsloth/granite-4.0-h-micro-GGUF",
 )
 # set exe_name depending on platform
+exe_name = "llama-server-t3-6266-cuda" if platform == "CUDA" else "llama-server-t3-6268-blas"
 exe_path = hf_hub_download(
             repo_id="TobDeBer/Skipper",
             filename=exe_name,
 # start llama-server
 subprocess.run(["chmod", "+x", exe_name])
+command = ["./" + exe_name, "-m", gguf_name, "-c", "2048", "--port", "8081"]
 process = subprocess.Popen(command)
 print(f"Llama-server process started with PID {process.pid}")