Spaces:

Bton
/

llama3_product-reviewer

Sleeping

App Files Files Community

Bton committed on Jun 8

Commit

6d6882f

verified ·

1 Parent(s): ea0454c

Update app.py

Browse files

adjusting strat

Files changed (1) hide show

app.py +49 -65

app.py CHANGED Viewed

@@ -1,74 +1,58 @@
-import os
-import json
 import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
-import spaces
-# Hugging Face model repo + filename
-REPO_ID = "Bton/llama3-product-reviewer"
-FILENAME = "unsloth.Q4_K_M.gguf"
-# ✅ Download model once (cached)
-model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=".")
-# ✅ GPU-accelerated review generation
-@spaces.GPU
-def generate_review(title, price, rating, about):
-    # 🧠 Load model on GPU only when function is called
-    llm = Llama(
-        model_path=model_path,
-        n_ctx=1024,
-        n_batch=64,
-        n_gpu_layers=-1,  # Offload all to GPU
-        use_mlock=False,
-        verbose=False
-    )
-    product_data = {
-        "product_title": title,
-        "price": price,
-        "rating": rating,
-        "about_this_item": about
-    }
-    # ⚠️ DO NOT MODIFY PROMPT FORMAT – it's finetuned
-    prompt = (
-        "Write a helpful and natural-sounding customer review in JSON format with two fields: "
-        "\"title\" and \"review\" for the product below.\n\n"
-        f"{json.dumps(product_data, ensure_ascii=False)}"
-    )
-    response = llm(prompt, max_tokens=512)
-    raw = response["choices"][0]["text"]
-    try:
-        json_start = raw.find("{")
-        review_data = json.loads(raw[json_start:])
-        return review_data.get("title", "Untitled"), review_data.get("review", raw.strip())
-    except Exception:
-        return "Error", raw.strip()
-# 🖥️ Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## 📝 LLaMA3 Product Review Generator (ZeroGPU 🚀)")
-    with gr.Row():
-        title = gr.Textbox(label="Product Title", placeholder="Ergonomic Office Chair")
-        price = gr.Textbox(label="Price", placeholder="$129.99")
-        rating = gr.Textbox(label="Rating", placeholder="4.6 out of 5 stars")
-    about = gr.Textbox(
-        label="About This Item",
-        placeholder="• Breathable mesh back\n• Adjustable lumbar support\n• Height-adjustable armrests",
-        lines=4
-    )
-    btn = gr.Button("Generate Review")
-    out_title = gr.Textbox(label="Generated Title")
-    out_review = gr.Textbox(label="Generated Review", lines=5)
-    btn.click(generate_review, inputs=[title, price, rating, about], outputs=[out_title, out_review])
-demo.launch()

 import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
+# Hugging Face model repo and the GGUF file to fetch from it
+REPO_ID = "Bton/llama3-product-reviewer"
+FILENAME = "unsloth.Q4_K_M.gguf"
+# Fetch the GGUF weights into the current working directory
+model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=".")
+# Settings for the single module-level Llama instance shared by all requests
+LLAMA_KWARGS = {
+    "model_path": model_path,
+    "chat_format": "chatml",  # prompts are rendered with the chatml template
+    "n_ctx": 4096,
+    "n_threads": 4,
+    "n_gpu_layers": -1,  # offload all layers to GPU
+    "use_mlock": False,
+    "verbose": False,
+}
+# Load the model once at import time
+llm = Llama(**LLAMA_KWARGS)
+def generate_response(message, history, system_message, max_tokens, temperature, top_p):
+    """Stream a chat completion for ``message`` as a growing transcript.
+
+    Args:
+        message: The new user turn.
+        history: Earlier turns, expected to be a list of
+            ``{"role": ..., "content": ...}`` dicts; ``None`` or an empty
+            list means a fresh conversation.
+        system_message: Optional system prompt; skipped when ``None`` or blank.
+        max_tokens: Forwarded to ``llm.create_chat_completion``.
+        temperature: Forwarded to ``llm.create_chat_completion``.
+        top_p: Forwarded to ``llm.create_chat_completion``.
+
+    Yields:
+        ``(transcript, "")`` after every streamed chunk, where ``transcript``
+        is the history plus the user turn and the assistant reply accumulated
+        so far. The empty string is the second output value.
+    """
+    # history may be None; normalise it once so both the prompt and the
+    # yielded transcript can concatenate it without raising TypeError.
+    history = list(history or [])
+    user_turn = {"role": "user", "content": message}
+    messages = []
+    if system_message and system_message.strip():
+        messages.append({"role": "system", "content": system_message})
+    messages.extend(history)
+    messages.append(user_turn)
+    response_text = ""
+    for chunk in llm.create_chat_completion(
+        messages=messages,
+        stream=True,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        # Some chunks carry no text in their delta; only append real content.
+        piece = chunk["choices"][0]["delta"].get("content")
+        if piece:
+            response_text += piece
+        # Yield after every chunk, as before, so the transcript updates live.
+        yield history + [user_turn, {"role": "assistant", "content": response_text}], ""
 with gr.Blocks() as demo:
+    gr.Markdown("## Bton/llama3-product-reviewer")
+    # generate_response yields {"role", "content"} dicts, so the chatbot must
+    # use the messages format rather than the legacy (user, bot) tuple pairs.
+    # NOTE(review): `type=` is a Gradio 5.x parameter - confirm against the
+    # Space's pinned SDK version.
+    chatbot = gr.Chatbot(label="Chat", type="messages")
+    msg = gr.Textbox(placeholder="Type your message...", label="Message")
+    with gr.Accordion("⚙️ Advanced", open=False):
+        system_msg = gr.Textbox(value="You are a helpful assistant.", label="System Message")
+        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top-p")
+    def chat_submit(message, chat_history, system_message, max_tokens, temperature, top_p):
+        """Relay a submitted message to generate_response and stream its updates."""
+        yield from generate_response(message, chat_history, system_message, max_tokens, temperature, top_p)
+    # Each yielded pair updates the chatbot and sets the message box to "".
+    msg.submit(chat_submit, [msg, chatbot, system_msg, max_tokens, temperature, top_p], [chatbot, msg])
+# Start the app only after the Blocks context has closed and the layout is complete.
+demo.launch()