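# Page-header residue from the hosting site's file view:
# author avatar "Bton's picture"; commit message "Update app.py";
# revision 38eb8fc (verified).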
import os
import gradio as gr
import spaces
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Model setup: the Hub repository and the GGUF file inside it that this app serves.
REPO_ID = "Bton/llama3-product-reviewer"
FILENAME = "unsloth.Q4_K_M.gguf"

# Download the GGUF model; local_dir="." places it in the current directory.
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME,
    local_dir=".",
)

# Load the GGUF model (parameter values copied from a NanoLLaVA setup that
# was known to be stable).
# NOTE(review): the repo name suggests a Llama 3 fine-tune while the chat
# template is "chatml" -- confirm this matches how the model was trained.
llm = Llama(
    model_path=model_path,
    chat_format="chatml",
    n_ctx=2048,
    n_batch=512,
    n_gpu_layers=35,  # avoid -1 unless you're on a big GPU
    flash_attn=True,
    use_mlock=False,
)
# Use @spaces.GPU to avoid timeout on Spaces.
@spaces.GPU(duration=30)
def generate_review(title, price, rating, about):
    """Stream a generated product review as progressively longer text.

    Builds one user prompt from the four product fields, sends it to the
    module-level ``llm`` with streaming enabled, and yields the response
    text accumulated so far each time a chunk carries new content.

    Args:
        title: Product title, interpolated into the prompt.
        price: Price text, interpolated into the prompt.
        rating: Rating text, interpolated into the prompt.
        about: Product description, interpolated into the prompt.

    Yields:
        str: All response text received up to and including the latest chunk.
    """
    prompt = f"""Write a helpful and natural-sounding customer review in JSON format with two fields: "review_title" and "review_body" for the product below.
Product Title: {title}
Rating: {rating}
Price: {price}
About This Item: {about}
"""
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

    full_text = ""
    for chunk in response:
        # Some deltas carry no text: the key may be absent, or present with
        # a None value. Appending None to a str would raise TypeError, so
        # both cases are skipped.
        token = chunk["choices"][0]["delta"].get("content")
        if token is None:
            continue
        full_text += token
        # Yield the running total (not just the new piece) so the consumer
        # can display the whole text so far.
        yield full_text
# Gradio UI: four product inputs and a button on the left, the streamed
# review on the right.
# NOTE(review): the nesting below was reconstructed from the statement order
# -- confirm the button belongs in the left column.
with gr.Blocks() as demo:
    gr.Markdown("# 🛒 LLaMA Product Review Generator")

    with gr.Row():
        with gr.Column():
            title = gr.Textbox(label="Product Title", placeholder="e.g. Ergonomic Mesh Office Chair")
            price = gr.Textbox(label="Price", placeholder="e.g. $129.99")
            rating = gr.Textbox(label="Rating", placeholder="e.g. 4.6 out of 5 stars")
            about = gr.Textbox(label="About This Item", lines=4, placeholder="e.g. Breathable mesh, adjustable lumbar support...")
            submit = gr.Button("Generate Review")
        with gr.Column():
            output = gr.TextArea(label="Generated Review", lines=20)

    # Event listeners must be registered while the Blocks context is open.
    # generate_review is a generator, so each yielded string replaces the
    # contents of the output box.
    submit.click(fn=generate_review, inputs=[title, price, rating, about], outputs=output)

# Enable the request queue, then start the server.
demo.queue().launch()