# NextStep-1 / app.py
# mrfakename's picture
# Update app.py
# 9cc2d55 verified
# raw
# history blame
# 2.78 kB
import spaces
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "apple/DiffuCoder-7B-cpGRPO"

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the checkpoint in bfloat16, move it to the chosen device and put it
# in evaluation mode. trust_remote_code=True is passed to both loaders.
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.to(device)
model = model.eval()

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)
@spaces.GPU
def generate_code(query, temperature=0.4, top_p=0.95, max_new_tokens=256):
    """Stream a completion for ``query``, yielding the text generated so far.

    The request is wrapped in a fixed system/user/assistant chat layout and
    extended by calling ``model.diffusion_generate`` repeatedly, asking for
    ``TOKEN_PER_STEP`` new token(s) per call and feeding each call's output
    sequence back in as the next call's input.

    Args:
        query: Natural-language request. Surrounding whitespace is stripped;
            ``None`` is treated as an empty request.
        temperature: Forwarded unchanged to ``diffusion_generate``.
        top_p: Forwarded unchanged to ``diffusion_generate``.
        max_new_tokens: Upper bound on the number of generated tokens.
            Coerced to ``int`` so a float coming from the UI does not break
            ``range()``.

    Yields:
        The decoded completion so far, cut at the first ``<|dlm_pad|>``
        marker and stripped of surrounding whitespace.
    """
    user_text = (query or "").strip()

    # Same bytes as the original triple-quoted template, spelled line by line.
    prompt = (
        "<|im_start|>system\n"
        "You are a helpful coding assistant.<|im_end|>\n"
        "<|im_start|>user\n"
        f"{user_text}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)
    # Everything past this index is generated text.
    prompt_len = input_ids.shape[1]

    TOKEN_PER_STEP = 1
    steps = int(max_new_tokens) // TOKEN_PER_STEP
    pad_marker = "<|dlm_pad|>"
    eos_id = getattr(tokenizer, "eos_token_id", None)

    for _ in range(steps):
        output = model.diffusion_generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=TOKEN_PER_STEP,
            output_history=True,
            return_dict_in_generate=True,
            steps=1,
            temperature=temperature,
            top_p=top_p,
            alg="entropy",
            alg_temp=0.,
        )
        sequences = output.sequences

        # Grow the mask by however many tokens were actually appended, so it
        # always matches the new input length (the original hard-coded 1).
        added = sequences.shape[1] - attention_mask.shape[1]
        attention_mask = torch.cat(
            [
                attention_mask,
                torch.ones(
                    attention_mask.shape[0],
                    added,
                    dtype=attention_mask.dtype,
                    device=attention_mask.device,
                ),
            ],
            dim=1,
        )
        input_ids = sequences

        # Decode the whole generated suffix each time. Decoding tokens one by
        # one and concatenating the pieces can corrupt characters or spacing
        # that span several tokens. The extra decode work is negligible next
        # to a model forward pass.
        text = tokenizer.decode(
            sequences[0, prompt_len:].tolist(),
            skip_special_tokens=True,
        )
        visible, marker_found, _ = text.partition(pad_marker)
        yield visible.strip()

        # Once the pad marker shows up, nothing generated later can change
        # the visible text, so further model calls are wasted GPU time.
        if marker_found:
            break

        # Stop as soon as the tokenizer's end-of-sequence token is produced.
        new_ids = sequences[0, sequences.shape[1] - added:].tolist()
        if eos_id is not None and eos_id in new_ids:
            break
# Assemble the Gradio UI: one text prompt plus three sampling controls,
# passed to generate_code in this order.
_request_box = gr.Textbox(
    label="Code Request",
    lines=3,
    placeholder="Describe the code you want...",
)
_temperature_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.4, label="Temperature"
)
_top_p_slider = gr.Slider(
    minimum=0.5, maximum=1.0, value=0.95, label="Top-p"
)
_max_tokens_slider = gr.Slider(
    minimum=32, maximum=512, value=256, step=32, label="Max Tokens"
)

# Each example fills only the first input (the code request).
_example_prompts = (
    "Write a Python function to calculate factorial",
    "Create a function to merge two sorted lists",
    "How to reverse a string in JavaScript?",
)

demo = gr.Interface(
    fn=generate_code,
    inputs=[
        _request_box,
        _temperature_slider,
        _top_p_slider,
        _max_tokens_slider,
    ],
    outputs=gr.Textbox(label="Generated Code", lines=10),
    title="🧠 DiffuCoder Code Generator",
    description="Generate code with Apple's DiffuCoder-7B model",
    examples=[[text] for text in _example_prompts],
)
# Start the app only when this file is executed directly as a script.
if __name__ == "__main__":
    # queue() is applied first; launch() is then called on what it returns.
    queued_demo = demo.queue()
    queued_demo.launch()