Spaces:
Runtime error
Runtime error
import spaces | |
import gradio as gr | |
import torch | |
from transformers import AutoModel, AutoTokenizer | |
# Checkpoint identifier passed to both the model and tokenizer loaders.
model_path = "apple/DiffuCoder-7B-cpGRPO"

# Run on CUDA when torch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the weights in bfloat16, move them to the chosen device and switch the
# module to inference mode. `trust_remote_code=True` allows the checkpoint's
# own Python code to be executed by the loader.
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.to(device)
model = model.eval()

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)
def generate_code(query, temperature=0.4, top_p=0.95, max_new_tokens=256):
    """Stream a code completion for ``query`` from the diffusion model.

    The request is wrapped in a ChatML-style prompt. The model is then asked
    for ``TOKEN_PER_STEP`` new token(s) per ``diffusion_generate`` call, and
    each call's output sequence is fed back in as the next call's input.

    Args:
        query: Natural-language description of the code to generate.
        temperature: Sampling temperature forwarded to ``diffusion_generate``.
        top_p: Nucleus-sampling threshold forwarded to ``diffusion_generate``.
        max_new_tokens: Upper bound on the number of generated tokens. It is
            coerced to ``int`` so a float coming from a UI slider still works
            with ``range()``.

    Yields:
        The text generated so far (cumulative, not a delta), cut at the first
        ``<|dlm_pad|>`` marker and stripped of surrounding whitespace.
    """
    # NOTE(review): `spaces` is imported at file level but never used in the
    # visible code. If this app runs on ZeroGPU hardware, this function
    # probably needs the `@spaces.GPU` decorator -- confirm against the
    # deployment target.

    # Format prompt using chat template
    prompt = f"""<|im_start|>system
You are a helpful coding assistant.<|im_end|>
<|im_start|>user
{query.strip()}<|im_end|>
<|im_start|>assistant
"""
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)
    # Everything past this column in the running sequence is model output.
    prompt_len = input_ids.shape[1]

    # Generate with token streaming
    TOKEN_PER_STEP = 1
    steps = int(max_new_tokens) // TOKEN_PER_STEP
    eos_token_id = tokenizer.eos_token_id

    for _ in range(steps):
        output = model.diffusion_generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=TOKEN_PER_STEP,
            output_history=True,
            return_dict_in_generate=True,
            steps=1,
            temperature=temperature,
            top_p=top_p,
            alg="entropy",
            alg_temp=0.,
        )
        sequences = output.sequences
        previous_len = input_ids.shape[1]
        new_ids = sequences[0, previous_len:].tolist()

        # Update input for next step. The mask grows by exactly as many
        # columns as the sequence did, so the two always stay aligned.
        input_ids = sequences
        attention_mask = torch.cat([
            attention_mask,
            torch.ones(
                attention_mask.shape[0],
                sequences.shape[1] - previous_len,
                dtype=attention_mask.dtype,
                device=attention_mask.device,
            ),
        ], dim=1)

        # Decode the whole generated suffix rather than concatenating
        # per-token decodes: decoding one token at a time can garble
        # characters or spacing that span token boundaries.
        text = tokenizer.decode(
            sequences[0, prompt_len:].tolist(),
            skip_special_tokens=True,
        )
        yield text.split('<|dlm_pad|>')[0].strip()

        # Stop once the model signals the end of its answer. The sentinel is
        # checked on a raw decode because `skip_special_tokens=True` may
        # remove it from the cleaned text before it can be seen.
        raw_new_text = tokenizer.decode(new_ids, skip_special_tokens=False)
        hit_pad = '<|dlm_pad|>' in raw_new_text
        hit_eos = eos_token_id is not None and eos_token_id in new_ids
        if hit_pad or hit_eos:
            break
# Create Gradio interface.
# The four input widgets are listed in the same order as generate_code's
# positional parameters: query, temperature, top_p, max_new_tokens.
demo = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(
            label="Code Request",
            lines=3,
            placeholder="Describe the code you want...",
        ),
        gr.Slider(0.1, 1.0, value=0.4, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.95, label="Top-p"),
        gr.Slider(32, 512, value=256, step=32, label="Max Tokens"),
    ],
    outputs=gr.Textbox(label="Generated Code", lines=10),
    # The title's leading emoji was mis-encoded ("π§"); restored as UTF-8.
    title="🧠 DiffuCoder Code Generator",
    description="Generate code with Apple's DiffuCoder-7B model",
    # Each example supplies only the first input (the code request).
    examples=[
        ["Write a Python function to calculate factorial"],
        ["Create a function to merge two sorted lists"],
        ["How to reverse a string in JavaScript?"],
    ],
)
# Script entry point: only start the server when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    queued_demo = demo.queue()
    queued_demo.launch()