# Source: Hugging Face Space file app.py by JoPmt (commit cf317f8, "Create app.py", verified, 2.08 kB).
import gradio as gr
import os, re, gc, time, sys, subprocess, typing, shutil, json, datetime, tempfile, safetensors, torch, threading, spaces
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
from huggingface_hub import login
# --- Module setup: Hub authentication, runtime flags, tokenizer and model ---

# Checkpoint used for both the tokenizer and the causal language model.
MODEL_ID = "ByteDance-Seed/Seed-Coder-8B-Instruct"

# Authenticate only when a token is actually configured. Calling
# login(token=None) falls back to an interactive prompt, which cannot be
# answered when the app runs headless.
HF_TOKEN = os.environ.get('HF_TOKEN')
if HF_TOKEN:
    login(token=HF_TOKEN)

# Runtime flags exported before the model is loaded.
# NOTE(review): these are set after `torch` and `spaces` were imported;
# confirm both libraries read them lazily rather than at import time.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["ZERO_GPU_PATCH_TORCH_DEVICE"] = "True"

# Device that tokenized inputs are moved to inside the inference function.
device = "cuda" if torch.cuda.is_available() else "cpu"

accelerator = Accelerator()
# Both objects are passed through accelerator.prepare(), as in the original.
TOKENIZER = accelerator.prepare(AutoTokenizer.from_pretrained(MODEL_ID))
MODEL = accelerator.prepare(
    AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
)
@spaces.GPU()
def plex(pmpt, input_text):
    """Stream model-generated code for a user request.

    Builds a chat prompt from the request and the supplied code, runs
    sampling-based generation, and yields the output incrementally so the
    UI can update while tokens are still being produced.

    Args:
        pmpt: The user's instruction describing what to do with the code.
        input_text: The code the instruction applies to.

    Yields:
        The cumulative generated text so far, with surrounding whitespace
        stripped. The last value yielded is the complete output (an empty
        string if nothing was generated).

    Raises:
        Exception: Any error raised by ``MODEL.generate`` is re-raised here
            after the partial output has been streamed.
    """
    # Imported locally because the file's top-level imports do not bring
    # this name into scope.
    from transformers import TextIteratorStreamer

    prompt = f"{pmpt}. The code: {input_text}"
    messages = [
        {"role": "system", "content": "Acting as an expert AI Web Development and Programming model. As an expert AI Web Development and Programming model, correct/modify/create/generate/complete the user provided code based on user request, output the code without comments. Only output code without additional prefixed or suffixed messages/comments/wrapping or additional text."},
        {"role": "user", "content": prompt},
    ]
    text = TOKENIZER.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = TOKENIZER([text], return_tensors="pt", return_token_type_ids=False).to(device)

    # skip_prompt=True replaces the original manual slicing-off of the
    # prompt tokens; skip_special_tokens is forwarded to decode().
    streamer = TextIteratorStreamer(TOKENIZER, skip_prompt=True, skip_special_tokens=True)
    errors = []

    def _generate():
        """Run generation in the background, feeding decoded text to the streamer."""
        try:
            MODEL.generate(
                **model_inputs,
                streamer=streamer,
                max_new_tokens=8192,
                do_sample=True,
                temperature=0.7,
                top_k=30,
                top_p=0.8,
            )
        except Exception as exc:
            # Hand the failure to the consumer and close the stream so the
            # loop below does not block forever waiting for more text.
            errors.append(exc)
            streamer.end()

    worker = threading.Thread(target=_generate, daemon=True)
    worker.start()

    generated_text = ""
    for new_text in streamer:
        # The streamer can emit empty chunks while it waits for a word
        # boundary; skip them to avoid redundant UI updates.
        if not new_text:
            continue
        generated_text += new_text
        yield generated_text.strip()

    worker.join()
    if errors:
        raise errors[0]

    # Always emit a final value, even when no text was generated.
    yield generated_text.strip()
# Assemble the UI: a code editor, a free-text request box and a trigger button.
with gr.Blocks() as iface:
    ins = gr.Code(lines=30)
    pmpt = gr.Textbox()
    btn = gr.Button("Assist")
    # The editor is both an input and the output, so each value yielded by
    # plex replaces the editor's contents.
    btn.click(fn=plex, inputs=[pmpt, ins], outputs=[ins])

# Enable the request queue, then start the server.
iface.queue()
iface.launch(ssr_mode=False, share=False)