Spaces: Running on Zero
Create app.py
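app.py wires ByteDance's Seed-Coder-8B-Instruct into a minimal Gradio code assistant on ZeroGPU hardware: the user pastes code into a gr.Code editor, types an instruction, and the model's rewrite is streamed back into the same editor.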
app.py
ADDED
import os
import torch
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
from huggingface_hub import login

# Log in to the Hub only when a token is configured in the Space secrets.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# CUDA allocator and ZeroGPU device-patching settings for Spaces.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["ZERO_GPU_PATCH_TORCH_DEVICE"] = "True"
device = "cuda" if torch.cuda.is_available() else "cpu"
accelerator = Accelerator()

TOKENIZER = AutoTokenizer.from_pretrained("ByteDance-Seed/Seed-Coder-8B-Instruct")
MODEL = accelerator.prepare(AutoModelForCausalLM.from_pretrained(
    "ByteDance-Seed/Seed-Coder-8B-Instruct", torch_dtype=torch.bfloat16))

SYSTEM_PROMPT = (
    "Acting as an expert AI Web Development and Programming model. As an expert AI "
    "Web Development and Programming model, correct/modify/create/generate/complete "
    "the user provided code based on user request, output the code without comments. "
    "Only output code without additional prefixed or suffixed "
    "messages/comments/wrapping or additional text."
)

@spaces.GPU()  # ZeroGPU: a GPU is attached for the duration of each call.
def plex(pmpt, input_text):
    prompt = f"{pmpt}. The code: {input_text}"
    messages = [{"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt}]
    text = TOKENIZER.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = TOKENIZER([text], return_tensors="pt", return_token_type_ids=False).to(device)
    # Generate the full completion, keeping only the newly produced tokens.
    output_ids = MODEL.generate(**model_inputs, max_new_tokens=8192, do_sample=True,
                                temperature=0.7, top_k=30, top_p=0.8)[0][len(model_inputs.input_ids[0]):]
    # Decode once, then yield character by character so the editor fills in
    # progressively after generation completes.
    generated_text = ""
    for new_text in TOKENIZER.decode(output_ids, skip_special_tokens=True):
        generated_text += new_text
        yield generated_text.strip()
    yield generated_text.strip()

with gr.Blocks() as iface:
    ins = gr.Code(lines=30)
    pmpt = gr.Textbox()
    btn = gr.Button("Assist")
    btn.click(plex, [pmpt, ins], [ins])

iface.queue()
iface.launch(ssr_mode=False, share=False)
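Note that plex only starts yielding after MODEL.generate returns, since it decodes the finished completion and then replays it character by character. For genuine token-by-token streaming, transformers provides TextIteratorStreamer, which is fed by generate() running in a background thread. A minimal sketch reusing the globals defined in app.py (TOKENIZER, MODEL, device, SYSTEM_PROMPT); the function name plex_streaming is hypothetical and not part of this commit:

import threading
from transformers import TextIteratorStreamer

@spaces.GPU()
def plex_streaming(pmpt, input_text):  # hypothetical variant, not in the commit
    prompt = f"{pmpt}. The code: {input_text}"
    messages = [{"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt}]
    text = TOKENIZER.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = TOKENIZER([text], return_tensors="pt", return_token_type_ids=False).to(device)
    # skip_prompt drops the echoed input; generate() pushes decoded text into
    # the streamer from a worker thread while this generator consumes it.
    streamer = TextIteratorStreamer(TOKENIZER, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=8192,
                  do_sample=True, temperature=0.7, top_k=30, top_p=0.8)
    threading.Thread(target=MODEL.generate, kwargs=kwargs).start()
    generated_text = ""
    for chunk in streamer:  # blocks until the next decoded span arrives
        generated_text += chunk
        yield generated_text.strip()

Swapping it in is a one-line change to the UI wiring: btn.click(plex_streaming, [pmpt, ins], [ins]). The Space also needs a requirements.txt next to app.py; a plausible minimal set inferred from the imports is gradio, torch, transformers, accelerate, and huggingface_hub (the spaces package ships preinstalled on ZeroGPU hardware).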