"""Gradio app that asks Seed-Coder-8B-Instruct to fix/modify/extend pasted code.

The user pastes code into a code editor, types a request into the prompt box,
and the model's answer is streamed back into the same code editor.
"""

import datetime
import gc
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import time
import typing

import gradio as gr
import safetensors
import torch
import spaces
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from accelerate import Accelerator
from huggingface_hub import login

MODEL_ID = "ByteDance-Seed/Seed-Coder-8B-Instruct"

SYSTEM_PROMPT = (
    "Acting as an expert AI Web Development and Programming model. As an "
    "expert AI Web Development and Programming model, "
    "correct/modify/create/generate/complete the user provided code based on "
    "user request, output the code without comments. Only output code without "
    "additional prefixed or suffixed messages/comments/wrapping or additional "
    "text."
)

HF_TOKEN = os.environ.get('HF_TOKEN')
# login(token=None) falls back to an interactive prompt, which cannot be
# answered in a headless deployment, so only log in when a token is provided.
if HF_TOKEN:
    login(token=HF_TOKEN)

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["ZERO_GPU_PATCH_TORCH_DEVICE"] = "True"

device = "cuda" if torch.cuda.is_available() else "cpu"

accelerator = Accelerator()
TOKENIZER = accelerator.prepare(AutoTokenizer.from_pretrained(MODEL_ID))
MODEL = accelerator.prepare(
    AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
)


@spaces.GPU()
def plex(pmpt: str, input_text: str) -> typing.Iterator[str]:
    """Stream the model's rewrite of ``input_text`` according to ``pmpt``.

    Args:
        pmpt: The user's natural-language request.
        input_text: The code the request applies to.

    Yields:
        The text generated so far (stripped of surrounding whitespace), once
        per decoded chunk, and a final time when generation has finished.

    Raises:
        Exception: Whatever ``MODEL.generate`` raised in the worker thread.
    """
    prompt = f"{pmpt}. The code: {input_text}"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    text = TOKENIZER.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = TOKENIZER(
        [text], return_tensors="pt", return_token_type_ids=False
    ).to(device)

    # skip_prompt=True plays the role of the original slice that dropped the
    # prompt tokens from the generated sequence before decoding.
    streamer = TextIteratorStreamer(
        TOKENIZER, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = {
        **model_inputs,
        "streamer": streamer,
        "max_new_tokens": 8192,
        "do_sample": True,
        "temperature": 0.7,
        "top_k": 30,
        "top_p": 0.8,
    }
    failures = []

    def _run_generation():
        """Run generation; on failure record the error and close the stream."""
        try:
            MODEL.generate(**generation_kwargs)
        except Exception as exc:  # re-raised by the consumer below
            failures.append(exc)
            # Without this the consumer loop would wait on the queue forever.
            streamer.end()

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator forwards decoded chunks to the UI as they arrive.
    worker = threading.Thread(target=_run_generation)
    worker.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text.strip()
    worker.join()

    if failures:
        raise failures[0]
    yield generated_text.strip()


with gr.Blocks() as iface:
    ins = gr.Code(
        lines=30,
        label=(
            "Paste the code you'd like to modify/fix/extend/edit here.. or start coding.. "
            "get assistance when/if required."
        ),
    )
    pmpt = gr.Textbox(label="Prompt")
    btn = gr.Button("Assist")
    # The editor is both an input and the output: the answer replaces its content.
    btn.click(plex, [pmpt, ins], [ins])

iface.queue()
iface.launch(ssr_mode=False, share=False)