import gradio as gr
import os, re, gc, time, sys, subprocess, typing, shutil, json, datetime, tempfile, safetensors, torch, threading, spaces
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
from huggingface_hub import login
# --- Runtime setup -----------------------------------------------------------
# Hub authentication is optional: only log in when a non-empty token is
# provided. Calling login() without a token falls back to an interactive
# prompt, which cannot be answered in a headless deployment.
HF_TOKEN = os.environ.get('HF_TOKEN')
if HF_TOKEN:
    login(token=HF_TOKEN)

# Allocator / ZeroGPU tuning flags read by torch and the `spaces` package.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["ZERO_GPU_PATCH_TORCH_DEVICE"] = "True"

# Device the tokenized inputs are moved to before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"
accelerator = Accelerator()

# Single source of truth for the checkpoint used by both tokenizer and model.
_MODEL_ID = "ByteDance-Seed/Seed-Coder-8B-Instruct"

TOKENIZER = accelerator.prepare(AutoTokenizer.from_pretrained(_MODEL_ID))
MODEL = accelerator.prepare(
    AutoModelForCausalLM.from_pretrained(_MODEL_ID, torch_dtype=torch.bfloat16)
)

@spaces.GPU()
def plex(pmpt, input_text):
    """Stream model-generated code for a user request.

    Builds a chat prompt from the user's instruction and code, runs sampling
    generation on ``MODEL`` and yields the output incrementally as it is
    produced, so the UI updates while the model is still generating.

    Args:
        pmpt: The user's instruction (what to fix/modify/create).
        input_text: The code the instruction applies to.

    Yields:
        The accumulated generated text so far, stripped of leading/trailing
        whitespace. The final yield is the complete output (an empty string
        if nothing was generated).

    Raises:
        Exception: Any error raised by ``MODEL.generate`` is re-raised here
            after the stream ends.
    """
    # Local import keeps this block self-contained; transformers is already a
    # dependency of this file.
    from transformers import TextIteratorStreamer

    prompt = f"{pmpt}. The code: {input_text}"
    messages = [
        {"role": "system", "content": "Acting as an expert AI Web Development and Programming model. As an expert AI Web Development and Programming model, correct/modify/create/generate/complete the user provided code based on user request, output the code without comments. Only output code without additional prefixed or suffixed messages/comments/wrapping or additional text."},
        {"role": "user", "content": prompt},
    ]
    text = TOKENIZER.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = TOKENIZER([text], return_tensors="pt", return_token_type_ids=False).to(device)

    # skip_prompt drops the echoed input tokens, matching the original slicing
    # of the output past len(input_ids).
    streamer = TextIteratorStreamer(TOKENIZER, skip_prompt=True, skip_special_tokens=True)
    failures = []

    def _generate():
        """Run generation; on failure, record it and unblock the consumer."""
        try:
            MODEL.generate(
                **model_inputs,
                streamer=streamer,
                max_new_tokens=8192,
                do_sample=True,
                temperature=0.7,
                top_k=30,
                top_p=0.8,
            )
        except Exception as exc:  # re-raised in the caller below
            failures.append(exc)
            # Without this the consumer loop would wait forever for a stop
            # signal that generate() never got to send.
            streamer.end()

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator drains the streamer.
    worker = threading.Thread(target=_generate, daemon=True)
    worker.start()

    generated_text = ""
    for new_text in streamer:
        if not new_text:
            continue  # nothing new to show; avoid a redundant UI update
        generated_text += new_text
        yield generated_text.strip()

    worker.join()
    if failures:
        raise failures[0]
    yield generated_text.strip()

# --- User interface ----------------------------------------------------------
# One code editor that serves as both input and output, a prompt box, and a
# button that sends both to `plex` and streams the result back into the editor.
with gr.Blocks() as iface:
    ins = gr.Code(
        label="Paste the code you'd like to modify/fix/extend/edit here.. or start coding.. get assistance when/if required.",
        lines=30,
    )
    pmpt = gr.Textbox(label="Prompt")
    btn = gr.Button("Assist")
    btn.click(fn=plex, inputs=[pmpt, ins], outputs=[ins])

# Enable the request queue, then start the server (no SSR, no public share link).
iface.queue().launch(ssr_mode=False, share=False)