CodeNyx / app.py
AryanRathod3097's picture
Update app.py
20fc52e verified
raw
history blame
5.77 kB
"""
CodeNyx – zero-config Gradio chatbot
Auto-handles Hugging Face tokens without stdin prompts.
"""
import os
import gradio as gr
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
TextIteratorStreamer
)
from huggingface_hub import login
from threading import Thread
# Hub repository the tokenizer and the model weights are loaded from.
MODEL_ID = "bigcode/starcoder2-3b-instruct"

# Display name, interpolated into the system prompt below.
BOT_NAME = "CodeNyx"

# Instruction text; adjacent string literals are concatenated into one string.
SYSTEM_PROMPT = (
    f"You are {BOT_NAME}, an expert open-source coding assistant. "
    "Always provide concise, runnable code snippets with short explanations."
)

# Generation settings.
MAX_NEW_TOK = 1024
TEMPERATURE = 0.2
TOP_P = 0.9
# ------------------------------------------------------------------
# 1. Token helper (Gradio popup instead of stdin)
# ------------------------------------------------------------------
def ensure_token(token_value):
    """
    Log in to the Hugging Face Hub with the token typed into the Gradio UI.

    The value is stripped of surrounding whitespace and handed to
    ``login``. Only after ``login`` has returned is the token cached in the
    ``HF_TOKEN`` environment variable, so a token the Hub rejects is never
    left behind in the environment.

    Args:
        token_value: Raw content of the token textbox. ``None`` and
            blank/whitespace-only strings are treated as "no token".

    Returns:
        True when a token was supplied and ``login`` completed; False when
        no token was supplied.

    Raises:
        Whatever ``login`` raises (for example on a rejected token) is
        propagated unchanged to the caller.
    """
    # A cleared textbox may deliver None rather than ""; treat both alike.
    token = (token_value or "").strip()
    if not token:
        return False
    login(token)
    # Cache last: reaching this line means the login call did not raise.
    os.environ["HF_TOKEN"] = token
    return True
# ------------------------------------------------------------------
# 2. Lazy model loader (once token is ready)
# ------------------------------------------------------------------
# Populated lazily by load_model(); both stay None until loading succeeds.
model, tokenizer = None, None


def load_model():
    """
    Load the 4-bit quantized model and its tokenizer into the module globals.

    The call is a no-op when ``model`` is already set. Otherwise the
    tokenizer and the model are fetched for ``MODEL_ID`` using the token
    stored in the ``HF_TOKEN`` environment variable, and the two globals are
    assigned together once both loads have finished.

    Returns:
        True once ``model`` and ``tokenizer`` are populated.

    Raises:
        Any exception raised by the ``from_pretrained`` calls is propagated;
        in that case the globals are left untouched.
    """
    global model, tokenizer
    if model is not None:
        return True  # already loaded

    hf_token = os.getenv("HF_TOKEN")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    # `token` is the current keyword; `use_auth_token` is deprecated.
    loaded_tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=hf_token,
        trust_remote_code=True,
    )
    if loaded_tokenizer.pad_token is None:
        # Fall back to EOS so a pad token is always defined.
        loaded_tokenizer.pad_token = loaded_tokenizer.eos_token

    loaded_model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        token=hf_token,
        trust_remote_code=True,
    )

    # Publish both at once so a failed model load cannot leave a tokenizer
    # set while the model is still None.
    tokenizer, model = loaded_tokenizer, loaded_model
    return True
# ------------------------------------------------------------------
# 3. Chat logic
# ------------------------------------------------------------------
def build_prompt(history, user_input):
    """
    Render the conversation as a single prompt string.

    The message list starts with the system prompt, continues with one
    user/assistant message pair per entry of ``history`` and ends with
    ``user_input`` as the newest user message. The list is then formatted by
    the tokenizer's chat template with the generation prompt appended.

    Args:
        history: Iterable of ``(user_text, assistant_text)`` pairs.
        user_input: The message awaiting a reply.

    Returns:
        The value returned by ``tokenizer.apply_chat_template`` with
        ``tokenize=False``.
    """
    conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
    for question, answer in history:
        conversation += [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ]
    conversation.append({"role": "user", "content": user_input})

    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
def user_turn(user_message, history):
    """
    Add the submitted message to the chat history as a new, unanswered turn.

    Args:
        user_message: Text submitted by the user.
        history: Existing chat history as a sequence of
            ``[user_text, assistant_text]`` pairs. ``None`` or an empty
            sequence is treated as "no history yet".

    Returns:
        A 2-tuple ``("", new_history)``: an empty string and a new list
        holding the previous turns followed by ``[user_message, ""]``.
        The ``history`` argument itself is not modified.
    """
    # Copy so the caller's list is left untouched; tolerate a None history.
    turns = list(history) if history else []
    turns.append([user_message, ""])
    return "", turns
def bot_turn(history):
    """
    Stream the model's reply to the last user message in ``history``.

    The prompt is built from every turn before the last one plus the last
    turn's user text. ``model.generate`` runs on a worker thread and feeds a
    ``TextIteratorStreamer``; this generator consumes the streamer and, for
    each chunk, writes the accumulated reply into ``history[-1][1]`` and
    yields ``history``.

    Args:
        history: List of ``[user_text, assistant_text]`` pairs whose last
            entry is the turn to answer. It is updated in place.

    Yields:
        ``history`` after each streamed chunk. When ``history`` is empty or
        None, a single empty history is yielded and nothing is generated.

    Raises:
        Any exception raised by ``model.generate`` on the worker thread is
        re-raised here once the stream has been closed.
    """
    if not history:
        # Nothing to answer; emit one unchanged (empty) history and stop.
        yield history or []
        return

    prompt = build_prompt(history[:-1], history[-1][0])
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )

    failures = []

    def _generate():
        # Run generation; on failure, record the error and close the stream
        # so the consuming loop below does not wait forever.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    worker = Thread(target=_generate)
    worker.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        history[-1][1] = partial
        yield history

    worker.join()
    if failures:
        # Surface the worker's error to the caller instead of hiding it.
        raise failures[0]
# ------------------------------------------------------------------
# 4. Gradio UI flow
# ------------------------------------------------------------------
with gr.Blocks(title=f"{BOT_NAME} – AI Pair-Programmer") as demo:
    gr.Markdown(f"""
# 🤖 {BOT_NAME} – AI Pair-Programmer
*3 B params, 100 % free-tier friendly.*
Paste your Hugging Face token **once** if asked, then chat away.
""")
    token_box = gr.Textbox(label="🤗 Hugging Face Token (only first time)", type="password")
    load_btn = gr.Button("Authorize")
    status_lbl = gr.Label(value="Waiting for token …")

    # --- main chat controls (hidden until token OK) ---
    with gr.Column(visible=False) as chat_col:
        chatbot = gr.Chatbot(height=450)
        with gr.Row():
            msg = gr.Textbox(
                placeholder="Ask me to write / debug / explain code …",
                lines=2,
                scale=8,
                show_label=False,
                container=False,
            )
            send_btn = gr.Button("Send", scale=1, variant="primary")
        clear_btn = gr.Button("🗑️ Clear")

    # ------------------------------------------------------------------
    # 5. Event wiring
    # ------------------------------------------------------------------
    def _auth(token):
        """
        Authorize with the given token, then load the model.

        Returns updates for (token_box, chat_col, status_lbl): on success
        the token box is hidden and the chat column shown; on any failure
        the token box stays visible and the status label carries the reason.
        """
        # Login and model loading share one try so that a failure in either
        # step is reported in the status label.
        try:
            if not ensure_token(token):
                return gr.update(visible=True), gr.update(visible=False), "❌ Invalid token."
            load_model()
        except Exception as e:
            return gr.update(visible=True), gr.update(visible=False), f"❌ Error: {e}"
        return gr.update(visible=False), gr.update(visible=True), "✅ Ready! Start coding."

    load_btn.click(_auth, token_box, [token_box, chat_col, status_lbl])

    def _send(user_msg, hist):
        """Record the user's message; outputs go to (msg, chatbot)."""
        return user_turn(user_msg, hist)

    def _bot(hist):
        """Stream the reply for the newest turn into the chatbot."""
        yield from bot_turn(hist)

    # Both the Enter key and the Send button run the same two-step chain:
    # append the user turn first, then stream the answer.
    msg.submit(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
    send_btn.click(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
    # Returning None resets the chatbot component.
    clear_btn.click(lambda: None, None, chatbot)

demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)