""" | |
CodeNyx β zero-config Gradio chatbot | |
Auto-handles Hugging Face tokens without stdin prompts. | |
""" | |
import os | |
import gradio as gr | |
import torch | |
from transformers import ( | |
AutoTokenizer, | |
AutoModelForCausalLM, | |
BitsAndBytesConfig, | |
TextIteratorStreamer | |
) | |
from huggingface_hub import login | |
from threading import Thread | |
MODEL_ID = "bigcode/starcoder2-3b-instruct" | |
BOT_NAME = "CodeNyx" | |
SYSTEM_PROMPT = (f"You are {BOT_NAME}, an expert open-source coding assistant. " | |
"Always provide concise, runnable code snippets with short explanations.") | |
MAX_NEW_TOK = 1024 | |
TEMPERATURE = 0.2 | |
TOP_P = 0.9 | |
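# A low TEMPERATURE (0.2) keeps code generations mostly deterministic, while
# TOP_P = 0.9 (nucleus sampling) still allows some variation in wording;
# raise TEMPERATURE if you want more exploratory answers.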

# ------------------------------------------------------------------
# 1. Token helper (Gradio popup instead of stdin)
# ------------------------------------------------------------------
def ensure_token(token_value):
    """
    token_value comes from the Gradio UI the first time.
    We cache it in the HF_TOKEN environment variable and log in once.
    Returns True on success, False if the token is empty or rejected.
    """
    token = (token_value or "").strip()
    if not token:
        return False
    try:
        login(token)  # validates the token against the Hub; raises if invalid
    except Exception:
        return False
    os.environ["HF_TOKEN"] = token
    return True
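
# Tip (assumes a Hugging Face Spaces deployment): the popup can be skipped by
# adding HF_TOKEN as a Space secret; secrets are exposed to the app as
# environment variables, so the app could check os.environ before asking.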

# ------------------------------------------------------------------
# 2. Lazy model loader (once token is ready)
# ------------------------------------------------------------------
model, tokenizer = None, None


def load_model():
    global model, tokenizer
    if model is not None:
        return True  # already loaded
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
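    # NF4 4-bit weights take roughly a quarter of the memory of fp16 (about
    # 1.5 GB of weights for a 3B-parameter model); double quantization also
    # compresses the quantization constants, and matmuls run in float16.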
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=os.getenv("HF_TOKEN"),  # use_auth_token is deprecated in favor of token
        trust_remote_code=True,
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        token=os.getenv("HF_TOKEN"),
        trust_remote_code=True,
    )
    return True
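
# Note on load_model(): bitsandbytes 4-bit loading generally requires a CUDA
# GPU; on a CPU-only Space the from_pretrained call above fails at startup.
# If that happens, drop quantization_config and pass torch_dtype=torch.float32.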

# ------------------------------------------------------------------
# 3. Chat logic
# ------------------------------------------------------------------
def build_prompt(history, user_input):
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_input})
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
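# apply_chat_template renders the message list with whatever chat template the
# tokenizer ships, inserting the model's role markers and special tokens and,
# with add_generation_prompt=True, the opening of a fresh assistant turn.
# If the tokenizer has no template, behavior depends on the transformers
# version: older releases fall back to a generic template, newer ones raise.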


def user_turn(user_message, history):
    return "", history + [[user_message, ""]]


def bot_turn(history):
    prompt = build_prompt(history[:-1], history[-1][0])
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        history[-1][1] = partial
        yield history
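
# Streaming pattern: model.generate runs in a background thread and pushes
# decoded text chunks into the TextIteratorStreamer; the loop above consumes
# them on the main thread and yields the growing history, so Gradio updates
# the last chat bubble as each chunk arrives.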

# ------------------------------------------------------------------
# 4. Gradio UI flow
# ------------------------------------------------------------------
with gr.Blocks(title=f"{BOT_NAME} - AI Pair-Programmer") as demo:
    gr.Markdown(f"""
# {BOT_NAME} - AI Pair-Programmer
*3B params, 100% free-tier friendly.*
Paste your Hugging Face token **once** if asked, then chat away.
""")
    token_box = gr.Textbox(label="Hugging Face Token (only first time)", type="password")
    load_btn = gr.Button("Authorize")
    status_lbl = gr.Label(value="Waiting for token ...")

    # --- main chat controls (hidden until token OK) ---
    with gr.Column(visible=False) as chat_col:
        chatbot = gr.Chatbot(height=450)
        with gr.Row():
            msg = gr.Textbox(
                placeholder="Ask me to write / debug / explain code ...",
                lines=2,
                scale=8,
                show_label=False,
                container=False,
            )
            send_btn = gr.Button("Send", scale=1, variant="primary")
        clear_btn = gr.Button("Clear")

    # ------------------------------------------------------------------
    # 5. Event wiring
    # ------------------------------------------------------------------
    def _auth(token):
        ok = ensure_token(token)
        if ok:
            try:
                load_model()
                return gr.update(visible=False), gr.update(visible=True), "Ready! Start coding."
            except Exception as e:
                return gr.update(visible=True), gr.update(visible=False), f"Error: {e}"
        else:
            return gr.update(visible=True), gr.update(visible=False), "Invalid token."

    load_btn.click(_auth, token_box, [token_box, chat_col, status_lbl])

    def _send(user_msg, hist):
        return user_turn(user_msg, hist)

    def _bot(hist):
        yield from bot_turn(hist)

    msg.submit(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
    send_btn.click(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
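    # Chaining with .then(): _send runs first (it clears the textbox and
    # appends an empty assistant turn), then _bot streams tokens into that
    # last turn; Enter in the textbox and the Send button share this pipeline.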
    clear_btn.click(lambda: None, None, chatbot)

demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)
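
# demo.queue() enables request queuing so the generator-based _bot handler can
# stream partial results to the browser. share=True only matters for local
# runs; a Space already serves the app at a public URL.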