"""
CodeNyx: a zero-config Gradio chatbot.
Auto-handles Hugging Face tokens without stdin prompts.
"""
import os
import gradio as gr
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
TextIteratorStreamer
)
from huggingface_hub import login
from threading import Thread
MODEL_ID = "bigcode/starcoder2-3b-instruct"
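# Note: BigCode checkpoints on the Hub may be gated behind a license
# agreement, hence the one-time token prompt in the UI below.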
BOT_NAME = "CodeNyx"
SYSTEM_PROMPT = (f"You are {BOT_NAME}, an expert open-source coding assistant. "
"Always provide concise, runnable code snippets with short explanations.")
MAX_NEW_TOK = 1024
TEMPERATURE = 0.2
TOP_P = 0.9
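# Low temperature plus nucleus sampling biases output toward deterministic,
# runnable code while keeping a little variation in wording.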
# ------------------------------------------------------------------
# 1. Token helper (Gradio popup instead of stdin)
# ------------------------------------------------------------------
def ensure_token(token_value):
"""
token_value comes from the Gradio UI the first time.
We cache it in environment variable HF_TOKEN and login once.
Returns True on success.
"""
    token = token_value.strip()
    if not token:
        return False
    os.environ["HF_TOKEN"] = token
    try:
        login(token)  # raises if the Hub rejects the token
    except Exception:
        return False
    return True
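# huggingface_hub also honors the HF_TOKEN environment variable, so later
# from_pretrained() downloads can authenticate even without an explicit token.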
# ------------------------------------------------------------------
# 2. Lazy model loader (once token is ready)
# ------------------------------------------------------------------
model, tokenizer = None, None
def load_model():
global model, tokenizer
if model is not None:
return True # already loaded
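    # 4-bit NF4 quantization with double quantization keeps the 3B model at
    # roughly 2-3 GB of GPU memory, small enough for free-tier hardware.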
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
)
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=os.getenv("HF_TOKEN"),  # `token` replaces the deprecated `use_auth_token`
        trust_remote_code=True,
    )
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
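    # Many code-model tokenizers ship without a pad token; falling back to EOS
    # avoids padding errors during generation.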
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        token=os.getenv("HF_TOKEN"),
        trust_remote_code=True,
    )
return True
# ------------------------------------------------------------------
# 3. Chat logic
# ------------------------------------------------------------------
def build_prompt(history, user_input):
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
for human, ai in history:
messages.append({"role": "user", "content": human})
messages.append({"role": "assistant", "content": ai})
messages.append({"role": "user", "content": user_input})
return tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
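# The exact prompt markup produced above depends on the chat template bundled
# with the tokenizer; for a ChatML-style template it renders roughly as
#   <|im_start|>system ... <|im_start|>user ... <|im_start|>assistant
# (an assumption; instruct checkpoints ship their own template).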
def user_turn(user_message, history):
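    # Clear the input box and append a new (user, pending-assistant) pair.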
return "", history + [[user_message, ""]]
def bot_turn(history):
prompt = build_prompt(history[:-1], history[-1][0])
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(
tokenizer,
skip_prompt=True,
skip_special_tokens=True,
)
gen_kwargs = dict(
**inputs,
max_new_tokens=MAX_NEW_TOK,
temperature=TEMPERATURE,
top_p=TOP_P,
do_sample=True,
pad_token_id=tokenizer.eos_token_id,
streamer=streamer,
)
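    # Generate on a background thread; the streamer then yields decoded text
    # chunks here, so the chat window updates as tokens arrive.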
Thread(target=model.generate, kwargs=gen_kwargs).start()
partial = ""
for new_text in streamer:
partial += new_text
history[-1][1] = partial
yield history
# ------------------------------------------------------------------
# 4. Gradio UI flow
# ------------------------------------------------------------------
with gr.Blocks(title=f"{BOT_NAME} - AI Pair-Programmer") as demo:
    gr.Markdown(f"""
    # 🤖 {BOT_NAME} - AI Pair-Programmer
    *3B params, 100% free-tier friendly.*
    Paste your Hugging Face token **once** if asked, then chat away.
    """)
    token_box = gr.Textbox(label="🔑 Hugging Face Token (only first time)", type="password")
    load_btn = gr.Button("Authorize")
    status_lbl = gr.Label(value="Waiting for token …")
# --- main chat controls (hidden until token OK) ---
with gr.Column(visible=False) as chat_col:
chatbot = gr.Chatbot(height=450)
with gr.Row():
msg = gr.Textbox(
placeholder="Ask me to write / debug / explain code β¦",
lines=2,
scale=8,
show_label=False,
container=False,
)
send_btn = gr.Button("Send", scale=1, variant="primary")
        clear_btn = gr.Button("🗑️ Clear")
# ------------------------------------------------------------------
# 5. Event wiring
# ------------------------------------------------------------------
    def _auth(token):
        ok = ensure_token(token)
        if ok:
            try:
                load_model()
                return gr.update(visible=False), gr.update(visible=True), "✅ Ready! Start coding."
            except Exception as e:
                return gr.update(visible=True), gr.update(visible=False), f"❌ Error: {e}"
        else:
            return gr.update(visible=True), gr.update(visible=False), "❌ Invalid token."
load_btn.click(_auth, token_box, [token_box, chat_col, status_lbl])
def _send(user_msg, hist):
return user_turn(user_msg, hist)
def _bot(hist):
yield from bot_turn(hist)
msg.submit(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
send_btn.click(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
clear_btn.click(lambda: None, None, chatbot)
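# queue() enables generator callbacks, which the token-streaming handlers need;
# share=True additionally creates a temporary public link.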
demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)