"""
CodeNyx – zero-config Gradio chatbot
Auto-handles Hugging Face tokens without stdin prompts.
"""

import os
import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TextIteratorStreamer
)
from huggingface_hub import login
from threading import Thread
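
# NOTE: the 4-bit loading below needs the `bitsandbytes` package and a CUDA
# GPU. A CPU-only fallback (a sketch, not wired in) would be:
#   model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)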

MODEL_ID      = "bigcode/starcoder2-3b-instruct"
BOT_NAME      = "CodeNyx"
SYSTEM_PROMPT = (f"You are {BOT_NAME}, an expert open-source coding assistant. "
                 "Always provide concise, runnable code snippets with short explanations.")

MAX_NEW_TOK   = 1024
TEMPERATURE   = 0.2
TOP_P         = 0.9
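# A low TEMPERATURE keeps code generation mostly deterministic; TOP_P trims
# the long tail of unlikely tokens during sampling.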

# ------------------------------------------------------------------
# 1. Token helper (Gradio popup instead of stdin)
# ------------------------------------------------------------------
def ensure_token(token_value):
    """
    token_value comes from the Gradio UI the first time.
    We cache it in the HF_TOKEN environment variable and log in once.
    Returns True on success, False on an empty or rejected token.
    """
    token = (token_value or "").strip()
    if not token:
        return False
    try:
        login(token=token)  # raises if the Hub rejects the token
    except Exception:
        return False
    os.environ["HF_TOKEN"] = token
    return True

# ------------------------------------------------------------------
# 2. Lazy model loader (once token is ready)
# ------------------------------------------------------------------
model, tokenizer = None, None

def load_model():
    global model, tokenizer
    if model is not None:
        return True  # already loaded

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
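    # NF4 4-bit weights with double quantization cut memory to roughly a
    # quarter of fp16, so the 3B model fits on a small free-tier GPU.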

    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=os.getenv("HF_TOKEN"),
        trust_remote_code=True,
    )
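    # Many causal LMs ship without a pad token; reusing EOS is the standard
    # workaround and avoids padding warnings during generation.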
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        token=os.getenv("HF_TOKEN"),
        trust_remote_code=True,
    )
    return True

# ------------------------------------------------------------------
# 3. Chat logic
# ------------------------------------------------------------------
def build_prompt(history, user_input):
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_input})
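    # Render with the model's own chat markup; this assumes the checkpoint
    # ships a chat template in its tokenizer config.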
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

def user_turn(user_message, history):
    return "", history + [[user_message, ""]]

def bot_turn(history):
    prompt = build_prompt(history[:-1], history[-1][0])
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
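    # skip_prompt drops the echoed input from the stream; skip_special_tokens
    # strips EOS/PAD markers so only fresh text reaches the UI.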

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
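    # model.generate() blocks until finished, so it runs in a background
    # thread while the streamer yields decoded chunks to this generator.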
    Thread(target=model.generate, kwargs=gen_kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        history[-1][1] = partial
        yield history

# ------------------------------------------------------------------
# 4. Gradio UI flow
# ------------------------------------------------------------------
with gr.Blocks(title=f"{BOT_NAME} – AI Pair-Programmer") as demo:
    gr.Markdown(f"""
    # 🤖 {BOT_NAME} – AI Pair-Programmer  
    *3B params, 100% free-tier friendly.*  
    Paste your Hugging Face token **once** if asked, then chat away.
    """)

    token_box  = gr.Textbox(label="🤗 Hugging Face Token (only first time)", type="password")
    load_btn   = gr.Button("Authorize")
    status_lbl = gr.Label(value="Waiting for token …")

    # --- main chat controls (hidden until token OK) ---
    with gr.Column(visible=False) as chat_col:
        chatbot = gr.Chatbot(height=450)
        with gr.Row():
            msg = gr.Textbox(
                placeholder="Ask me to write / debug / explain code …",
                lines=2,
                scale=8,
                show_label=False,
                container=False,
            )
            send_btn = gr.Button("Send", scale=1, variant="primary")
        clear_btn = gr.Button("🗑️ Clear")

    # ------------------------------------------------------------------
    # 5. Event wiring
    # ------------------------------------------------------------------
    def _auth(token):
        ok = ensure_token(token)
        if ok:
            try:
                load_model()
                return gr.update(visible=False), gr.update(visible=True), "✅ Ready! Start coding."
            except Exception as e:
                return gr.update(visible=True), gr.update(visible=False), f"❌ Error: {e}"
        else:
            return gr.update(visible=True), gr.update(visible=False), "❌ Invalid token."

    load_btn.click(_auth, token_box, [token_box, chat_col, status_lbl])
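    # Outputs map positionally: token_box visibility, chat_col visibility,
    # and the status text.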

    def _send(user_msg, hist):
        return user_turn(user_msg, hist)

    def _bot(hist):
        yield from bot_turn(hist)

    msg.submit(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
    send_btn.click(_send, [msg, chatbot], [msg, chatbot]).then(_bot, chatbot, chatbot)
    clear_btn.click(lambda: None, None, chatbot)

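# queue() is required for generator (streaming) event handlers; share=True
# additionally opens a public Gradio tunnel, best disabled outside quick demos.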
demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)