beyoru committed
Commit 7d179f8 · verified · 1 Parent(s): 8943ecb

Update app.py

Files changed (1): app.py (+87 -55)
app.py CHANGED
@@ -1,64 +1,96 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
- 
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
- 
- 
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
- 
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
- 
-     messages.append({"role": "user", "content": message})
- 
-     response = ""
- 
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
- 
-         response += token
-         yield response
- 
- 
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
- 
- 
- if __name__ == "__main__":
-     demo.launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ import re
+ 
+ # Model configuration
+ MAX_REASONING_TOKENS = 4096
+ MAX_RESPONSE_TOKENS = 2048
+ MODEL_OPTIONS = ["beyoru/ThinkAgain1.2", "beyoru/ThinkAgain1.4", "beyoru/ThinkAgain1.5"]
+ 
+ # Global state: the currently loaded model, its tokenizer, and the conversation history
+ model = None
+ tokenizer = None
+ messages = []
+ 
+ # Extract the text between <think> and </think> tags; fall back to the raw text
+ def extract_think_content(text):
+     match = re.search(r'<think>(.*?)</think>', text, re.DOTALL)
+     return match.group(1).strip() if match else text
+ 
+ # Load the selected model and tokenizer, resetting the conversation history
+ def load_model(model_name):
+     global model, tokenizer, messages
+     messages = []  # Reset conversation history
+     model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     return f"Loaded model: {model_name}"
+ 
+ def generate_response(user_input, history=None):
+     global messages
+ 
+     if history is None:
+         history = []
+ 
+     # Record the user turn in both the model context and the chatbot display
+     messages.append({"role": "user", "content": user_input})
+     history.append((user_input, None))
+ 
+     # Pass 1: generate reasoning (add_reasoning_prompt is forwarded to the model's custom chat template)
+     reasoning_template = tokenizer.apply_chat_template(messages, tokenize=False, add_reasoning_prompt=True)
+     reasoning_inputs = tokenizer(reasoning_template, return_tensors="pt").to(model.device)
+     reasoning_ids = model.generate(**reasoning_inputs, max_new_tokens=MAX_REASONING_TOKENS)
+     reasoning_output = tokenizer.decode(reasoning_ids[0, reasoning_inputs.input_ids.shape[1]:], skip_special_tokens=True)
+ 
+     # Keep only the content inside the <think> tags
+     extracted_reasoning = extract_think_content(reasoning_output)
+     messages.append({"role": "reasoning", "content": extracted_reasoning})
+ 
+     # Pass 2: generate the assistant response, conditioned on the extracted reasoning
+     response_template = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     response_inputs = tokenizer(response_template, return_tensors="pt").to(model.device)
+     response_ids = model.generate(**response_inputs, max_new_tokens=MAX_RESPONSE_TOKENS)
+     response_output = tokenizer.decode(response_ids[0, response_inputs.input_ids.shape[1]:], skip_special_tokens=True)
+ 
+     messages.append({"role": "assistant", "content": response_output})
+ 
+     # Show reasoning and answer as one chatbot message, with the reasoning collapsible
+     combined_response = (
+         f"<details><summary>Reasoning</summary>{extracted_reasoning}</details>\n\n"
+         f"{response_output}"
+     )
+     history.append((None, combined_response))
+ 
+     return history, ""
+ 
+ # Build the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# ThinkAgain Chatbot")
+ 
+     # Model selection
+     model_dropdown = gr.Dropdown(choices=MODEL_OPTIONS, label="Select Model", value=MODEL_OPTIONS[-1])
+     model_load_status = gr.Textbox(label="Model Status", interactive=False)
+ 
+     # Load the chosen model whenever the dropdown changes
+     model_dropdown.change(
+         fn=load_model,
+         inputs=model_dropdown,
+         outputs=model_load_status
+     )
+ 
+     chatbot = gr.Chatbot(label="Conversation")
+     user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
+     submit = gr.Button("Send")
+ 
+     # Update the chatbot and clear the input box after submission
+     submit.click(
+         fn=generate_response,
+         inputs=[user_input, chatbot],
+         outputs=[chatbot, user_input]
+     )
+ 
+ # Load the default model at startup, matching the dropdown's initial value
+ load_model(MODEL_OPTIONS[-1])
+ 
+ # Launch the app
+ demo.launch()
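
For reference, the two-pass flow this commit introduces can be exercised outside Gradio. The sketch below is a minimal, hypothetical reproduction, not part of the committed file: it assumes the ThinkAgain chat templates accept the non-standard add_reasoning_prompt flag and a "reasoning" role, as app.py does (extra keyword arguments to apply_chat_template are forwarded to the template), and the prompt string is illustrative.

# Minimal standalone sketch of app.py's two-pass reasoning flow.
# Assumptions: the ThinkAgain chat template understands add_reasoning_prompt
# and a "reasoning" role; enough GPU/CPU memory exists for the fp16 weights.
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "beyoru/ThinkAgain1.5"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")

def extract_think_content(text):
    # Same extraction as app.py: keep only what sits inside <think>...</think>
    match = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    return match.group(1).strip() if match else text

def run(messages, max_new_tokens, **template_kwargs):
    # Render the chat template, generate, and decode only the newly generated tokens
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, **template_kwargs)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(ids[0, inputs.input_ids.shape[1]:], skip_special_tokens=True)

messages = [{"role": "user", "content": "What is 17 * 23?"}]  # illustrative prompt

# Pass 1: reasoning, reduced to the <think> content
reasoning = extract_think_content(run(messages, 4096, add_reasoning_prompt=True))
messages.append({"role": "reasoning", "content": reasoning})

# Pass 2: final answer conditioned on the extracted reasoning
print(run(messages, 2048, add_generation_prompt=True))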