KingNish committed
Commit 060b8a4 · verified · 1 parent: a940b7a

Update app.py

Files changed (1):
  app.py (+17 -9)
app.py CHANGED
@@ -3,20 +3,22 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
 from threading import Thread
 import spaces
+import time
 
 # Load the model and tokenizer
 model_name = "sarvamai/sarvam-m"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
 
+indicators = ["Thinking ⠋", "Thinking ⠙", "Thinking ⠹", "Thinking ⠸", "Thinking ⠼", "Thinking ⠴", "Thinking ⠦", "Thinking ⠧", "Thinking ⠇", "Thinking ⠏"]
+
 @spaces.GPU(duration=120)
 def generate_response(prompt, chat_history):
-
-    chat_history.append(dict(role="user", content=prompt ))
+    chat_history.append(dict(role="user", content=prompt))
     yield chat_history
 
     print(chat_history)
-
+
     text = tokenizer.apply_chat_template(chat_history, tokenize=False, enable_thinking=True)
 
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
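Note on the hunk above: the new indicators list holds Braille spinner frames that the streaming loop (third hunk below) steps through once per streamed chunk. A minimal, self-contained sketch of that cycling behavior, not part of app.py:

import itertools
import time

frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
# Step through the frames the same way the generation loop advances
# indicator_index: wrapping around, one frame per iteration.
for frame in itertools.islice(itertools.cycle(frames), 20):
    print(f"\rThinking {frame}", end="", flush=True)
    time.sleep(0.08)
print()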
@@ -27,7 +29,7 @@ def generate_response(prompt, chat_history):
     # Conduct text generation with streaming
     generation_kwargs = dict(
         input_ids=model_inputs.input_ids,
-        max_new_tokens=4096,
+        max_new_tokens=8192,
         streamer=streamer,
     )
 
@@ -38,22 +40,28 @@ def generate_response(prompt, chat_history):
     reasoning_content = ""
     content = ""
     reasoning_done = False
+    start_time = time.time()
 
-    chat_history.append(dict(role="assistant", content=reasoning_content, metadata={"title": "Thinking..."}) )
+    chat_history.append(dict(role="assistant", content=reasoning_content, metadata={"title": "Thinking..."}))
 
+    indicator_index = 0
     for new_text in streamer:
         if "</think>" in new_text:
-            chat_history[-1]["metadata"] = {"title": "Thinking Completed"}
             reasoning_done = True
+            thought_duration = time.time() - start_time
+            chat_history[-1]["metadata"] = {"title": f"Thought for {thought_duration:.2f} seconds"}
             chat_history.append(dict(role="assistant", content=content))
-
+
         if not reasoning_done:
+            # Update the thinking indicator
+            indicator_index = (indicator_index + 1) % len(indicators)
+            chat_history[-1]["metadata"] = {"title": indicators[indicator_index]}
             reasoning_content += new_text
             chat_history[-1]["content"] = reasoning_content
         else:
             content += new_text
             chat_history[-1]["content"] = content
-
+
         yield chat_history
 
 # Create the Gradio interface
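For reference, the loop above maintains chat_history in Gradio's "messages" format, where an assistant entry carrying a metadata title renders as a collapsible thought bubble. After one full generation the history looks roughly like this (values are placeholders):

# Sketch of the resulting history; contents are illustrative only.
chat_history = [
    {"role": "user", "content": "<prompt>"},
    # Reasoning message: its title cycles through the spinner frames
    # while thinking, then is stamped with the measured duration.
    {"role": "assistant", "content": "<reasoning>",
     "metadata": {"title": "Thought for 3.14 seconds"}},
    # Final visible answer, appended once </think> is seen.
    {"role": "assistant", "content": "<answer>"},
]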
@@ -64,4 +72,4 @@ with gr.Blocks() as demo:
     msg.submit(generate_response, [msg, chatbot], [chatbot])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(mcp_server=True)
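A note on the last hunk: mcp_server=True is a launch flag in recent Gradio releases (it requires the gradio[mcp] extra to be installed) that additionally exposes the app's endpoints as Model Context Protocol tools; a plain demo.launch() keeps the previous HTTP-only behavior.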
 