Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -3,20 +3,22 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
 from threading import Thread
 import spaces
+import time
 
 # Load the model and tokenizer
 model_name = "sarvamai/sarvam-m"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
 
+indicators = ["Thinking ⠋", "Thinking ⠙", "Thinking ⠹", "Thinking ⠸", "Thinking ⠼", "Thinking ⠴", "Thinking ⠦", "Thinking ⠧", "Thinking ⠇", "Thinking ⠏"]
+
 @spaces.GPU(duration=120)
 def generate_response(prompt, chat_history):
-
-    chat_history.append(dict(role="user", content=prompt ))
+    chat_history.append(dict(role="user", content=prompt))
     yield chat_history
 
     print(chat_history)
-
+
     text = tokenizer.apply_chat_template(chat_history, tokenize=False, enable_thinking=True)
 
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
@@ -27,7 +29,7 @@ def generate_response(prompt, chat_history):
     # Conduct text generation with streaming
     generation_kwargs = dict(
         input_ids=model_inputs.input_ids,
-        max_new_tokens=
+        max_new_tokens=8192,
         streamer=streamer,
     )
 
@@ -38,22 +40,28 @@ def generate_response(prompt, chat_history):
     reasoning_content = ""
     content = ""
     reasoning_done = False
+    start_time = time.time()
 
-    chat_history.append(dict(role="assistant", content=reasoning_content, metadata={"title": "Thinking..."})
+    chat_history.append(dict(role="assistant", content=reasoning_content, metadata={"title": "Thinking..."}))
 
+    indicator_index = 0
     for new_text in streamer:
         if "</think>" in new_text:
-            chat_history[-1]["metadata"] = {"title": "Thinking Completed"}
             reasoning_done = True
+            thought_duration = time.time() - start_time
+            chat_history[-1]["metadata"] = {"title": f"Thought for {thought_duration:.2f} seconds"}
             chat_history.append(dict(role="assistant", content=content))
-
+
         if not reasoning_done:
+            # Update the thinking indicator
+            indicator_index = (indicator_index + 1) % len(indicators)
+            chat_history[-1]["metadata"] = {"title": indicators[indicator_index]}
             reasoning_content += new_text
             chat_history[-1]["content"] = reasoning_content
         else:
             content += new_text
             chat_history[-1]["content"] = content
-
+
         yield chat_history
 
 # Create the Gradio interface
@@ -64,4 +72,4 @@ with gr.Blocks() as demo:
     msg.submit(generate_response, [msg, chatbot], [chatbot])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(mcp_server=True)
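
For reference, the hunks above elide the middle of generate_response, where the streamer consumed by generation_kwargs is created and model.generate is started in the background. A minimal sketch of that elided section, assuming it follows the standard transformers TextIteratorStreamer pattern (streamer, generation_kwargs, and model_inputs are names from the diff; the thread variable and the streamer arguments are assumptions):

    # Assumed reconstruction of the elided lines, not the file's exact code.
    # TextIteratorStreamer turns model.generate output into an iterable of text chunks.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=model_inputs.input_ids,
        max_new_tokens=8192,
        streamer=streamer,
    )
    # Run generation on a worker thread so the handler can consume the stream
    # and yield incremental chat_history updates back to Gradio.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

Because generation runs on its own thread, the for new_text in streamer loop receives decoded chunks as they are produced, which is what lets the handler animate the spinner and update the chat turn token by token.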
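The interface block is likewise elided except for the msg.submit wiring. A plausible sketch, assuming a messages-format chatbot, which is what the metadata={"title": ...} thought accordions in the handler require (the component arguments are hypothetical):

    # Hypothetical UI block; only `demo`, `chatbot`, `msg`, and the
    # msg.submit(...) line are attested by the diff.
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")  # dict-style history entries
        msg = gr.Textbox(placeholder="Message sarvam-m")
        msg.submit(generate_response, [msg, chatbot], [chatbot])

As for the final change, launching with demo.launch(mcp_server=True) additionally exposes the Space's functions over the Model Context Protocol, which requires a Gradio build with MCP support (e.g. pip install "gradio[mcp]").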