Priyanka6 committed on
Commit e89dcde · 1 Parent(s): c17980d

Update space

Files changed (1)
  1. app.py +14 -55
app.py CHANGED
@@ -1,65 +1,24 @@
-# import torch
-# import gradio as gr
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-
-# # Load the model and tokenizer
-# MODEL_NAME = "sarvamai/sarvam-1"
-# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-# model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
-# model.eval()
-
-# def respond(message, history, max_tokens, temperature, top_p):
-#     # Convert chat history to format
-#     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
-#     for val in history:
-#         if val[0]:
-#             messages.append({"role": "user", "content": val[0]})
-#         if val[1]:
-#             messages.append({"role": "assistant", "content": val[1]})
-#     messages.append({"role": "user", "content": message})
-
-#     # Tokenize and generate response
-#     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
-#     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
-
-#     output_tokens = model.generate(
-#         **input_tokens,
-#         max_new_tokens=max_tokens,
-#         temperature=temperature,
-#         top_p=top_p,
-#         pad_token_id=tokenizer.pad_token_id,
-#         eos_token_id=tokenizer.eos_token_id,
-#     )
-
-#     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-#     return response
-
-# # Define Gradio Chat Interface
-# demo = gr.ChatInterface(
-#     fn=respond,
-#     additional_inputs=[
-#         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
-#         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-#     ],
-#     title="Sarvam-1 Chat Interface",
-#     description="Chat with the Sarvam-1 language model"
-# )
-
-# if __name__ == "__main__":
-#     demo.launch()
-
 import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Load the model and tokenizer
+# Load the model and tokenizer only once at startup
 MODEL_NAME = "sarvamai/sarvam-1"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
-model.eval()
+tokenizer = None
+model = None
+
+def load_model():
+    global tokenizer, model
+    if tokenizer is None or model is None:
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+        model.eval()
 
 def respond(message, history, max_tokens, temperature, top_p):
+    global tokenizer, model
+    # Ensure model is loaded
+    load_model()
+
     # Convert chat history to format
     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
    for val in history:
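
The added `load_model()` is a lazy, idempotent initializer: the first call to `respond()` pays the model-load cost, and later calls reuse the cached globals. The same pattern can be expressed without mutable globals; below is a minimal sketch using `functools.lru_cache` (an alternative illustration, not part of this commit; the function name `get_model_and_tokenizer` is hypothetical):

import functools
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "sarvamai/sarvam-1"

@functools.lru_cache(maxsize=1)
def get_model_and_tokenizer():
    # Body runs only on the first call; later calls return the cached pair.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()
    return tokenizer, model

# Usage inside a request handler:
# tokenizer, model = get_model_and_tokenizer()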