donjun committed
Commit 0789c58 · verified · 1 Parent(s): a673a02

Update app.py

Files changed (1)
  1. app.py +46 -8
app.py CHANGED
@@ -2,18 +2,56 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import torch
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = AutoModelForCausalLM.from_pretrained("naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B").to(device)
-tokenizer = AutoTokenizer.from_pretrained("naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B")
+# Force CPU execution
+device = "cpu"
+torch.set_num_threads(4)  # cap CPU threads
+
+# Load the model in a lightweight configuration
+model = AutoModelForCausalLM.from_pretrained(
+    "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B",
+    torch_dtype=torch.float32,  # float32 is the safe choice on CPU
+    low_cpu_mem_usage=True
+).to(device)
+
+tokenizer = AutoTokenizer.from_pretrained(
+    "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B"
+)
 
 def predict(message, history):
+    # Simplified single-turn chat to save memory (previous turns are dropped)
     chat = [
-        {"role": "system", "content": "You are CLOVA X AI."},
-        *[{"role": "user" if i % 2 == 0 else "assistant", "content": h} for i, h in enumerate(sum(history, ()))],
+        {"role": "system", "content": "Please answer concisely."},
         {"role": "user", "content": message}
     ]
-    inputs = tokenizer.apply_chat_template(chat, return_tensors="pt").to(device)
-    outputs = model.generate(inputs, max_length=1024, temperature=0.7)
+
+    # CPU-friendly input limits
+    inputs = tokenizer.apply_chat_template(
+        chat,
+        return_tensors="pt",
+        max_length=512,  # cap the prompt length
+        truncation=True
+    ).to(device)
+
+    outputs = model.generate(
+        inputs,
+        max_new_tokens=200,  # keep responses short
+        do_sample=False  # deterministic greedy decoding; temperature would be ignored
+    )
+
+    # Decode only the newly generated tokens, not the echoed prompt
     return tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
+
+# Lightweight interface
+demo = gr.ChatInterface(
+    predict,
+    title="CLOVA X (CPU mode)",
+    description="CPU-only lightweight version",
+    theme="soft",
+    examples=["Hello", "Tell me about the weather"]
+)
 
-gr.ChatInterface(predict).launch(server_name="0.0.0.0")
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860
+    )
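
The biggest behavioral change in this commit is that predict now ignores history, so every message starts a fresh single-turn conversation. If some context is wanted back without blowing the 512-token CPU budget, one option is a bounded window over Gradio's (user, assistant) tuple history, the same format the removed list comprehension consumed. A minimal sketch, not part of the commit: MAX_TURNS and predict_with_history are hypothetical names, and it reuses the model, tokenizer, and device globals defined above.

# Hypothetical variant (not in the commit): keep only the last MAX_TURNS
# exchanges so the prompt stays inside the 512-token cap used above.
MAX_TURNS = 2  # assumption: two retained exchanges fit comfortably in 512 tokens

def predict_with_history(message, history):
    chat = [{"role": "system", "content": "Please answer concisely."}]
    # Gradio's tuple-format history is a list of (user, assistant) pairs.
    for user_msg, bot_msg in history[-MAX_TURNS:]:
        chat.append({"role": "user", "content": user_msg})
        chat.append({"role": "assistant", "content": bot_msg})
    chat.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        chat, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    outputs = model.generate(inputs, max_new_tokens=200, do_sample=False)
    # Slice off the prompt tokens, as in predict
    return tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

Passing predict_with_history to gr.ChatInterface in place of predict would restore short-range memory; generation cost then grows roughly linearly with the extra prompt tokens per retained turn.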