# everyday / app.py
# donjun's picture
# Update app.py
# 0789c58 verified
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch
# Force CPU execution.
device = "cpu"
torch.set_num_threads(4)  # cap the number of CPU threads PyTorch may use

# Load the lightweight model and its tokenizer from the same checkpoint.
_MODEL_ID = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B"

model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    torch_dtype=torch.float32,  # original author's note: float32 recommended on CPU
    low_cpu_mem_usage=True,
)
model = model.to(device)

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
def predict(message, history):
    """Generate a single-turn reply to ``message`` for the chat UI.

    Args:
        message: The latest user message text.
        history: Earlier conversation turns passed in by the chat interface.
            Intentionally unused: only the current message is sent to the
            model, keeping the prompt short to save memory.

    Returns:
        The text of the model's reply only; the prompt (system instruction
        and user message) is not echoed back.
    """
    # Simplified single-turn chat to save memory. The system prompt asks the
    # model (in Korean) to answer concisely.
    chat = [
        {"role": "system", "content": "간결하게 답변해주세요."},
        {"role": "user", "content": message},
    ]

    inputs = tokenizer.apply_chat_template(
        chat,
        # Append the assistant-turn header so the model starts a reply
        # instead of continuing the user's turn.
        add_generation_prompt=True,
        # Return input_ids together with attention_mask so generate() does
        # not have to guess the mask.
        return_dict=True,
        return_tensors="pt",
        max_length=512,  # cap the prompt length
        truncation=True,
    ).to(device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Greedy decoding gives deterministic replies. `temperature` is omitted
    # on purpose: it only applies when do_sample=True and otherwise just
    # triggers a warning.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,  # keep responses short
        do_sample=False,
    )

    # generate() returns prompt + completion; keep only the new tokens so the
    # reply does not repeat the system prompt and the user's message.
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
# Lightweight chat interface.
# The user-facing Korean strings below were mis-encoded (UTF-8 bytes shown
# through a single-byte codepage) and have been restored:
#   title       -> "CLOVA X (CPU mode)"
#   description -> "CPU-only lightweight version"
#   examples    -> "Hello", "Tell me the weather"
demo = gr.ChatInterface(
    predict,
    title="CLOVA X (CPU 모드)",
    description="CPU 전용 경량화 버전",
    theme="soft",
    examples=["안녕하세요", "날씨 알려줘"],
)
if __name__ == "__main__":
    # Listen on all interfaces at port 7860.
    # `prevent_thread_lock=True` was removed: with it, launch() returns
    # immediately, the script reaches its end, and the server shuts down.
    # The default (blocking) behavior keeps the app alive when run as a script.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        favicon_path=None,
    )