# Minimal CPU-only Gradio chat demo for HyperCLOVAX-SEED-Text-Instruct-0.5B.
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch

# CPU-only inference; cap the thread count so torch does not oversubscribe cores.
device = "cpu"
torch.set_num_threads(4)

# Load the 0.5B instruct model in float32 (CPU has no fast half-precision path).
model = AutoModelForCausalLM.from_pretrained(
    "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
).to(device)

tokenizer = AutoTokenizer.from_pretrained(
    "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B"
)


def predict(message, history):
    # History is ignored to keep the prompt short on CPU; each turn is
    # answered independently.
    chat = [
        {"role": "system", "content": "๊ฐ๊ฒฐํ๊ฒ ๋ต๋ณํด์ฃผ์ธ์."},  # "Please answer concisely."
        {"role": "user", "content": message},
    ]

    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,  # append the assistant header so the model replies
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).to(device)

    outputs = model.generate(
        inputs,
        max_new_tokens=200,
        do_sample=False,  # greedy decoding; temperature is ignored without sampling
    )

    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
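
# Quick sanity check without the UI (history is unused, so an empty list works):
#   print(predict("์๋
ํ์ธ์", []))  # "Hello"
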
demo = gr.ChatInterface(
    predict,
    title="CLOVA X (CPU ๋ชจ๋)",               # "CLOVA X (CPU mode)"
    description="CPU ์ ์ฉ ๊ฒฝ๋ํ ๋ฒ์ ",       # "Lightweight CPU-only version"
    theme="soft",
    examples=["์๋
ํ์ธ์", "๋ ์จ ์๋ ค์ค"],  # "Hello", "Tell me the weather"
)

if __name__ == "__main__":
    # launch() must block here; prevent_thread_lock=True would return
    # immediately and let the script (and the server with it) exit.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
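
# A minimal client-side sketch for calling the running app programmatically.
# This assumes the separate gradio_client package is installed and that
# "/chat" is the endpoint name gr.ChatInterface registers (version-dependent):
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860/")
#   print(client.predict("์๋
ํ์ธ์", api_name="/chat"))  # "Hello"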