import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
model_name = "Qwen/Qwen3-4B-Thinking-2507"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="cpu",    # run entirely on CPU
    torch_dtype="auto",  # use the dtype stored in the checkpoint config
)
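
# Note: a 4B-parameter model on CPU needs on the order of 8 GB of RAM at bf16
# (rough estimate: 4B params x 2 bytes), and generation will be slow.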
# 🔥 IMPORTANT: create a text-generation pipeline around the loaded model
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # No device argument here: the model instance above is already on CPU,
    # and `device_map` only applies when the pipeline loads the model itself.
)

def respond(message, history, system_message, max_tokens):
    # `history` is deliberately ignored: every request is a single-turn
    # exchange, hence "No History" in the interface title.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]
    # Render the messages with the model's chat template, leaving the string
    # untokenized so the pipeline can handle tokenization itself.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    output = pipe(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The pipeline returns the prompt plus the completion, so slice the
    # prompt off the front.
    response = output[0]["generated_text"][len(prompt):].strip()
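    # Assumption: Qwen3 "Thinking" checkpoints emit a reasoning trace before
    # the answer, closed by a "</think>" tag. The guard below strips it when
    # present; drop these lines to keep the raw trace visible.
    if "</think>" in response:
        response = response.split("</think>", 1)[-1].strip()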
    return response

chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        # 262144 matches the model's advertised maximum context length.
        gr.Slider(minimum=1, maximum=262144, value=64, step=1, label="Max new tokens"),
    ],
    title="Qwen Chat (Local, No History)",
)

if __name__ == "__main__":
    chatbot.launch()
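
# To run locally: `python app.py`, then open the URL Gradio prints
# (http://127.0.0.1:7860 by default).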