# Hugging Face Spaces app (Space status when scraped: Sleeping)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Qwen3-4B "Thinking" variant: the model emits a reasoning section before
# its final answer.
model_name = "Qwen/Qwen3-4B-Thinking-2507"

# Load the tokenizer and model onto CPU; dtype is picked by transformers
# ("auto"). trust_remote_code allows the repo's custom model code to run.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="cpu",
    torch_dtype="auto"
)

# IMPORTANT: create the text-generation pipeline that respond() calls.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="cpu"
)
def respond(message, history, system_message, max_tokens):
    """Generate a single-turn reply; prior chat history is intentionally ignored.

    Args:
        message: The user's current message.
        history: Conversation history supplied by gr.ChatInterface — unused,
            the app is advertised as "No History".
        system_message: System prompt placed before the user turn.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The model's final answer as a stripped string, with any Qwen3
        "thinking" section removed.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    output = pipe(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        # Ask the pipeline for only the newly generated text instead of
        # slicing the prompt off by character count, which breaks if the
        # pipeline normalizes the prompt.
        return_full_text=False,
    )
    response = output[0]["generated_text"]
    # Qwen3 "Thinking" models emit chain-of-thought terminated by "</think>";
    # show the user only the final answer that follows it (if present).
    if "</think>" in response:
        response = response.rsplit("</think>", 1)[1]
    return response.strip()
# Build the UI pieces first, then wire them into the chat interface.
# The extra inputs are passed to respond() positionally after
# (message, history).
system_prompt_box = gr.Textbox(
    value="You are a helpful assistant.",
    label="System message",
)
max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=262144,
    value=64,
    step=1,
    label="Max new tokens",
)

chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[system_prompt_box, max_tokens_slider],
    title="Qwen Chat (Local, No History)",
)

if __name__ == "__main__":
    # Start the Gradio server only when run as a script.
    chatbot.launch()