Erigann committed on
Commit
5fd90b5
·
verified ·
1 Parent(s): 04382d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import torch

# Load the Qwen model locally.
# NOTE(review): Qwen-7B in float16 needs a sizeable GPU; trust_remote_code
# executes code from the model repository — only use with a trusted source.
MODEL_NAME = "Qwen/Qwen-7B"  # defined once instead of repeating the id in both calls

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",          # spread layers across available devices
    torch_dtype=torch.float16,  # halve memory versus float32 weights
    trust_remote_code=True,
)
8
 
9
def respond(message):
    """Generate a reply to *message* with the module-level Qwen model.

    The prompt is tokenized, moved to the model's device, and up to 200
    new tokens are sampled; the full decoded sequence is returned.
    """
    encoded = tokenizer(message, return_tensors="pt").to(model.device)
    generated = model.generate(**encoded, max_new_tokens=200)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
14
 
15
# Build the web UI and start serving it.
demo = gr.Interface(
    fn=respond,
    inputs=gr.Textbox(label="Ваше сообщение"),
    outputs=gr.Textbox(label="Qwen отвечает"),
    title="Qwen Прокси",
    description="Это API-прокси для Janotaro.ai",
)
demo.launch()
 
import gradio as gr
import torch

# Use a lighter-weight model so the Space fits in less memory.
model_name = "Qwen/Qwen-1_8B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # place layers on whatever devices are available
    torch_dtype=torch.float16,  # half-precision weights to cut memory use
    trust_remote_code=True,
)
15
 
16
def respond(message):
    """Generate a chat reply to *message* using the loaded Qwen model.

    Fix: `generate` returns the prompt tokens followed by the completion,
    so decoding `outputs[0]` whole would echo the user's prompt back in
    the reply. Only the newly generated tokens are decoded here.
    """
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150)
    # Skip the prompt portion of the returned sequence before decoding.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
 
20
 
 
21
# Assemble the web UI, then start the server.
demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
    title="Qwen Прокси для Janotaro.ai",
    description="Работает на Qwen-1_8B",
)
demo.launch()