Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -21,8 +21,7 @@ class ChatRequest(BaseModel):
|
|
21 |
@app.post("/chat/stream")
|
22 |
async def chat_stream(request: ChatRequest):
|
23 |
prompt = f"Responde en español de forma clara y breve como un asistente IA.\nUsuario: {request.message}\nIA:"
|
24 |
-
|
25 |
-
input_ids = tokenizer.encode(prompt)
|
26 |
input_ids = torch.tensor([input_ids])
|
27 |
|
28 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
@@ -33,8 +32,9 @@ async def chat_stream(request: ChatRequest):
|
|
33 |
top_p=0.9,
|
34 |
do_sample=True,
|
35 |
streamer=streamer,
|
36 |
-
pad_token_id=tokenizer.eos_token_id,
|
37 |
)
|
|
|
38 |
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
|
39 |
thread.start()
|
40 |
|
|
|
21 |
@app.post("/chat/stream")
|
22 |
async def chat_stream(request: ChatRequest):
|
23 |
prompt = f"Responde en español de forma clara y breve como un asistente IA.\nUsuario: {request.message}\nIA:"
|
24 |
+
input_ids = tokenizer.encode(prompt, add_special_tokens=True)
|
|
|
25 |
input_ids = torch.tensor([input_ids])
|
26 |
|
27 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
|
32 |
top_p=0.9,
|
33 |
do_sample=True,
|
34 |
streamer=streamer,
|
35 |
+
pad_token_id=tokenizer.eos_token_id if hasattr(tokenizer, "eos_token_id") else None,
|
36 |
)
|
37 |
+
# Removes attention_mask, padding, and any batch-related arguments.
|
38 |
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
|
39 |
thread.start()
|
40 |
|