Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -21,7 +21,12 @@ class ChatRequest(BaseModel):
|
|
21 |
@app.post("/chat/stream")
|
22 |
async def chat_stream(request: ChatRequest):
|
23 |
prompt = f"Responde en español de forma clara y breve como un asistente IA.\nUsuario: {request.message}\nIA:"
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
25 |
input_ids = torch.tensor([input_ids])
|
26 |
|
27 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
@@ -32,9 +37,8 @@ async def chat_stream(request: ChatRequest):
|
|
32 |
top_p=0.9,
|
33 |
do_sample=True,
|
34 |
streamer=streamer,
|
35 |
-
pad_token_id=
|
36 |
)
|
37 |
-
# Elimina attention_mask, padding, y cualquier argumento de batch.
|
38 |
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
|
39 |
thread.start()
|
40 |
|
|
|
21 |
@app.post("/chat/stream")
|
22 |
async def chat_stream(request: ChatRequest):
|
23 |
prompt = f"Responde en español de forma clara y breve como un asistente IA.\nUsuario: {request.message}\nIA:"
|
24 |
+
|
25 |
+
# 1. Tokeniza a tokens (sin padding, sin encode)
|
26 |
+
tokens = tokenizer.tokenize(prompt)
|
27 |
+
token_ids = tokenizer.convert_tokens_to_ids(tokens)
|
28 |
+
# 2. Añade manualmente los tokens especiales
|
29 |
+
input_ids = tokenizer.build_inputs_with_special_tokens(token_ids)
|
30 |
input_ids = torch.tensor([input_ids])
|
31 |
|
32 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
|
37 |
top_p=0.9,
|
38 |
do_sample=True,
|
39 |
streamer=streamer,
|
40 |
+
pad_token_id=getattr(tokenizer, "eos_token_id", None),
|
41 |
)
|
|
|
42 |
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
|
43 |
thread.start()
|
44 |
|