Spaces:

DauroCamilo
/

chat-backend-test

Sleeping

DauroCamilo committed on May 27

Commit

543077e

verified ·

1 Parent(s): 7dc77a4

AutoModel

Files changed (1) hide show

main.py CHANGED Viewed

@@ -4,15 +4,18 @@ os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf/transformers"
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from fastapi.responses import StreamingResponse
 import threading
 app = FastAPI()
-model_id = "GEB-AGI/geb-1.3b"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
 class ChatRequest(BaseModel):
     message: str

 from fastapi import FastAPI
 from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, AutoModel
 from fastapi.responses import StreamingResponse
 import threading
 app = FastAPI()
+# model_id = "GEB-AGI/geb-1.3b"
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
+# model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModel.from_pretrained("GEB-AGI/geb-1.3b", trust_remote_code=True).bfloat16().cuda()
+tokenizer = AutoTokenizer.from_pretrained("GEB-AGI/geb-1.3b", trust_remote_code=True)
 class ChatRequest(BaseModel):
     message: str