Update app.py
Browse files
app.py
CHANGED
@@ -5,18 +5,27 @@ import torch
|
|
5 |
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# TinyLlama's tokenizer ships without a pad token; generation utilities
# need one, so fall back to the EOS token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Hoist the CUDA probe: the original queried torch.cuda.is_available()
# three times; one probe is cheaper and keeps dtype/device decisions
# guaranteed-consistent.
_use_cuda = torch.cuda.is_available()

# bfloat16 on GPU halves memory; plain float32 on CPU (bf16 CPU support
# varies by hardware).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16 if _use_cuda else torch.float32,
)
if _use_cuda:
    model.to("cuda")
# Inference mode: disables dropout / batch-norm updates.
model.eval()
|
19 |
|
|
|
|
|
20 |
def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50):
|
21 |
if not prompt:
|
22 |
return "Please enter a prompt."
|
|
|
5 |
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer and model, with progress prints so slow cold-start
# downloads are visible in the app logs.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("Tokenizer loaded.")

# TinyLlama's tokenizer ships without a pad token; generation utilities
# need one, so fall back to the EOS token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Hoist the CUDA probe: the original queried torch.cuda.is_available()
# twice; one probe keeps the dtype and device decisions consistent.
_use_cuda = torch.cuda.is_available()

print("Loading model...")
# bfloat16 on GPU halves memory; plain float32 on CPU.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16 if _use_cuda else torch.float32,
)
print("Model loaded.")

if _use_cuda:
    print("Moving model to GPU...")
    model.to("cuda")
# NOTE(review): the diff rendering drops indentation — eval() is placed
# outside the CUDA branch to match the pre-change version; confirm
# against the actual file.
model.eval()

print("Model ready.")
|
28 |
+
|
29 |
def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50):
|
30 |
if not prompt:
|
31 |
return "Please enter a prompt."
|