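# Minimal inference example for the I3 model.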
import torch

from modeling_i3 import I3ForCausalLM, I3Config
from tokenizer_i3 import I3Tokenizer
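
# Load the chunk-based tokenizer and the trained model from the local
# checkpoint directory; eval() disables dropout for inference.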
model_path = "."
tokenizer = I3Tokenizer(vocab_file=f"{model_path}/chunk_vocab_combined.json")
model = I3ForCausalLM.from_pretrained(model_path)
model.eval()
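
# Encode the prompt into token IDs and wrap it in a batch dimension.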
prompt = "hello, how are you"
input_ids = torch.tensor([tokenizer.encode(prompt)], dtype=torch.long)
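
# Move the model and inputs to the GPU if one is available.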
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
input_ids = input_ids.to(device)
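
# Sample up to 50 new tokens with temperature scaling and top-k filtering;
# generation is exposed on the inner module as model.i3.generate in this repo.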
with torch.no_grad():
    generated_ids = model.i3.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.8,
        top_k=40,
    )
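
# Decode the generated IDs (batch element 0) back into text.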
generated_text = tokenizer.decode(generated_ids[0].cpu().tolist())
print("Generated text:", generated_text)