"""Example script: generate a short completion with an AirLLM model.

When run as a script it makes sure the ``airllm`` package is importable
(installing it with pip if it is not), loads the model named by
``MODEL_ID``, tokenizes a single prompt, generates up to
``MAX_NEW_TOKENS`` new tokens and prints the decoded result.
"""
import importlib
import subprocess
import sys

# Upper bound on prompt length (in tokens) passed to the tokenizer.
MAX_LENGTH = 128
# Model identifier handed to ``AutoModel.from_pretrained``.
MODEL_ID = "v2ray/Llama-3-70B"
# Number of new tokens requested from ``model.generate``.
MAX_NEW_TOKENS = 20


def install_package(package_name: str) -> None:
    """Install *package_name* with pip into the running interpreter.

    ``sys.executable -m pip`` is used so that the package lands in the same
    environment this script is executing in.

    Args:
        package_name: Requirement string passed verbatim to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', package_name])


def _import_auto_model():
    """Return ``airllm.AutoModel``, installing ``airllm`` first if needed."""
    try:
        from airllm import AutoModel
    except ImportError:
        install_package('airllm')
        # The failed import above may have cached the directory listing of
        # the install location; drop it so the fresh install is visible.
        importlib.invalidate_caches()
        from airllm import AutoModel
    return AutoModel


def main() -> None:
    """Load the model, run one generation on a fixed prompt and print it."""
    auto_model_cls = _import_auto_model()
    model = auto_model_cls.from_pretrained(MODEL_ID)

    input_text = [
        'What is the capital of United States?'
    ]

    input_tokens = model.tokenizer(
        input_text,
        return_tensors="pt",
        return_attention_mask=False,
        truncation=True,
        max_length=MAX_LENGTH,
        padding=False,
    )

    # NOTE(review): ``.cuda()`` requires a CUDA-capable device; this script
    # makes no attempt to fall back to CPU.
    generation_output = model.generate(
        input_tokens['input_ids'].cuda(),
        max_new_tokens=MAX_NEW_TOKENS,
        use_cache=True,
        return_dict_in_generate=True,
    )

    output = model.tokenizer.decode(generation_output.sequences[0])
    print(output)


if __name__ == "__main__":
    main()