nagasurendra commited on
Commit
9dfb71c
·
verified ·
1 Parent(s): 674b49d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import transformers
import torch

# Model to use for chat-style text generation.
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Build the text-generation pipeline.
# device_map="auto" places the model on available hardware (GPU if present,
# otherwise CPU); bfloat16 halves memory versus float32.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Conversation for the model: a system persona plus one user turn.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Pass the FULL messages list, not just the user string: the pipeline then
# applies the model's chat template, so the system prompt ("pirate chatbot")
# actually takes effect. Passing only messages[1]["content"] would silently
# drop the system prompt.
outputs = pipeline(
    messages,
    max_new_tokens=256,  # cap the length of the generated reply
)

# Print the generated conversation/text from the first (only) result.
print(outputs[0]["generated_text"])