**EXAMPLE USAGE**

The snippet below loads the model with Unsloth, applies the Llama 3.1 chat template, and streams a generated answer to a sample electrical engineering question.

```python
# Install required packages if needed
# !pip install transformers torch unsloth

from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer

# Load the electrical engineering model.
# FastLanguageModel.from_pretrained returns both the model and the tokenizer,
# and the model it returns can be patched by FastLanguageModel.for_inference below.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "neuralnets/electrical_engg_model",
    max_seq_length = 2048,   # adjust to your context-length needs
    load_in_4bit = True,     # 4-bit loading keeps memory usage low
)

# Apply the chat template to format inputs correctly
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

# Enable Unsloth's faster inference mode
FastLanguageModel.for_inference(model)

# Create an electrical engineering related query
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
]

# Format the input using the chat template
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # required for generation
    return_tensors = "pt",
).to(model.device)

# Set up text streaming for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt = True)

# Generate the response
outputs = model.generate(
    input_ids = inputs,
    streamer = text_streamer,
    max_new_tokens = 512,
    use_cache = True,
    temperature = 0.7,  # lower for precision, higher for creativity
    min_p = 0.05,       # min-p sampling: drop tokens below 5% of the top token's probability
)

# If you want to capture the full response as a string
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
```
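
For repeated queries, it can be convenient to wrap the formatting and generation steps in a small helper. The sketch below is illustrative (the `ask` function name and its defaults are assumptions, not part of this model card) and assumes `model` and `tokenizer` have been loaded as shown above.

```python
def ask(model, tokenizer, question, max_new_tokens=512, temperature=0.7):
    """Hypothetical helper: format a single-turn question and return the reply text."""
    messages = [{"role": "user", "content": question}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        temperature=temperature,
        min_p=0.05,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

# Example:
# print(ask(model, tokenizer, "Why is the power factor important in AC circuits?"))
```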