mioskomi committed
Commit 4fa50f5 · verified · Parent: 2622907

Update README.md

Files changed (1)
  1. README.md +24 -7
README.md CHANGED
@@ -30,20 +30,37 @@ pip install transformers accelerate peft
 
 Load the model.
 ```python
-from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import PeftModel, PeftConfig
 
 repo_id = "stefan-m-lenz/Mixtral-8x7B-ICDOPS-QA-2024"
 config = PeftConfig.from_pretrained(repo_id, device_map="auto")
-model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map="auto")
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
+                                             device_map="auto",
+                                             quantization_config=quantization_config)
 model = PeftModel.from_pretrained(model, repo_id, device_map="auto")
-tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path,
+                                          device_map="auto")
 
 # Test input
-test_input = "Was ist der ICD-10-Code für die Tumordiagnose „Bronchialkarzinom, Hauptbronchus“?"
+test_input = """Welche ICD-10-Kodierung wird für die Tumordiagnose "Bronchialkarzinom, Hauptbronchus" verwendet? Antworte nur mit dem ICD-10 Code."""
+
+input_str = tokenizer.apply_chat_template(
+    [{"role": "user", "content": test_input}],
+    tokenize=False,
+    add_generation_prompt=True,
+    enable_thinking=False
+)
 
 # Generate response
-inputs = tokenizer(test_input, return_tensors="pt").to("cuda")
+inputs = tokenizer(input_str, return_tensors="pt").to("cuda")
 outputs = model.generate(
     **inputs,
     max_new_tokens=7,
@@ -53,8 +70,8 @@ outputs = model.generate(
     top_p=None,
     top_k=None,
 )
-response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-response = response[len(test_input):].strip()
+generated_tokens = outputs[0, inputs["input_ids"].shape[1]:]
+response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
 
 print("Test Input:", test_input)
 print("Model Response:", response)