samith-a committed on
Commit
5db110f
·
1 Parent(s): ed6f0c8

unsloth llama3-1B

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -29,7 +29,7 @@ class ModelManager:
29
  if HAS_UNSLOTH and self.device != "cpu":
30
  # GPU via Unsloth + LoRA
31
  backbone, tokenizer = FastLanguageModel.from_pretrained(
32
- "Qwen/Qwen2.5-Coder-1.5B-Instruct",
33
  load_in_4bit=True,
34
  dtype=torch.float16,
35
  device_map="auto",
@@ -50,7 +50,7 @@ class ModelManager:
50
 
51
  # --- Fallback: CPU-only via HF Transformers + PEFT ---
52
  print("Falling back to CPU-only Transformers + PEFT")
53
- base_name = "Qwen/Qwen2.5-Coder-1.5B-Instruct" # non-4bit to run on CPU
54
  tokenizer = AutoTokenizer.from_pretrained(base_name, use_fast=True)
55
  base = AutoModelForCausalLM.from_pretrained(
56
  base_name,
@@ -109,4 +109,4 @@ demo = gr.Interface(
109
  )
110
 
111
  if __name__ == "__main__":
112
- demo.launch()
 
29
  if HAS_UNSLOTH and self.device != "cpu":
30
  # GPU via Unsloth + LoRA
31
  backbone, tokenizer = FastLanguageModel.from_pretrained(
32
+ "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
33
  load_in_4bit=True,
34
  dtype=torch.float16,
35
  device_map="auto",
 
50
 
51
  # --- Fallback: CPU-only via HF Transformers + PEFT ---
52
  print("Falling back to CPU-only Transformers + PEFT")
53
+ base_name = "unsloth/Llama-3.2-1B-Instruct" # non-4bit to run on CPU
54
  tokenizer = AutoTokenizer.from_pretrained(base_name, use_fast=True)
55
  base = AutoModelForCausalLM.from_pretrained(
56
  base_name,
 
109
  )
110
 
111
  if __name__ == "__main__":
112
+ demo.launch(share=True)