samith-a committed
Commit 14dbea3 · 1 Parent(s): e3d6072

django model try, no-accesss-token

Files changed (2)
  1. app.py +77 -5
  2. requirements.txt +5 -2
app.py CHANGED
@@ -1,9 +1,81 @@
+ # app.py
+
  import gradio as gr
- def add(x, y):
-     return x + y
- # Interface: define inputs and outputs
- app = gr.Interface(fn=add, inputs=["number", "number"], outputs="number")
- app.launch()
+ import torch
+ from unsloth import FastLanguageModel
+ from peft import PeftModel
+ from transformers import AutoTokenizer
+
+ class ModelManager:
+     _instance = None
+
+     def __init__(self):
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model, self.tokenizer = self.load_model()
+
+     @classmethod
+     def get_instance(cls):
+         if cls._instance is None:
+             cls._instance = cls()
+         return cls._instance
+
+     def load_model(self):
+         # Load base model
+         backbone, tokenizer = FastLanguageModel.from_pretrained(
+             "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
+             load_in_4bit=True,
+             dtype=torch.float16,
+             device_map=self.device,
+         )
+
+         # Load your fine-tuned adapter
+         try:
+             model = PeftModel.from_pretrained(
+                 backbone,
+                 "samith-a/Django-orm-code-gen",
+                 torch_dtype=torch.float16,
+                 device_map=self.device,
+             )
+             print("Adapter weights loaded successfully")
+         except Exception as e:
+             print(f"Error loading adapter: {e}")
+             model = backbone
+
+         FastLanguageModel.for_inference(model)
+         return model, tokenizer
+
+     def generate(self, instruction: str, input_text: str, max_new_tokens: int = 128) -> str:
+         alpaca_template = (
+             "### Instruction:\n{}\n\n"
+             "### Input:\n{}\n\n"
+             "### Response:\n"
+         )
+         prompt = alpaca_template.format(instruction, input_text)
+
+         encoded = self.tokenizer([prompt], return_tensors="pt").to(self.device)
+         outputs = self.model.generate(**encoded, max_new_tokens=max_new_tokens)
+
+         raw = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return raw.split("### Response:")[-1].strip()
+
+ # Instantiate once
+ manager = ModelManager.get_instance()
+
+ def predict(instruction, context, max_tokens=128):
+     return manager.generate(instruction, context, max_new_tokens=int(max_tokens))
+
+ # Gradio UI / API
+ demo = gr.Interface(
+     fn=predict,
+     inputs=[
+         gr.Textbox(lines=2, label="Instruction", placeholder="Describe what you want…"),
+         gr.Textbox(lines=5, label="Input (code/context)", placeholder="Optional context…"),
+         gr.Slider(minimum=16, maximum=512, step=16, label="Max new tokens", value=128),
+     ],
+     outputs=gr.Textbox(label="Generated Code"),
+     title="Django-ORM Code Generator",
+     description="Ask the LoRA-finetuned LLaMA3.2 model to generate or modify Django ORM code.",
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
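
Once this revision is running as a Space, the predict endpoint can also be called programmatically. A minimal client-side sketch with gradio_client; the Space id below is a placeholder, since this commit does not name the deployed Space:

# client_demo.py -- hypothetical helper, not part of this commit.
# Calls the Gradio app's predict endpoint remotely. The Space id is a
# placeholder and must be replaced with the actual deployed Space.
from gradio_client import Client

client = Client("samith-a/django-orm-code-gen")  # placeholder Space id
result = client.predict(
    "Create a query for all books published after 2020",   # Instruction
    "class Book(models.Model): ...",                        # Input (code/context)
    128,                                                    # Max new tokens
    api_name="/predict",
)
print(result)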
requirements.txt CHANGED
@@ -1,2 +1,5 @@
- fastapi
- uvicorn[standard]
+ torch
+ transformers
+ unsloth
+ peft
+ gradio
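
The new stack can be sanity-checked locally before pushing. A quick sketch (assumes a CUDA-capable environment; unsloth is omitted here because it typically refuses to import without an NVIDIA GPU):

# check_env.py -- optional local sanity check, not part of this commit.
import torch
import transformers
import peft
import gradio

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("transformers", transformers.__version__)
print("peft", peft.__version__)
print("gradio", gradio.__version__)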