import inspect
from typing import Any, Callable, get_type_hints

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig

model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Optional GPTQ configuration (the checkpoint is already quantized, so this is
# only needed to override defaults such as disabling the ExLlama kernels):
# gptq_config = GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)

# Load pre-quantized model on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu"  # Explicitly enforce CPU execution
    # quantization_config=gptq_config,
)
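
# A quick CPU smoke test (hypothetical prompt, kept commented out so it does
# not slow startup) to confirm the model generates before wiring up the UI:
# ids = tokenizer("Hello", return_tensors="pt")
# print(tokenizer.decode(model.generate(**ids, max_new_tokens=5)[0]))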


def parse_docstring(func):
    """Extract 'Title:' and 'Description:' lines from a function's docstring."""
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    lines = doc.splitlines()
    title = next((line.replace("Title:", "").strip() for line in lines if line.startswith("Title:")), "Untitled")
    description = "\n".join(line.strip() for line in lines if line.startswith("Description:"))
    description = description.replace("Description:", "").strip()

    return {"title": title, "description": description}

def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        The wrapped function with a `.launch()` method to start the app.
    """
    metadata = parse_docstring(func)

    # Infer Gradio components from type hints
    def _map_type(t: type) -> gr.components.Component:
        if t is str:
            return gr.Textbox()
        elif t is int:
            return gr.Number(precision=0)
        elif t is float:
            return gr.Number()
        elif t is bool:
            return gr.Checkbox()
        elif hasattr(t, "__origin__") and t.__origin__ is list:  # Handle List[type]
            elem_type = t.__args__[0]
            if elem_type is str:
                return gr.Dropdown(choices=["Option1", "Option2"])  # placeholder choices
            else:
                raise ValueError(f"Unsupported list element type: {elem_type}")
        else:
            raise ValueError(f"Unsupported type: {t}")

    # Extract function signature and type hints
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)

    # Map parameters to Gradio inputs
    inputs = []
    for name, param in sig.parameters.items():
        if name == "self":
            continue  # Skip self in class methods
        param_type = type_hints.get(name, Any)
        component = _map_type(param_type)
        component.label = name.replace("_", " ").title()
        inputs.append(component)

    # Map return type to Gradio output
    return_type = type_hints.get("return", Any)
    outputs = _map_type(return_type)
    outputs.label = "Output"

    # Wrap the function with an auto-generated Gradio interface
    interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    # Compose the final app: a Markdown header followed by the interface
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        interface.render()

    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    wrapper.launch = lambda **launch_kwargs: demo.launch(**launch_kwargs)
    return wrapper
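
# Sketch of what the decorator infers (hypothetical function, not used below):
#     @gradio_app_with_docs
#     def repeat(text: str, times: int) -> str: ...
# would map to a Textbox and an integer Number as inputs, plus a Textbox output.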


@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GPTQ V2 Model on CPU
    Description: A simple app to test the potential of a small GPTQ LLM.

    Args:
        prompt (str): A simple prompt.

    Returns:
        str: Simplified response.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
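
# For reproducible output, a greedy variant (an alternative, not the app's
# current behavior) would drop do_sample/temperature/top_p and keep only
# max_new_tokens.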

# Example usage (bypasses the UI; the wrapper still calls the raw function):
# prompt = "Explain quantum computing in simple terms."
# response = generate_response(prompt)
# print(response)


if __name__ == "__main__":
    generate_response.launch()