import time

import gradio as gr
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
)

# Load the baseline (fp32) model once at startup
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()

# The 8-bit model is loaded lazily and cached, so it is not re-downloaded and
# re-quantized on every request. Note: 8-bit loading is backed by the
# bitsandbytes package and requires a CUDA GPU.
quantized_model = None


def get_quantized_model():
    global quantized_model
    if quantized_model is None:
        quantized_model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            device_map="auto",
        )
        quantized_model.eval()
    return quantized_model


def classify_with_quantization(text, use_quantization=False):
    model_to_use = get_quantized_model() if use_quantization else model

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Keep inputs on the same device as the model (the 8-bit model may be on GPU)
    inputs = {k: v.to(model_to_use.device) for k, v in inputs.items()}

    start_time = time.time()
    with torch.no_grad():
        outputs = model_to_use(**inputs)
    inference_time = time.time() - start_time

    predicted_class = outputs.logits.argmax(dim=-1).item()
    label = model_to_use.config.id2label[predicted_class]  # "POSITIVE" / "NEGATIVE"
    return f"Label: {label}\nInference Time: {inference_time:.4f}s"


# Gradio interface
demo = gr.Interface(
    fn=classify_with_quantization,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text for sentiment analysis..."),
        gr.Checkbox(label="Use 8-bit Quantization", value=False),
    ],
    outputs=gr.Textbox(),
    title="Transformer Model Optimization Demo",
    description=(
        "Test quantization on DistilBERT for faster edge inference. "
        "Toggle quantization to compare inference times."
    ),
)

if __name__ == "__main__":
    demo.launch()
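

# --- Appendix: CPU-side alternative (a sketch, not wired into the demo above) ---
# bitsandbytes 8-bit loading requires a CUDA GPU, so it does not help on a
# CPU-only edge device. PyTorch dynamic quantization is a common CPU fallback:
# it stores nn.Linear weights in int8 and dequantizes on the fly during matmul.
# A minimal sketch assuming the `model`/`tokenizer` defined above; the helper
# name `classify_dynamic_int8` is illustrative, not part of the original demo.
def classify_dynamic_int8(text: str) -> str:
    # In practice you would quantize once and cache the result, as the 8-bit
    # path above does; re-quantizing per call here keeps the sketch short.
    model_int8 = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model_int8(**inputs).logits
    return model.config.id2label[logits.argmax(dim=-1).item()]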