|
|
import gradio as gr |
|
|
import time |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import torch |
|
|
import bitsandbytes as bnb |
|
|
|
|
|
|
|
|
# Checkpoint shared by the tokenizer and every model variant in this demo.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Full-precision baseline, loaded once at import time so that requests which
# do not ask for quantization reuse the same tokenizer and model instance.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
# Lazily loaded 8-bit models keyed by checkpoint name, so the quantized
# weights are loaded once instead of on every request.
_quantized_model_cache = {}


def _get_quantized_model():
    """Return the 8-bit variant of ``model_name``, loading it on first use."""
    if model_name not in _quantized_model_cache:
        # NOTE(review): newer transformers releases prefer
        # quantization_config=BitsAndBytesConfig(load_in_8bit=True) over the
        # bare load_in_8bit flag -- confirm against the installed version.
        _quantized_model_cache[model_name] = (
            AutoModelForSequenceClassification.from_pretrained(
                model_name,
                load_in_8bit=True,
                device_map="auto",
            )
        )
    return _quantized_model_cache[model_name]


def classify_with_quantization(text, use_quantization=False):
    """Classify the sentiment of *text* and report the forward-pass time.

    Args:
        text: Input passed to the module-level ``tokenizer``.
        use_quantization: When true, run the cached 8-bit model instead of
            the module-level full-precision ``model``.

    Returns:
        A two-line string: ``"Label: POSITIVE|NEGATIVE"`` followed by
        ``"Inference Time: <seconds>s"`` with four decimal places.
    """
    model_to_use = _get_quantized_model() if use_quantization else model

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # device_map="auto" may place the quantized model on an accelerator, while
    # the tokenizer output is created on the CPU; keep both on one device.
    device = model_to_use.device
    inputs = inputs.to(device)

    # CUDA work is queued asynchronously, so synchronize on both sides of the
    # forward pass; otherwise the timer can stop before the GPU has finished.
    on_cuda = device.type == "cuda"
    if on_cuda:
        torch.cuda.synchronize(device)
    # perf_counter is monotonic and high resolution, unlike wall-clock time().
    start_time = time.perf_counter()
    with torch.no_grad():
        outputs = model_to_use(**inputs)
    if on_cuda:
        torch.cuda.synchronize(device)
    inference_time = time.perf_counter() - start_time

    logits = outputs.logits
    predicted_class = logits.argmax().item()
    # Index 1 is reported as the positive class; anything else as negative.
    label = "POSITIVE" if predicted_class == 1 else "NEGATIVE"

    return f"Label: {label}\nInference Time: {inference_time:.4f}s"
|
|
|
|
|
|
|
|
# Input widgets, in the positional order expected by
# classify_with_quantization(text, use_quantization).
_text_input = gr.Textbox(
    lines=2,
    placeholder="Enter text for sentiment analysis...",
)
_quantization_toggle = gr.Checkbox(label="Use 8-bit Quantization", value=False)

# Gradio interface wiring the two inputs to the classifier and showing its
# string result in a single text box.
demo = gr.Interface(
    fn=classify_with_quantization,
    inputs=[_text_input, _quantization_toggle],
    outputs=gr.Textbox(),
    title="Transformer Model Optimization Demo",
    description="Test quantization on DistilBERT for faster edge inference. Toggle quantization to see speed gains.",
)
|
|
|
|
|
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the interface.
if __name__ == "__main__":
    demo.launch()