import gradio as gr
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# bitsandbytes must be installed for the load_in_8bit option used below,
# but it does not need to be imported directly.

# Load the baseline (full-precision) model once at startup
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
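
# Note: load_in_8bit relies on bitsandbytes and generally requires a CUDA GPU.
# For CPU-only edge targets, PyTorch dynamic quantization is a common
# alternative; a minimal sketch (not wired into the demo below):
#
#   model_int8 = torch.quantization.quantize_dynamic(
#       model, {torch.nn.Linear}, dtype=torch.qint8
#   )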
# Cache for the 8-bit model so it is loaded at most once, not on every request
model_quantized = None

def classify_with_quantization(text, use_quantization=False):
    global model_quantized
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    if use_quantization:
        # Apply 8-bit quantization (requires bitsandbytes and a CUDA device)
        if model_quantized is None:
            model_quantized = AutoModelForSequenceClassification.from_pretrained(
                model_name,
                load_in_8bit=True,
                device_map="auto",
            )
        model_to_use = model_quantized
    else:
        model_to_use = model
    # Move inputs to the model's device: device_map="auto" may place the
    # quantized model on GPU while the tokenizer returns CPU tensors
    device = next(model_to_use.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    start_time = time.time()
    with torch.no_grad():
        outputs = model_to_use(**inputs)
    inference_time = time.time() - start_time
    logits = outputs.logits
    predicted_class = logits.argmax().item()
    label = "POSITIVE" if predicted_class == 1 else "NEGATIVE"
    return f"Label: {label}\nInference Time: {inference_time:.4f}s"
# Gradio interface
demo = gr.Interface(
    fn=classify_with_quantization,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text for sentiment analysis..."),
        gr.Checkbox(label="Use 8-bit Quantization", value=False),
    ],
    outputs=gr.Textbox(),
    title="Transformer Model Optimization Demo",
    description="Test quantization on DistilBERT for faster edge inference. Toggle quantization to see speed gains.",
)
if __name__ == "__main__":
    demo.launch()
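
# Quick smoke test without the UI (hypothetical usage):
#   print(classify_with_quantization("I love this movie!"))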