import gradio as gr
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# bitsandbytes must be installed for the load_in_8bit option used below,
# but it does not need to be imported directly.

# Load the baseline (full-precision) model once at startup
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
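
# Note: load_in_8bit relies on bitsandbytes and generally requires a CUDA GPU.
# For CPU-only edge targets, PyTorch dynamic quantization is a common
# alternative; a minimal sketch (not wired into the demo below):
#
#   model_int8 = torch.quantization.quantize_dynamic(
#       model, {torch.nn.Linear}, dtype=torch.qint8
#   )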
# Cache for the 8-bit model so it is loaded at most once, not on every request
model_quantized = None

def classify_with_quantization(text, use_quantization=False):
    global model_quantized
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    if use_quantization:
        # Apply 8-bit quantization (requires bitsandbytes and a CUDA device)
        if model_quantized is None:
            model_quantized = AutoModelForSequenceClassification.from_pretrained(
                model_name,
                load_in_8bit=True,
                device_map="auto",
            )
        model_to_use = model_quantized
    else:
        model_to_use = model
    # Move inputs to the model's device: device_map="auto" may place the
    # quantized model on GPU while the tokenizer returns CPU tensors
    device = next(model_to_use.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    start_time = time.time()
    with torch.no_grad():
        outputs = model_to_use(**inputs)
    inference_time = time.time() - start_time
    logits = outputs.logits
    predicted_class = logits.argmax().item()
    label = "POSITIVE" if predicted_class == 1 else "NEGATIVE"
    return f"Label: {label}\nInference Time: {inference_time:.4f}s"
# Gradio interface
demo = gr.Interface(
    fn=classify_with_quantization,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text for sentiment analysis..."),
        gr.Checkbox(label="Use 8-bit Quantization", value=False),
    ],
    outputs=gr.Textbox(),
    title="Transformer Model Optimization Demo",
    description="Test quantization on DistilBERT for faster edge inference. Toggle quantization to see speed gains.",
)
if __name__ == "__main__":
    demo.launch()
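
# Quick smoke test without the UI (hypothetical usage):
#   print(classify_with_quantization("I love this movie!"))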