| | from io import BytesIO |
| | import base64 |
| |
|
| | from PIL import Image |
| | import torch |
| | from transformers import CLIPProcessor, CLIPModel |
| |
|
# Shared compute device for the model and every input tensor: the GPU when
# CUDA is usable in this process, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
| |
|
class EndpointHandler():
    """Endpoint handler that returns CLIP embeddings for text or for an image.

    The handler is called with the request payload and answers with
    ``{"embeddings": ...}``: a nested list of floats (one row per input) on
    success, or ``None`` when the payload carries neither text nor an image.
    """

    # Hub id shared by the model weights and the matching processor.
    MODEL_ID = "openai/clip-vit-large-patch14"

    def __init__(self, path=""):
        """Load the CLIP model (moved to ``device``) and its processor.

        Args:
            path: Accepted for interface compatibility; it is not used here,
                the weights are always loaded from ``MODEL_ID``.
        """
        self.model = CLIPModel.from_pretrained(self.MODEL_ID).to(device)
        self.processor = CLIPProcessor.from_pretrained(self.MODEL_ID)

    def __call__(self, data):
        """Embed the text or image contained in ``data``.

        Args:
            data: Either a dict payload or raw image bytes.

                * ``{"inputs": {"text": ...}}`` embeds the text.
                * ``{"inputs": {"image": <base64 string>}}`` embeds the image.
                * ``{"inputs": "<text>"}`` (or a list of strings) is treated
                  as text.
                * A dict without an ``"inputs"`` key is itself read as the
                  inputs mapping.
                * Any non-dict value is wrapped in ``BytesIO`` and decoded as
                  an image.

                When both text and image are supplied, text wins.

        Returns:
            ``{"embeddings": list}`` with the features moved to the CPU and
            converted to nested Python lists, or ``{"embeddings": None}`` when
            there is nothing to embed.
        """
        text_input = None
        image_data = None

        if isinstance(data, dict):
            # Read without pop() so the caller's payload is left untouched,
            # and tolerate a missing/None "inputs" entry instead of crashing.
            inputs = data.get("inputs", data)
            if isinstance(inputs, dict):
                text_input = inputs.get("text")
                encoded_image = inputs.get("image")
                if encoded_image is not None:
                    image_data = BytesIO(base64.b64decode(encoded_image))
            elif isinstance(inputs, (str, list, tuple)):
                text_input = inputs
        else:
            image_data = BytesIO(data)

        if text_input:
            # truncation=True keeps long prompts within the text encoder's
            # maximum sequence length instead of failing inside the model.
            text_batch = self.processor(
                text=text_input,
                return_tensors="pt",
                padding=True,
                truncation=True,
            ).to(device)
            with torch.no_grad():
                features = self.model.get_text_features(**text_batch)
            return {"embeddings": features.to("cpu").tolist()}

        if image_data is not None:
            # The context manager releases the decoder once the processor has
            # turned the picture into tensors.
            with Image.open(image_data) as image:
                image_batch = self.processor(images=image, return_tensors="pt").to(device)
            with torch.no_grad():
                features = self.model.get_image_features(**image_batch)
            return {"embeddings": features.to("cpu").tolist()}

        return {"embeddings": None}
| |
|
| |
|