|
|
import os

import numpy as np
from deepface import DeepFace
from dotenv import load_dotenv
from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    connections,
    utility,
)

from ai_api.library.devlab_image import DevLabImage
|
|
|
|
|
# Read a local .env file (if any) into the process environment before the
# settings below are looked up.
load_dotenv()

# Milvus endpoint; the fallbacks apply when the variables are not set.
milvus_host = os.environ.get("MILVUS_HOST", "milvus")
milvus_port = os.environ.get("MILVUS_PORT", "19530")

# Register the connection under the "default" alias. The port is kept as a
# string in `milvus_port` and converted only for this call.
connections.connect(
    alias="default",
    host=milvus_host,
    port=int(milvus_port),
)

# Project helper whose extract_embedding() is used by the ingestion loop.
devlab_image = DevLabImage()
|
|
|
|
|
|
|
|
|
|
|
# Open the "faces" collection, creating it first when it does not exist.
#
# Existence is tested explicitly instead of catching whatever
# Collection()/load() raises: a blanket `except Exception` would also treat
# connection or server failures as "collection missing" and then try to
# create it, hiding the real error.
if utility.has_collection("faces"):
    collection = Collection("faces")
    print("Collection 'faces' already exists.")
else:
    print("Creating collection 'faces'")
    fields = [
        # Primary key generated by Milvus (auto_id), so inserts omit it.
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        # NOTE(review): dim=128 must equal the length of the vectors produced
        # by the embedding extractor - confirm against the model in use.
        FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128),
        FieldSchema(name="name", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="short_description", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="description", dtype=DataType.VARCHAR, max_length=5000),
    ]
    schema = CollectionSchema(fields, description="Face embeddings")
    collection = Collection(name="faces", schema=schema)

# Make sure the vector field is indexed before loading. This also covers a
# pre-existing collection that was left without an index, which the previous
# catch-all fallback handled only by accident.
if not collection.has_index():
    collection.create_index(
        field_name="embedding",
        index_params={
            "metric_type": "COSINE",
            "index_type": "HNSW",
            "params": {"M": 32, "efConstruction": 512},
        },
    )

collection.load()
|
|
|
|
|
|
|
|
|
|
|
# Root of the dataset. The loops below expect one sub-directory per person,
# named after that person and containing their image files:
#     people/<person name>/<image>.jpg|.jpeg|.png
dataset_path = "people/"

for person in os.listdir(dataset_path):
    person_path = os.path.join(dataset_path, person)

    # Stray files next to the person folders are not persons; skip them
    # before logging so the output lists real persons only.
    if not os.path.isdir(person_path):
        continue

    print(".....processing person", person)

    image_files = [
        f for f in os.listdir(person_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

    # Embeddings collected for this person; sent to Milvus in one insert
    # instead of one round-trip per image.
    embeddings = []
    for img in image_files:
        img_path = os.path.join(person_path, img)

        embedding = devlab_image.extract_embedding(image_path=img_path)
        emb = None if embedding is None else np.array(embedding, dtype=np.float32)

        # Report both "nothing returned" and "empty vector returned", so no
        # image is dropped silently.
        if emb is None or emb.size == 0:
            print(f"No embedding found for {img_path}")
            continue

        embeddings.append(emb)

    if embeddings:
        count = len(embeddings)
        # Column order follows the schema without the auto-generated id:
        # embedding, name, short_description, description.
        collection.insert([
            embeddings,
            [person] * count,
            [''] * count,
            [''] * count,
        ])

# Seal the inserted data once the whole dataset has been processed.
collection.flush()

print("✅ Face embeddings inserted into Milvus!")
|
|
|