testbed / train_people.py
xspinners's picture
deepface
bc62936
import os

import numpy as np
from deepface import DeepFace
from dotenv import load_dotenv
from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    connections,
    utility,
)

from ai_api.library.devlab_image import DevLabImage
# Load variables from a .env file (if present) before the environment is read.
load_dotenv()

# Milvus endpoint. When the variables are unset the host falls back to
# "milvus" (not localhost) and the port to 19530.
milvus_host = os.environ.get("MILVUS_HOST", "milvus")
milvus_port = os.environ.get("MILVUS_PORT", "19530")

# The port is read as a string from the environment, so convert it here.
connections.connect(alias="default", host=milvus_host, port=int(milvus_port))

# Helper object used further down to extract an embedding from each image.
devlab_image = DevLabImage()
# Make sure the "faces" collection exists, has a vector index, and is loaded.
#
# Existence is tested explicitly instead of by catching a failed load():
# a blanket `except Exception` also swallows connection and load errors and
# misreports them as "collection does not exist".
if utility.has_collection("faces"):
    collection = Collection("faces")
    print("Collection 'faces' already exists.")
else:
    print("Creating collection: 'faces' does not exist yet")
    fields = [
        # Primary key generated by Milvus; callers never supply it on insert.
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        # NOTE(review): dim=128 presumably matches the vector length returned
        # by DevLabImage.extract_embedding -- confirm against that helper.
        FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128),
        FieldSchema(name="name", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="short_description", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="description", dtype=DataType.VARCHAR, max_length=5000),
    ]
    schema = CollectionSchema(fields, description="Face embeddings")
    collection = Collection(name="faces", schema=schema)

# Build the index for a brand-new collection, and also for an existing one
# that was left without an index (the previous fallback path rebuilt it in
# that situation as well).
if not collection.has_index():
    collection.create_index(
        field_name="embedding",
        index_params={
            "metric_type": "COSINE",
            "index_type": "HNSW",
            "params": {"M": 32, "efConstruction": 512},
        },
    )

collection.load()
# Walk people/<person>/<image> and store one row per image that yields an
# embedding. The directory name is stored in the "name" field; both
# description fields are left empty.
dataset_path = "people/"

for person in os.listdir(dataset_path):
    person_path = os.path.join(dataset_path, person)
    # Only sub-directories represent people; skip stray files silently.
    if not os.path.isdir(person_path):
        continue
    print(".....processing person", person)

    image_files = [
        f for f in os.listdir(person_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

    # Collect every usable embedding for this person so they can be sent to
    # Milvus in a single insert call instead of one call per image.
    embeddings = []
    for img in image_files:
        img_path = os.path.join(person_path, img)
        # NOTE: errors raised by the extractor are not caught here, so a
        # single failing image aborts the whole run.
        embedding = devlab_image.extract_embedding(image_path=img_path)
        emb = None if embedding is None else np.array(embedding, dtype=np.float32)
        # Report both "nothing returned" and "empty vector returned".
        if emb is None or emb.size == 0:
            print(f"No embedding found for {img_path}")
            continue
        embeddings.append(emb)

    if embeddings:
        count = len(embeddings)
        # Column order follows the schema minus the auto-generated id:
        # embedding, name, short_description, description.
        collection.insert([embeddings, [person] * count, [''] * count, [''] * count])

# Flush once after all inserts so the new rows are sealed and persisted.
collection.flush()
print("✅ Face embeddings inserted into Milvus!")