from pathlib import Path
from typing import Any, Dict, List

import faiss
import numpy as np

from constants.constants import FAISS_INDEX, PRODUCTS_DF
from src.fireworks.inference import expand_query, get_embedding, rerank_results

_FILE_PATH = Path(__file__).parents[2]


def search_vector(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search.

    This is Stage 2: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries containing product information and scores.
        May contain fewer than ``top_k`` entries when the index cannot
        supply that many neighbours.
    """
    query_embedding = get_embedding(query)
    query_vector = np.array([query_embedding], dtype=np.float32)
    # Normalizes in place so the distance below maps onto cosine similarity.
    faiss.normalize_L2(query_vector)

    faiss_index = FAISS_INDEX[0]
    distances, indices = faiss_index.search(query_vector, top_k)

    # Convert L2 distances to similarity scores.
    # For unit vectors, squared L2 distance = 2 * (1 - cosine_similarity),
    # so cosine_similarity = 1 - (distance / 2).
    # NOTE(review): assumes FAISS_INDEX[0] is an L2-metric index built over
    # normalized vectors (FAISS L2 indexes report squared distances) - confirm.
    similarity_scores = 1 - (distances[0] / 2)

    results: List[Dict[str, Any]] = []
    for idx, score in zip(indices[0], similarity_scores):
        # FAISS pads the label array with -1 when fewer than top_k neighbours
        # exist; iloc[-1] would silently return the last product instead.
        if idx < 0:
            continue
        row = PRODUCTS_DF.iloc[idx]
        results.append(
            {
                "product_name": row["Product Name"],
                "description": row["Description"],
                "main_category": row["MainCategory"],
                "secondary_category": row["SecondaryCategory"],
                "score": float(score),
            }
        )
    return results


def search_vector_with_expansion(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search with query expansion.

    This is Stage 3: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching, with query
    expansion.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries containing product information and scores
    """
    expanded_query = expand_query(query)
    print(f"Original: {query}")
    print(f"Expanded: {expanded_query}")
    return search_vector(expanded_query, top_k)


def search_vector_with_reranking(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search with reranking.

    This is Stage 4: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching, with reranking.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries containing product information with preserved
        cosine scores

    Raises:
        KeyError: If the reranker returns a ``product_name`` that was not in
            the retrieved results.
    """
    results = search_vector_with_expansion(query, top_k)
    # Remember the retrieval scores by product name so they can be restored
    # on the reranked entries below.
    cosine_scores = {r["product_name"]: r["score"] for r in results}

    # The reranker is given the original query, not the expanded one.
    reranked_results = rerank_results(query=query, results=results)
    for r in reranked_results:
        r["score"] = cosine_scores[r["product_name"]]
    return reranked_results