import numpy as np
import faiss
from typing import Any, Dict, List
from pathlib import Path
from src.fireworks.inference import get_embedding, expand_query, rerank_results
from constants.constants import FAISS_INDEX, PRODUCTS_DF
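# Note (inferred from usage below, not verified against constants.constants):
# FAISS_INDEX is expected to be a one-element container holding the loaded faiss index,
# and PRODUCTS_DF a pandas DataFrame with the columns "Product Name", "Description",
# "MainCategory" and "SecondaryCategory".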
_FILE_PATH = Path(__file__).parents[2]


def search_vector(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search.

    This is Stage 2: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries containing product information and scores
    """
    query_embedding = get_embedding(query)
    query_vector = np.array([query_embedding], dtype=np.float32)
    faiss.normalize_L2(query_vector)
    # Unwrap the loaded faiss index from its container before searching.
    faiss_index = FAISS_INDEX[0]
    distances, indices = faiss_index.search(query_vector, top_k)
    # Convert FAISS distances to similarity scores in the 0-1 range.
    # FAISS L2 indexes return squared distances, and for L2-normalized vectors
    # ||q - x||^2 = 2 * (1 - cosine_similarity), so cosine_similarity = 1 - (distance / 2).
    similarity_scores = 1 - (distances[0] / 2)
    return [
        {
            "product_name": PRODUCTS_DF.iloc[idx]["Product Name"],
            "description": PRODUCTS_DF.iloc[idx]["Description"],
            "main_category": PRODUCTS_DF.iloc[idx]["MainCategory"],
            "secondary_category": PRODUCTS_DF.iloc[idx]["SecondaryCategory"],
            "score": float(score),
        }
        for idx, score in zip(indices[0], similarity_scores)
    ]


def search_vector_with_expansion(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search with query expansion.

    This is Stage 3: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching, with query expansion.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries containing product information and scores
    """
    expanded_query = expand_query(query)
    print(f"Original: {query}")
    print(f"Expanded: {expanded_query}")
    return search_vector(expanded_query, top_k)


def search_vector_with_reranking(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using vector embeddings and FAISS for semantic search with reranking.

    This is Stage 4: semantic search using vector embeddings to understand
    query meaning and intent beyond exact keyword matching, with reranking.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries containing product information with preserved cosine scores
    """
    results = search_vector_with_expansion(query, top_k)
    # Keep the original cosine scores so reranking only changes the ordering.
    cosine_scores = {r["product_name"]: r["score"] for r in results}
    reranked_results = rerank_results(query=query, results=results)
    for r in reranked_results:
        r["score"] = cosine_scores[r["product_name"]]
    return reranked_results
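

# Minimal usage sketch (an illustrative addition, not part of the original module): run the
# three retrieval stages side by side for one query. It assumes constants.constants has
# already loaded the FAISS index and product DataFrame, and that the Fireworks-backed
# helpers (get_embedding, expand_query, rerank_results) are configured with credentials.
if __name__ == "__main__":
    demo_query = "lightweight waterproof jacket for hiking"  # hypothetical example query
    for label, search_fn in [
        ("Stage 2 - vector search", search_vector),
        ("Stage 3 - with query expansion", search_vector_with_expansion),
        ("Stage 4 - with reranking", search_vector_with_reranking),
    ]:
        print(f"\n=== {label} ===")
        for result in search_fn(demo_query, top_k=3):
            print(f"{result['score']:.3f}  {result['product_name']}")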