RobertoBarrosoLuque committed
Commit 75361de · 1 Parent(s): 4cba650

Add reranking

src/app.py CHANGED
@@ -2,14 +2,17 @@ import gradio as gr
 import time
 from typing import List, Dict, Tuple, Callable
 from pathlib import Path
-import os
 from config import (
     GRADIO_THEME,
     CUSTOM_CSS,
     EXAMPLE_QUERIES_BY_CATEGORY,
 )
 from src.search.bm25_lexical_search import search_bm25
-from src.search.vector_search import search_vector, search_vector_with_expansion
+from src.search.vector_search import (
+    search_vector,
+    search_vector_with_expansion,
+    search_vector_with_reranking,
+)
 from src.data_prep.data_prep import load_clean_amazon_product_data
 from src.constants.code_snippets import (
     CODE_STAGE_1,
@@ -63,21 +66,26 @@ def format_results(results: List[Dict], stage_name: str, metrics: Dict) -> str:
     Args:
         results: List of dicts with keys: product_name, description, main_category, secondary_category, score
         stage_name: Name of the search stage
-        metrics: Dict with keys: semantic_match, diversity, latency_ms
+        metrics: Dict with keys: top1_score, top5_avg, latency_ms
     """
     html_parts = [
         f"## 🔍 {stage_name}\n\n",
         f"""
-    <div style="display: flex; gap: 20px; margin-bottom: 28px;">
+    <div style="display: flex; gap: 16px; margin-bottom: 28px;">
+        <div class="metric-box" style="flex: 1;">
+            <div style="color: #6720FF; font-size: 0.85em; font-weight: 600; margin-bottom: 6px; letter-spacing: 0.5px;">TOP-1 SCORE</div>
+            <div style="font-size: 2em; font-weight: 700; color: #1E293B;">{metrics['top1_score']:.3f}</div>
+            <div style="color: #64748B; font-size: 0.75em; margin-top: 4px;">Best result</div>
+        </div>
         <div class="metric-box" style="flex: 1;">
-            <div style="color: #6720FF; font-size: 0.9em; font-weight: 600; margin-bottom: 6px; letter-spacing: 0.5px;">SEMANTIC MATCH</div>
-            <div style="font-size: 2.2em; font-weight: 700; color: #1E293B;">{metrics['semantic_match']:.3f}</div>
-            <div style="color: #64748B; font-size: 0.8em; margin-top: 4px;">Higher is better</div>
+            <div style="color: #6720FF; font-size: 0.85em; font-weight: 600; margin-bottom: 6px; letter-spacing: 0.5px;">TOP-5 AVG</div>
+            <div style="font-size: 2em; font-weight: 700; color: #1E293B;">{metrics['top5_avg']:.3f}</div>
+            <div style="color: #64748B; font-size: 0.75em; margin-top: 4px;">Overall quality</div>
         </div>
         <div class="metric-box" style="flex: 1;">
-            <div style="color: #6720FF; font-size: 0.9em; font-weight: 600; margin-bottom: 6px; letter-spacing: 0.5px;">LATENCY</div>
-            <div style="font-size: 2.2em; font-weight: 700; color: #1E293B;">{metrics['latency_ms']}<span style="font-size: 0.45em; color: #64748B; font-weight: 400;">ms</span></div>
-            <div style="color: #64748B; font-size: 0.8em; margin-top: 4px;">Response time</div>
+            <div style="color: #6720FF; font-size: 0.85em; font-weight: 600; margin-bottom: 6px; letter-spacing: 0.5px;">LATENCY</div>
+            <div style="font-size: 2em; font-weight: 700; color: #1E293B;">{metrics['latency_ms']}<span style="font-size: 0.45em; color: #64748B; font-weight: 400;">ms</span></div>
+            <div style="color: #64748B; font-size: 0.75em; margin-top: 4px;">Response time</div>
         </div>
     </div>
     """,
@@ -114,14 +122,40 @@ def get_average_score(results: List[Dict]) -> float:
     return sum(r["score"] for r in results) / len(results) if results else 0


+def get_weighted_score(results: List[Dict]) -> float:
+    """
+    Calculate position-weighted average score.
+
+    Top positions get higher weight (5x for #1, 4x for #2, etc.)
+    This rewards ranking quality - putting best results at the top.
+
+    Args:
+        results: List of search results with 'score' field
+
+    Returns:
+        Weighted average score (0-1 scale)
+    """
+    if not results:
+        return 0.0
+
+    weights = [5, 4, 3, 2, 1]
+    total_weight = sum(weights)
+
+    weighted_sum = sum((weights[i] * r["score"]) for i, r in enumerate(results[:5]))
+
+    return weighted_sum / total_weight
+
+
 def search_stage_1(query: str) -> Tuple[str, Dict]:
     """Stage 1: Baseline BM25 keyword search."""
     results, latency = run_search_function_and_time(query, search_bm25)
-    avg_score = get_average_score(results)
-    semantic_match = min(1.0, avg_score / len(results))
+
+    top1_score = results[0]["score"] / 5.0 if results else 0.0  # Normalize BM25 scores
+    top5_avg = get_average_score(results) / 5.0 if results else 0.0

     metrics = {
-        "semantic_match": semantic_match,
+        "top1_score": min(1.0, top1_score),
+        "top5_avg": min(1.0, top5_avg),
         "latency_ms": latency,
     }
     print(f"Searched BM25 for {query} in {latency}ms")
@@ -132,10 +166,13 @@ def search_stage_1(query: str) -> Tuple[str, Dict]:
 def search_stage_2(query: str) -> Tuple[str, Dict]:
     """Stage 2: Vector Embeddings using FAISS."""
     results, latency = run_search_function_and_time(query, search_vector)
-    semantic_match = get_average_score(results)
+
+    top1_score = results[0]["score"] if results else 0.0
+    top5_avg = get_average_score(results)

     metrics = {
-        "semantic_match": semantic_match,
+        "top1_score": top1_score,
+        "top5_avg": top5_avg,
         "latency_ms": latency,
     }
     print(f"Searched vector embeddings for '{query}' in {latency}ms")
@@ -145,12 +182,14 @@ def search_stage_2(query: str) -> Tuple[str, Dict]:

 def search_stage_3(query: str) -> Tuple[str, Dict]:
     """Stage 3: Query Expansion + Vector Embeddings."""
-
     results, latency = run_search_function_and_time(query, search_vector_with_expansion)
-    semantic_match = get_average_score(results)
+
+    top1_score = results[0]["score"] if results else 0.0
+    top5_avg = get_average_score(results)

     metrics = {
-        "semantic_match": semantic_match,
+        "top1_score": top1_score,
+        "top5_avg": top5_avg,
         "latency_ms": latency,
     }

@@ -158,29 +197,19 @@ def search_stage_3(query: str) -> Tuple[str, Dict]:


 def search_stage_4(query: str) -> Tuple[str, Dict]:
-    """Stage 4: BM25 + Embeddings + Query Expansion + LLM Reranking."""
-    start_time = time.time()
-
-    # Placeholder: Simulated reranking with correct format
-    results = [
-        {
-            "product_name": product["title"],
-            "description": product["description"],
-            "main_category": product["category"],
-            "secondary_category": "Placeholder",
-            "score": 0.85 + (idx * 0.025),
-        }
-        for idx, product in enumerate(SAMPLE_PRODUCTS[:5])
-    ]
+    """Stage 4: Query Expansion + Vector Embeddings + Reranking."""
+    results, latency = run_search_function_and_time(query, search_vector_with_reranking)

-    latency = int((time.time() - start_time) * 1000)
+    top1_score = results[0]["score"] if results else 0.0
+    top5_avg = get_average_score(results)

     metrics = {
-        "semantic_match": 0.88,
-        "latency_ms": max(200, latency),
+        "top1_score": top1_score,
+        "top5_avg": top5_avg,
+        "latency_ms": latency,
     }

-    return format_results(results, "Stage 4: + LLM Reranking", metrics), metrics
+    return format_results(results, "Stage 4: Reranking", metrics), metrics


 def search_all_stages(query: str) -> Tuple[str, str, str, str, str]:
@@ -210,26 +239,37 @@ def generate_comparison_table(all_metrics: List[Dict]) -> str:

     # Build markdown table
     html = "## Stage-by-Stage Comparison\n\n"
-    html += "| Stage | Semantic Match | Latency (ms) |\n"
-    html += "|-------|----------------|--------------|\n"
+    html += "| Stage | Top-1 Score | Top-5 Avg | Latency (ms) |\n"
+    html += "|-------|-------------|-----------|-------------|\n"

     for name, metrics in zip(stage_names, all_metrics):
-        html += f"| **{name}** | {metrics['semantic_match']:.3f} | {metrics['latency_ms']} |\n"
+        html += f"| **{name}** | {metrics['top1_score']:.3f} | {metrics['top5_avg']:.3f} | {metrics['latency_ms']} |\n"

-    # Calculate improvements
-    semantic_improvement = (
+    # Calculate improvements based on top-5 average
+    top5_improvement = (
         (
-            (all_metrics[3]["semantic_match"] - all_metrics[0]["semantic_match"])
-            / all_metrics[0]["semantic_match"]
+            (all_metrics[3]["top5_avg"] - all_metrics[0]["top5_avg"])
+            / all_metrics[0]["top5_avg"]
             * 100
         )
-        if all_metrics[0]["semantic_match"] > 0
+        if all_metrics[0]["top5_avg"] > 0
+        else 0
+    )
+
+    top1_improvement = (
+        (
+            (all_metrics[3]["top1_score"] - all_metrics[0]["top1_score"])
+            / all_metrics[0]["top1_score"]
+            * 100
+        )
+        if all_metrics[0]["top1_score"] > 0
         else 0
     )

     html += "\n---\n\n"
     html += "## Key Insights\n\n"
-    html += f"- **Semantic Match** improves by **{semantic_improvement:.0f}%** from baseline to final stage\n"
+    html += f"- **Top-1 Score** improves by **{top1_improvement:.0f}%** from baseline to final stage\n"
+    html += f"- **Top-5 Average** improves by **{top5_improvement:.0f}%** from baseline to final stage\n"
     html += f"- **Latency** stays under **{max(m['latency_ms'] for m in all_metrics)}ms** maintaining fast performance\n"
     html += "- Each stage progressively enhances search relevance while keeping response times low\n"
     html += "- Vector embeddings provide the biggest jump in semantic understanding\n"
@@ -363,17 +403,7 @@ with gr.Blocks(
             scale=3,
             elem_classes="search-box",
         )
-        with gr.Column(scale=1):
-            val = os.getenv("FIREWORKS_API_KEY", "")  # pragma: allowlist secret
-            api_key_value = gr.Textbox(  # pragma: allowlist secret
-                label="API Key",
-                type="password",
-                placeholder="Enter your Fireworks AI API key",
-                value=val,
-                container=True,
-                elem_classes="compact-input",
-            )
-    # Clean example query selector
+
     with gr.Row():
         gr.Markdown(
             "**Try Example Queries:** Select a category and specificity level to auto-load an example"
src/config.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
 # Fireworks AI Model Configuration
 EMBEDDING_MODEL = "accounts/fireworks/models/qwen3-embedding-8b"
 LLM_MODEL = "accounts/fireworks/models/qwen3-8b"
-RERANKER_MODEL = "fireworks/qwen3-reranker-8b"
+RERANKER_MODEL = "accounts/fireworks/models/qwen3-reranker-8b"

 GRADIO_THEME = gr.themes.Base(
     primary_hue=gr.themes.colors.purple,
src/fireworks/inference.py CHANGED
@@ -2,14 +2,18 @@ import os
 import yaml
 from openai import OpenAI
 from dotenv import load_dotenv
-from typing import List
+from typing import List, Dict
 from pathlib import Path
-from src.config import EMBEDDING_MODEL, LLM_MODEL
+import requests
+from src.config import EMBEDDING_MODEL, LLM_MODEL, RERANKER_MODEL

 load_dotenv()

 _FILE_PATH = Path(__file__).parents[2]

+RERANK_URL = "https://api.fireworks.ai/inference/v1/rerank"
+INFERENCE_URL = "https://api.fireworks.ai/inference/v1"
+

 def load_prompt_library():
     """Load prompts from YAML configuration."""
@@ -17,15 +21,15 @@ def load_prompt_library():
         return yaml.safe_load(f)


-def create_client(api_key: str = None) -> OpenAI:
+def create_client() -> OpenAI:
     """
     Create client for FW inference
     """
-    api_key = os.getenv("FIREWORKS_API_KEY", api_key)
+    api_key = os.getenv("FIREWORKS_API_KEY")
     assert api_key is not None, "FIREWORKS_API_KEY not found in environment variables"
     return OpenAI(
         api_key=api_key,
-        base_url="https://api.fireworks.ai/inference/v1",
+        base_url=INFERENCE_URL,
     )


@@ -75,3 +79,47 @@ def expand_query(query: str) -> str:

     expanded = response.choices[0].message.content.strip()
     return expanded
+
+
+def rerank_results(query: str, results: List[Dict], top_n: int = 5) -> List[Dict]:
+    """
+    Rerank search results using Fireworks AI reranker model.
+
+    Takes search results and reranks them based on relevance to the query
+    using a specialized reranking model that considers cross-attention between
+    query and documents.
+
+    Args:
+        query: Original search query
+        results: List of dictionaries containing product information and scores
+        top_n: Number of top results to return after reranking (default: 5)
+
+    Returns:
+        List of dictionaries containing reranked product information with updated scores
+    """
+    # Prepare documents as text for reranker (product name + description)
+    documents = [f"{r['product_name']}. {r['description']}" for r in results]
+
+    payload = {
+        "model": RERANKER_MODEL,
+        "query": query,
+        "documents": documents,
+        "top_n": top_n,
+        "return_documents": False,
+    }
+
+    headers = {
+        "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}",
+        "Content-Type": "application/json",
+    }
+
+    response = requests.post(RERANK_URL, json=payload, headers=headers)
+    rerank_data = response.json()
+
+    # Map reranked results back to original product data
+    reranked_results = []
+    for item in rerank_data.get("data", []):
+        idx = item["index"]
+        reranked_results.append({**results[idx], "score": item["relevance_score"]})
+
+    return reranked_results
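
For reference, rerank_results assumes the /rerank endpoint returns JSON shaped roughly like the sketch below. The field names (data, index, relevance_score) are taken from how the function reads the response; the values are illustrative only, and the real payload may carry additional fields.

```python
# Illustrative response shape for the rerank call above; field names come from
# how rerank_results reads the response, values are invented.
rerank_data = {
    "data": [
        {"index": 3, "relevance_score": 0.91},  # 4th submitted document ranked most relevant
        {"index": 0, "relevance_score": 0.74},
        {"index": 1, "relevance_score": 0.52},
    ]
}

# The same back-mapping rerank_results performs: keep the original product
# fields, overwrite "score" with the reranker's relevance score.
results = [{"product_name": f"Product {i}", "description": "placeholder"} for i in range(5)]
reranked = [
    {**results[item["index"]], "score": item["relevance_score"]}
    for item in rerank_data.get("data", [])
]
print(reranked[0])  # {'product_name': 'Product 3', 'description': 'placeholder', 'score': 0.91}
```

Because index points back into the submitted documents list, the function can splice the reranker's score onto the original result dict without re-fetching product data.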
src/search/vector_search.py CHANGED
@@ -2,13 +2,13 @@ import numpy as np
 import faiss
 from typing import List, Dict
 from pathlib import Path
-from src.fireworks.inference import get_embedding, expand_query
+from src.fireworks.inference import get_embedding, expand_query, rerank_results
 from constants.constants import FAISS_INDEX, PRODUCTS_DF

 _FILE_PATH = Path(__file__).parents[2]


-def search_vector(query: str, top_k: int = 10) -> List[Dict[str, any]]:
+def search_vector(query: str, top_k: int = 5) -> List[Dict[str, any]]:
     """
     Search products using vector embeddings and FAISS for semantic search.

@@ -46,7 +46,7 @@ def search_vector(query: str, top_k: int = 10) -> List[Dict[str, any]]:
     ]


-def search_vector_with_expansion(query: str, top_k: int = 10) -> List[Dict[str, any]]:
+def search_vector_with_expansion(query: str, top_k: int = 5) -> List[Dict[str, any]]:
     """
     Search products using vector embeddings and FAISS for semantic search with query expansion.

@@ -64,3 +64,27 @@ def search_vector_with_expansion(query: str, top_k: int = 10) -> List[Dict[str, any]]:
     print(f"Original: {query}")
     print(f"Expanded: {expanded_query}")
     return search_vector(expanded_query, top_k)
+
+
+def search_vector_with_reranking(query: str, top_k: int = 5) -> List[Dict[str, any]]:
+    """
+    Search products using vector embeddings and FAISS for semantic search with reranking.
+
+    This is Stage 4: semantic search using vector embeddings to understand
+    query meaning and intent beyond exact keyword matching, with reranking.
+
+    Args:
+        query: Search query string
+        top_k: Number of top results to return (default: 10)
+
+    Returns:
+        List of dictionaries containing product information with preserved cosine scores
+    """
+    results = search_vector_with_expansion(query, top_k)
+    cosine_scores = {r["product_name"]: r["score"] for r in results}
+    reranked_results = rerank_results(query=query, results=results)
+
+    for r in reranked_results:
+        r["score"] = cosine_scores[r["product_name"]]
+
+    return reranked_results
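
Taken together, Stage 4 now runs query expansion, vector search, and reranking in sequence: the reranker decides the ordering, while the displayed scores stay the original cosine similarities because search_vector_with_reranking copies them back. A minimal usage sketch, assuming FIREWORKS_API_KEY is set and the FAISS index and product data used by search_vector have been built; the query string is only an example.

```python
# Minimal end-to-end sketch of the Stage 4 path added in this commit.
# Assumes FIREWORKS_API_KEY is set and the FAISS index / product data that
# search_vector relies on exist; the query is an arbitrary example.
from src.search.vector_search import search_vector_with_reranking

results = search_vector_with_reranking("wireless headphones for running", top_k=5)

for rank, r in enumerate(results, start=1):
    # Order comes from the reranker; "score" is still the original cosine
    # similarity, restored after reranking.
    print(f"{rank}. {r['product_name']}  (cosine score: {r['score']:.3f})")
```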