AI-Powered Search — Building Semantic Search That Actually Works

Introduction

Semantic search with embeddings vastly outperforms keyword-only search at capturing meaning, but pure embedding similarity has its own failure modes: it can surface loosely related documents and miss exact matches. This guide covers hybrid search architectures, query understanding, reranking, and production optimization.

Keyword-only search fails on:

  • Synonyms: "car" doesn't find "vehicle"
  • Semantic intent: "movie with flying dinosaurs" returns exact phrase matches instead of Jurassic Park
  • Typos and variations: "iphone" vs "i-phone" treated as different terms
  • Relevance ranking: Document length and keyword density trump actual relevance
  • Context: "Apple" returns both fruit and tech company equally

Real search requires understanding meaning, not just exact matches.

Semantic Search with Embeddings

Convert documents and queries to dense vectors, compare similarity:

interface SearchDocument {
  id: string;
  title: string;
  content: string;
  embedding?: number[];
  metadata: {
    source: string;
    date: string;
    category: string;
  };
}

interface SearchResult {
  document: SearchDocument;
  relevanceScore: number;
  source: 'semantic' | 'keyword' | 'hybrid';
}

async function generateEmbedding(text: string): Promise<number[]> {
  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'text-embedding-3-small',
      input: text,
      dimensions: 1536
    })
  });

  const data = await response.json();
  return data.data[0].embedding;
}

function cosineSimilarity(a: number[], b: number[]): number {
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}

async function semanticSearch(
  query: string,
  documents: SearchDocument[],
  topK: number = 10
): Promise<SearchResult[]> {
  const queryEmbedding = await generateEmbedding(query);

  const results = documents
    // Skip documents without a precomputed embedding (avoids NaN from empty vectors)
    .filter(doc => doc.embedding && doc.embedding.length > 0)
    .map(doc => ({
      document: doc,
      relevanceScore: cosineSimilarity(queryEmbedding, doc.embedding!),
      source: 'semantic' as const
    }))
    .sort((a, b) => b.relevanceScore - a.relevanceScore)
    .slice(0, topK);

  return results;
}
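
For a small in-memory corpus, a minimal usage sketch looks like this; indexDocuments and corpus are illustrative names, and in practice embeddings are computed once at index time and stored rather than regenerated per query:

// Illustrative indexing step: embed each document once, up front
async function indexDocuments(documents: SearchDocument[]): Promise<SearchDocument[]> {
  for (const doc of documents) {
    // Embed title and content together so both contribute to similarity
    doc.embedding = await generateEmbedding(`${doc.title}\n${doc.content}`);
  }
  return documents;
}

// Query time: only the query needs a fresh embedding
const indexedDocs = await indexDocuments(corpus);
const top = await semanticSearch('movie with flying dinosaurs', indexedDocs, 5);
console.log(top.map(r => `${r.relevanceScore.toFixed(3)} ${r.document.title}`));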

Hybrid Search: BM25 + Embeddings

Combine exact-match strength of BM25 with semantic understanding:

interface HybridSearchConfig {
  semanticWeight: number; // 0.6
  bm25Weight: number; // 0.4
  rrf_k: number; // Reciprocal rank fusion parameter, default 60
}

function calculateBM25Score(
  query: string,
  document: string,
  avgDocLength: number,
  docFreq: number,
  totalDocs: number
): number {
  const k1 = 1.5; // Term frequency saturation
  const b = 0.75; // Length normalization
  const idf = Math.log((totalDocs - docFreq + 0.5) / (docFreq + 0.5));

  const docTerms = document.toLowerCase().split(/\s+/);
  const docLength = docTerms.length;
  const queryTerms = new Set(query.toLowerCase().split(/\s+/));
  // Term frequency: occurrences of any query term in the document
  // (simplified; full BM25 scores each query term separately with its own IDF)
  const tf = docTerms.filter(t => queryTerms.has(t)).length;

  return idf * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (docLength / avgDocLength))));
}

async function hybridSearch(
  query: string,
  documents: SearchDocument[],
  config: HybridSearchConfig = {
    semanticWeight: 0.6,
    bm25Weight: 0.4,
    rrf_k: 60
  }
): Promise<SearchResult[]> {
  // Semantic search
  const semanticResults = await semanticSearch(query, documents, 50);
  const semanticMap = new Map(
    semanticResults.map((r, i) => [r.document.id, { score: r.relevanceScore, rank: i }])
  );

  // BM25 search
  const avgDocLength = documents.reduce((sum, d) => sum + d.content.split(/\s+/).length, 0) / documents.length;
  const bm25Results: Array<{
    document: SearchDocument;
    score: number;
    rank: number;
  }> = [];

  const queryTerms = query.toLowerCase().split(/\s+/);
  // Document frequency: how many documents contain at least one query term
  const docFreq = documents.filter(d =>
    queryTerms.some(t => d.content.toLowerCase().includes(t))
  ).length;

  documents.forEach((doc, idx) => {
    const score = calculateBM25Score(
      query,
      doc.content,
      avgDocLength,
      docFreq,
      documents.length
    );
    bm25Results.push({ document: doc, score, rank: idx });
  });

  bm25Results.sort((a, b) => b.score - a.score);
  const bm25Map = new Map(
    bm25Results.map((r, i) => [r.document.id, { score: r.score, rank: i }])
  );

  // Reciprocal Rank Fusion: each list contributes weight / (k + rank), so top-ranked documents dominate
  const fusedScores = new Map<string, number>();

  documents.forEach(doc => {
    let score = 0;

    if (semanticMap.has(doc.id)) {
      const semanticRank = semanticMap.get(doc.id)!.rank;
      score += (config.semanticWeight * 100) / (config.rrf_k + semanticRank);
    }

    if (bm25Map.has(doc.id)) {
      const bm25Rank = bm25Map.get(doc.id)!.rank;
      score += (config.bm25Weight * 100) / (config.rrf_k + bm25Rank);
    }

    if (score > 0) {
      fusedScores.set(doc.id, score);
    }
  });

  // Return top results
  return Array.from(fusedScores.entries())
    .map(([docId, score]) => ({
      document: documents.find(d => d.id === docId)!,
      relevanceScore: score,
      source: 'hybrid' as const
    }))
    .sort((a, b) => b.relevanceScore - a.relevanceScore)
    .slice(0, 10);
}
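
A quick usage sketch with the default 0.6/0.4 weights, assuming indexedDocs already carries precomputed embeddings as in the earlier indexing sketch; for short navigational queries you might bias toward keywords:

const results = await hybridSearch('jurassic park dinosaurs', indexedDocs);

// Bias toward exact keyword matches for short, navigational queries
const keywordHeavy = await hybridSearch('iphone 15 pro case', indexedDocs, {
  semanticWeight: 0.3,
  bm25Weight: 0.7,
  rrf_k: 60
});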

Query Understanding with LLM

Improve search by understanding user intent before querying:

interface QueryIntent {
  original: string;
  category: 'search' | 'recommendation' | 'question_answering' | 'comparison';
  entities: string[];
  expandedQueries: string[];
  filters: { [key: string]: string };
}

async function understandQuery(query: string): Promise<QueryIntent> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4-turbo',
      messages: [
        {
          role: 'system',
          content: `You are a search query analyzer. Parse the query and extract:
1. Intent category (search, recommendation, question_answering, comparison)
2. Key entities mentioned
3. Query expansions (synonyms, related terms)
4. Implicit filters (date range, category, price, etc.)

Respond as JSON only.`
        },
        {
          role: 'user',
          content: query
        }
      ],
      temperature: 0
    })
  });

  const data = await response.json();
  const content = data.choices[0].message.content;
  const parsed = JSON.parse(content);

  return {
    original: query,
    category: parsed.category || 'search',
    entities: parsed.entities || [],
    expandedQueries: parsed.expansions || [],
    filters: parsed.filters || {}
  };
}

// Example
const intent = await understandQuery('best laptops under $1000 for video editing');
// Returns: {
//   category: 'search',
//   entities: ['laptop', 'video editing'],
//   expandedQueries: ['best laptops for video production', 'video editing computers'],
//   filters: { price_max: '1000' }
// }
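
One way to wire the intent into retrieval is to pre-filter on the extracted structured filters and then search; this sketch assumes the hybridSearch above and only handles the category filter:

async function searchWithIntent(
  query: string,
  documents: SearchDocument[]
): Promise<SearchResult[]> {
  const intent = await understandQuery(query);

  // Apply LLM-extracted structured filters before retrieval to shrink the candidate set
  const candidates = intent.filters.category
    ? documents.filter(d => d.metadata.category === intent.filters.category)
    : documents;

  // intent.expandedQueries can feed the multi-query fusion shown in the
  // Query Expansion section below
  return hybridSearch(intent.original, candidates);
}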

Search Result Reranking

Rerank the top candidates by blending an LLM relevance judgment with other ranking signals (in production a dedicated cross-encoder reranker is typically smaller and faster, but the pattern is the same):

interface RankingFeatures {
  semanticScore: number;
  bm25Score: number;
  freshness: number; // 0-1 based on recency
  popularity: number; // 0-1 from user engagement
  docLength: number;
  titleMatch: number; // 0-1 if query matches title
}

async function rerank(
  query: string,
  results: SearchResult[],
  features: Map<string, RankingFeatures>
): Promise<SearchResult[]> {
  const rerankScores: Array<{
    result: SearchResult;
    score: number;
  }> = [];

  for (const result of results) {
    const docFeatures = features.get(result.document.id);
    if (!docFeatures) continue;

    // Neural reranker via API
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: 'gpt-4',
        messages: [
          {
            role: 'user',
            content: `Rate relevance of this document to query "${query}" on scale 1-5.
Title: ${result.document.title}
Content: ${result.document.content.substring(0, 200)}...
Respond with number only.`
          }
        ],
        temperature: 0,
        max_tokens: 1
      })
    });

    const data = await response.json();
    // Guard against non-numeric model output
    const neuralScore = (parseInt(data.choices[0].message.content, 10) || 0) / 5;

    // Weighted combination
    const finalScore =
      0.4 * result.relevanceScore + // Semantic similarity
      0.2 * docFeatures.bm25Score +
      0.2 * neuralScore +
      0.1 * docFeatures.freshness +
      0.1 * docFeatures.popularity;

    rerankScores.push({ result, score: finalScore });
  }

  return rerankScores
    .sort((a, b) => b.score - a.score)
    .map(r => ({ ...r.result, relevanceScore: r.score }));
}
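
The features map has to be assembled from whatever signals the application actually tracks. A rough sketch, deriving freshness from the document date and popularity from a hypothetical click-count store:

function buildRankingFeatures(
  query: string,
  results: SearchResult[],
  clickCounts: Map<string, number> // hypothetical engagement store: document id -> click count
): Map<string, RankingFeatures> {
  const now = Date.now();
  const maxClicks = Math.max(1, ...clickCounts.values());
  const features = new Map<string, RankingFeatures>();

  for (const { document: doc, relevanceScore } of results) {
    const ageDays = (now - new Date(doc.metadata.date).getTime()) / (1000 * 60 * 60 * 24);
    features.set(doc.id, {
      semanticScore: relevanceScore,
      bm25Score: 0, // fill in from the BM25 pass if available
      freshness: Math.max(0, 1 - ageDays / 365), // linear decay over one year
      popularity: (clickCounts.get(doc.id) ?? 0) / maxClicks,
      docLength: doc.content.split(/\s+/).length,
      titleMatch: doc.title.toLowerCase().includes(query.toLowerCase()) ? 1 : 0
    });
  }

  return features;
}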

Query Expansion with LLM

Generate related queries to improve recall:

async function expandQuery(query: string): Promise<string[]> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'user',
          content: `Generate 5 alternative phrasings of this search query to improve recall.
Original: "${query}"
Format as JSON array of strings only.`
        }
      ],
      temperature: 0.7
    })
  });

  const data = await response.json();
  const content = data.choices[0].message.content;
  const jsonMatch = content.match(/\[[\s\S]*\]/);

  if (!jsonMatch) return [query];

  const expansions = JSON.parse(jsonMatch[0]);
  return [query, ...expansions];
}

// Example: "laptop for ML"
// Expands to:
// - "machine learning laptop"
// - "deep learning workstation"
// - "laptop with GPU for ML"
// - "best computers for machine learning"

Faceted Search with Semantic Filters

Combine semantic results with structured filtering:

interface FacetedSearchRequest {
  query: string;
  filters: {
    category?: string;
    dateRange?: { start: string; end: string };
    priceRange?: { min: number; max: number };
    tags?: string[];
  };
  topK: number;
}

async function facetedSearch(
  request: FacetedSearchRequest,
  documents: SearchDocument[]
): Promise<SearchResult[]> {
  // Apply structured filters first (pre-filter)
  let filtered = documents;

  if (request.filters.category) {
    filtered = filtered.filter(
      d => d.metadata.category === request.filters.category
    );
  }

  if (request.filters.dateRange) {
    const start = new Date(request.filters.dateRange.start);
    const end = new Date(request.filters.dateRange.end);
    filtered = filtered.filter(d => {
      const docDate = new Date(d.metadata.date);
      return docDate >= start && docDate <= end;
    });
  }

  if (request.filters.tags) {
    // Tag matching logic
  }

  // Then run semantic search on filtered set
  return await semanticSearch(request.query, filtered, request.topK);
}

// Pre-filtering combines:
// - Structured filters: fast, exact matching
// - Semantic search: understands meaning on the reduced candidate set
// This is much faster than running semantic search over the full dataset.
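
A typical call, assuming indexedDocs from earlier and documents whose metadata.category and metadata.date are populated:

const facetedResults = await facetedSearch(
  {
    query: 'lightweight laptop for video editing',
    filters: {
      category: 'electronics',
      dateRange: { start: '2024-01-01', end: '2024-12-31' }
    },
    topK: 10
  },
  indexedDocs
);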

Spell Correction with LLM

Detect and correct typos before searching:

async function correctQuery(query: string): Promise<{
  original: string;
  corrected: string;
  confidence: number;
}> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'user',
          content: `Check for spelling/grammar errors in: "${query}"
If errors found, provide corrected version and confidence (0-1).
Respond as JSON: { corrected: string, confidence: number }`
        }
      ],
      temperature: 0
    })
  });

  const data = await response.json();
  const content = data.choices[0].message.content;
  const parsed = JSON.parse(content);

  return {
    original: query,
    corrected: parsed.corrected || query,
    confidence: parsed.confidence || 0
  };
}

// Example: "latop for progamming"
// Returns: { corrected: "laptop for programming", confidence: 0.99 }
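
In the pipeline, corrections are usually applied only above a confidence threshold, keeping the original query as a fallback; the 0.8 cutoff here is an assumption to tune on your own traffic:

async function searchWithCorrection(
  query: string,
  documents: SearchDocument[]
): Promise<{ usedQuery: string; results: SearchResult[] }> {
  const { corrected, confidence } = await correctQuery(query);
  const usedQuery = confidence >= 0.8 && corrected !== query ? corrected : query;

  const results = await hybridSearch(usedQuery, documents);

  // If the corrected query finds nothing, fall back to the user's original wording
  if (results.length === 0 && usedQuery !== query) {
    return { usedQuery: query, results: await hybridSearch(query, documents) };
  }

  return { usedQuery, results };
}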

Search Analytics and Monitoring

interface SearchMetrics {
  totalQueries: number;
  zeroResultQueries: string[];
  avgPositionOfFirstClick: number;
  clickThroughRate: number;
  queryReformulations: number; // User reformulates before finding result
}

async function trackSearchEvent(
  query: string,
  results: SearchResult[],
  userAction: 'clicked' | 'skipped' | 'reformulated'
): Promise<void> {
  const event = {
    timestamp: new Date(),
    query,
    resultCount: results.length,
    action: userAction,
    firstResultClicked: userAction === 'clicked' ? results[0]?.document.id : null
  };

  // Log to analytics backend
  await fetch('/api/analytics/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(event)
  });
}

function analyzeSearchMetrics(events: Array<{
  query: string;
  resultCount: number;
  action: string;
}>): SearchMetrics {
  const zeroResultQueries = events
    .filter(e => e.resultCount === 0)
    .map(e => e.query);

  const clickEvents = events.filter(e => e.action === 'clicked');
  const ctr = clickEvents.length / events.length;

  return {
    totalQueries: events.length,
    zeroResultQueries,
    avgPositionOfFirstClick: 1.5, // Simplified
    clickThroughRate: ctr,
    queryReformulations: events.filter(e => e.action === 'reformulated').length
  };
}
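
Zero-result queries are the highest-leverage feedback loop: counting and ranking them shows where the index, synonyms, or expansion logic fall short. A small helper over the same event shape as above:

function topZeroResultQueries(
  events: Array<{ query: string; resultCount: number; action: string }>,
  limit: number = 20
): Array<{ query: string; count: number }> {
  const counts = new Map<string, number>();

  for (const e of events) {
    if (e.resultCount === 0) {
      const normalized = e.query.trim().toLowerCase();
      counts.set(normalized, (counts.get(normalized) ?? 0) + 1);
    }
  }

  return Array.from(counts.entries())
    .map(([query, count]) => ({ query, count }))
    .sort((a, b) => b.count - a.count)
    .slice(0, limit);
}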

A/B Testing Search Quality

interface SearchExperiment {
  controlModel: 'bm25' | 'semantic' | 'hybrid';
  treatmentModel: 'hybrid' | 'lora_rerank' | 'gpt4_rerank';
  metrics: {
    dcg: number; // Discounted cumulative gain
    mrr: number; // Mean reciprocal rank
    ctr: number;
    reformulationRate: number;
  };
}

function calculateDCG(results: SearchResult[]): number {
  let dcg = 0;
  results.forEach((result, index) => {
    const relevance = result.relevanceScore * 10; // Map the 0-1 score to a 0-10 graded relevance
    dcg += relevance / Math.log2(index + 2); // Discount by position (index is 0-based)
  });
  return dcg;
}
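
calculateDCG uses the system's own score as a stand-in for relevance, which only works for relative comparisons. With human relevance judgments per document (a hypothetical labels map of 0-3 grades), the normalized variant NDCG stays in [0, 1] and is comparable across queries:

function calculateNDCG(
  results: SearchResult[],
  labels: Map<string, number> // hypothetical human judgments: document id -> grade 0 (bad) to 3 (perfect)
): number {
  const gains = results.map(r => labels.get(r.document.id) ?? 0);
  const dcg = gains.reduce((sum, g, i) => sum + g / Math.log2(i + 2), 0);

  // Ideal DCG: the same graded gains in the best possible order
  const idcg = [...gains].sort((a, b) => b - a)
    .reduce((sum, g, i) => sum + g / Math.log2(i + 2), 0);

  return idcg === 0 ? 0 : dcg / idcg;
}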

async function runSearchABTest(
  queries: string[],
  documents: SearchDocument[]
): Promise<SearchExperiment> {
  const controlResults: SearchResult[][] = [];
  const treatmentResults: SearchResult[][] = [];

  for (const query of queries) {
    // Control: BM25 + semantic hybrid
    const control = await hybridSearch(query, documents);
    controlResults.push(control);

    // Treatment: Hybrid + GPT-4 reranking
    const hybrid = await hybridSearch(query, documents);
    const treatment = await rerank(query, hybrid, new Map());
    treatmentResults.push(treatment);
  }

  const controlDCG = controlResults.reduce((sum, r) => sum + calculateDCG(r), 0) / controlResults.length;
  const treatmentDCG = treatmentResults.reduce((sum, r) => sum + calculateDCG(r), 0) / treatmentResults.length;

  return {
    controlModel: 'hybrid',
    treatmentModel: 'gpt4_rerank',
    metrics: {
      dcg: treatmentDCG,
      mrr: treatmentDCG / controlDCG, // Placeholder: DCG ratio as a rough proxy; true MRR needs per-query relevance labels
      ctr: 0.05, // Would measure in production
      reformulationRate: 0.1
    }
  };
}

Checklist

  • Implemented hybrid search combining BM25 and embeddings
  • Set semantic/keyword weight ratio (0.6/0.4 default)
  • Added query understanding to extract intent and entities
  • Built query expansion with LLM for improved recall
  • Implemented result reranking with neural model
  • Added spell correction before search execution
  • Set up faceted search with structured filters
  • Built search analytics dashboard for zero-result queries
  • Configured A/B test framework for search quality
  • Set alert for click-through rate dropping below 3%
  • Documented search ranking factors and weights

Conclusion

Production search requires a layered approach: keyword matching for precision, embeddings for semantics, LLM reranking for nuance. Start with hybrid search (BM25 + semantic), add query understanding, and iterate based on analytics. Track zero-result queries and reformulation rate as key metrics.