Published on

RAG Chunking Strategies — Why Your Chunk Size Is Killing Retrieval Quality

Authors

Introduction

Chunk size and strategy directly determine RAG quality. Fixed-size chunks destroy context boundaries, semantic chunking misses relationships, and naive splitting creates orphaned fragments. This guide covers production chunking techniques that maximize recall while minimizing latency.

Fixed-Size vs Semantic Chunking

Semantic chunking preserves meaning boundaries while fixed-size chunking trades quality for simplicity.

/**
 * Demonstrates fixed-size vs semantic chunking and a simple quality report
 * over the produced chunks.
 */
class ChunkingStrategy {
  /**
   * Fixed-size chunking: slices `text` into windows of `chunkSize` characters,
   * stepping by `chunkSize - overlap` so consecutive chunks share context.
   * Simple and fast, but often cuts through sentence boundaries.
   *
   * @throws RangeError if `overlap >= chunkSize` — the step would be <= 0 and
   *         the loop would never terminate.
   */
  fixedSizeChunk(text: string, chunkSize: number = 512, overlap: number = 64): string[] {
    if (overlap >= chunkSize) {
      throw new RangeError('overlap must be smaller than chunkSize');
    }
    const chunks: string[] = [];
    for (let i = 0; i < text.length; i += chunkSize - overlap) {
      chunks.push(text.slice(i, i + chunkSize));
    }
    return chunks;
  }

  /**
   * Semantic chunking: greedily packs whole sentences into chunks of roughly
   * `targetSize` characters so meaning boundaries are preserved.
   * Text with no sentence terminator at all is returned as a single chunk
   * instead of being silently dropped.
   */
  async semanticChunk(text: string): Promise<string[]> {
    // NOTE: the regex still drops a trailing fragment without ./!/? — fine for
    // this demo, but real pipelines should append the remainder.
    const sentences = text.match(/[^.!?]+[.!?]+/g) || (text ? [text] : []);
    const chunks: string[] = [];
    let currentChunk = '';
    const targetSize = 512;

    for (const sentence of sentences) {
      if ((currentChunk + sentence).length > targetSize) {
        if (currentChunk) chunks.push(currentChunk);
        currentChunk = sentence;
      } else {
        currentChunk += sentence;
      }
    }

    if (currentChunk) chunks.push(currentChunk);
    return chunks;
  }

  /**
   * Size/completeness statistics over a chunk list. A chunk is "complete"
   * when it ends on any sentence terminator (the old check forgot '!').
   * An empty list returns all-zero stats instead of NaN/Infinity.
   */
  evaluateChunks(chunks: string[]): {
    avgSize: number;
    minSize: number;
    maxSize: number;
    incompleteCount: number;
  } {
    if (chunks.length === 0) {
      return { avgSize: 0, minSize: 0, maxSize: 0, incompleteCount: 0 };
    }

    const sizes = chunks.map((c) => c.length);
    const avgSize = sizes.reduce((a, b) => a + b, 0) / sizes.length;
    const incomplete = chunks.filter((c) => !/[.!?]$/.test(c.trim())).length;

    return {
      avgSize: Math.round(avgSize),
      minSize: Math.min(...sizes),
      maxSize: Math.max(...sizes),
      incompleteCount: incomplete,
    };
  }
}

// Demo: compare fixed-size vs semantic chunking on a tiny sample text.
const chunker = new ChunkingStrategy();
const text = 'First sentence. Second sentence. Third sentence. Fourth sentence.';
const fixedChunks = chunker.fixedSizeChunk(text, 30);
const semanticChunks = await chunker.semanticChunk(text);

// Report size/completeness stats for each strategy side by side.
console.log('Fixed chunks quality:', chunker.evaluateChunks(fixedChunks));
console.log('Semantic chunks quality:', chunker.evaluateChunks(semanticChunks));

Recursive Text Splitter With Overlap

Split by progressively finer delimiters — paragraph, then line, then sentence, then word — and carry a tail of each chunk into the next to maintain context overlap.

/**
 * Splits text by the coarsest separator (paragraph -> line -> sentence ->
 * word) that actually divides it, greedily merges the parts up to
 * `chunkSize`, and prefixes each chunk with the tail of its predecessor
 * for context overlap.
 */
class RecursiveTextSplitter {
  private separators = ['\n\n', '\n', '. ', ' ', ''];
  private chunkSize = 1024;
  private overlapSize = 128;

  split(text: string): string[] {
    return this.recursiveSplit(text, this.separators);
  }

  /** Pick the first separator yielding more than one non-empty part. */
  private recursiveSplit(text: string, separators: string[]): string[] {
    let separator = separators[separators.length - 1];

    for (const s of separators) {
      if (s === '') break;
      const nonEmptyParts = text.split(s).filter((p) => p.length >= 1);
      if (nonEmptyParts.length > 1) {
        separator = s;
        break;
      }
    }

    return this.mergeSplits(text.split(separator), separator);
  }

  /** Greedily pack splits into chunks below `chunkSize`, then add overlap. */
  private mergeSplits(splits: string[], separator: string): string[] {
    const chunks: string[] = [];
    let currentChunk = '';

    for (const split of splits) {
      // Only insert the separator BETWEEN parts; the old version prefixed it
      // to the first part too, leaking a literal '. ' into the first chunk
      // whenever the sentence separator was chosen.
      const combined = currentChunk ? currentChunk + separator + split : split;

      if (combined.length < this.chunkSize) {
        currentChunk = combined.trim();
      } else {
        if (currentChunk) chunks.push(currentChunk);
        currentChunk = split;
      }
    }

    if (currentChunk) chunks.push(currentChunk);

    // Prepend the tail of the previous chunk so neighboring chunks share
    // up to `overlapSize` characters of context.
    const chunksWithOverlap: string[] = [];
    for (let i = 0; i < chunks.length; i++) {
      let chunk = chunks[i];
      if (i > 0) {
        const overlap = chunks[i - 1].slice(-this.overlapSize);
        chunk = overlap + '\n' + chunk;
      }
      chunksWithOverlap.push(chunk);
    }

    return chunksWithOverlap;
  }
}

// Demo: split a small two-paragraph document with the recursive splitter.
const splitter = new RecursiveTextSplitter();
const document = 'Long document content...\n\nWith multiple paragraphs. And sentences.';
const chunks = splitter.split(document);

Sentence-Based and Paragraph-Based Chunking

Align chunks to natural sentence and paragraph boundaries.

/**
 * Chunks text along natural sentence and paragraph boundaries.
 */
class SentenceChunker {
  /**
   * Greedily packs whole sentences into chunks of at most `maxSize`
   * characters (joined with single spaces). Text without any ./!/?
   * terminator is returned as one chunk instead of being dropped; a single
   * sentence longer than `maxSize` still becomes its own oversized chunk.
   */
  chunkBySentences(text: string, maxSize: number = 1024): string[] {
    const sentences = text.match(/[^.!?]+[.!?]+/g) || (text ? [text] : []);
    const chunks: string[] = [];
    let currentChunk = '';

    for (const sentence of sentences) {
      if ((currentChunk + ' ' + sentence).length <= maxSize) {
        currentChunk += (currentChunk ? ' ' : '') + sentence;
      } else {
        if (currentChunk) chunks.push(currentChunk);
        currentChunk = sentence;
      }
    }

    if (currentChunk) chunks.push(currentChunk);
    return chunks;
  }

  /**
   * Emits each paragraph as one chunk; paragraphs over `maxSize` are
   * re-split by sentence via chunkBySentences (this logic was previously
   * duplicated inline).
   */
  chunkByParagraphs(text: string, maxSize: number = 1024): string[] {
    const chunks: string[] = [];

    for (const para of text.split(/\n\n+/)) {
      if (para.length <= maxSize) {
        chunks.push(para);
      } else {
        chunks.push(...this.chunkBySentences(para, maxSize));
      }
    }

    return chunks;
  }
}

// Demo: paragraph-first chunking on a two-paragraph sample.
const sentenceChunker = new SentenceChunker();
const multiParagraphText = 'Paragraph 1. Sentence 1. Sentence 2.\n\nParagraph 2. Sentence 3.';
const chunks = sentenceChunker.chunkByParagraphs(multiParagraphText);

Document-Specific Strategies

Different document types benefit from different chunking approaches.

// Base type for format-specific chunkers: each implementation turns raw
// document content into text chunks tagged with format-specific metadata.
abstract class DocumentChunker {
  // Returns chunks paired with metadata (e.g. type, section, page).
  abstract chunk(content: string): Array<{ text: string; metadata: Record<string, unknown> }>;
}

class CodeChunker extends DocumentChunker {
  /**
   * Chunks source code at function/class boundaries, with a 1024-character
   * fallback cap for long stretches without a new definition. Each chunk's
   * metadata records the name of the definition it belongs to.
   */
  chunk(content: string): Array<{ text: string; metadata: Record<string, unknown> }> {
    const chunks: Array<{ text: string; metadata: Record<string, unknown> }> = [];
    const lines = content.split('\n');
    let currentChunk = '';
    let currentFunction = '';

    for (const line of lines) {
      // Heuristic for a new definition: function/class/arrow-const, or a
      // call-like identifier at column 0.
      if (line.match(/^(function|class|const.*=.*=>|\w+\s*\()/)) {
        if (currentChunk) {
          chunks.push({
            text: currentChunk.trim(),
            metadata: { type: 'code', function: currentFunction },
          });
        }
        // Capture the identifier AFTER the keyword; the old first-word match
        // reported 'function'/'class'/'const' as the definition name.
        currentFunction =
          line.match(/^(?:function|class|const)\s+(\w+)/)?.[1] ??
          line.match(/^(\w+)/)?.[1] ??
          'unknown';
        currentChunk = line;
      } else {
        currentChunk += '\n' + line;
        if (currentChunk.length > 1024) {
          chunks.push({
            text: currentChunk.trim(),
            metadata: { type: 'code', function: currentFunction },
          });
          currentChunk = '';
        }
      }
    }

    if (currentChunk.trim()) {
      chunks.push({
        text: currentChunk.trim(),
        metadata: { type: 'code', function: currentFunction },
      });
    }

    return chunks;
  }
}

class MarkdownChunker extends DocumentChunker {
  /**
   * Splits markdown on '## ' headings; each section chunk carries its
   * heading text in metadata. Any preamble before the first heading is kept
   * as its own untitled chunk. (The old version emitted an empty chunk when
   * the document started with a heading, and misused the preamble's first
   * line as a section title.)
   */
  chunk(content: string): Array<{ text: string; metadata: Record<string, unknown> }> {
    const chunks: Array<{ text: string; metadata: Record<string, unknown> }> = [];
    const sections = content.split(/^##\s+/m);

    sections.forEach((section, idx) => {
      // The split leaves an empty first slot when content starts with '## '.
      if (!section.trim()) return;

      if (idx === 0) {
        // Preamble before the first heading: there is no section title.
        chunks.push({
          text: section.trim(),
          metadata: { type: 'markdown', section: '' },
        });
        return;
      }

      const [title, ...rest] = section.split('\n');
      chunks.push({
        text: rest.join('\n').trim(),
        metadata: { type: 'markdown', section: title },
      });
    });

    return chunks;
  }
}

class PDFChunker extends DocumentChunker {
  /**
   * Chunks extracted PDF text on '[PAGE n]' markers, one chunk per page.
   * Blank pages are skipped (the old version indexed empty chunks for them);
   * `page` metadata still reflects the page's original position.
   */
  chunk(content: string): Array<{ text: string; metadata: Record<string, unknown> }> {
    const pages = content.split(/\n\[PAGE \d+\]\n/);
    const chunks: Array<{ text: string; metadata: Record<string, unknown> }> = [];

    pages.forEach((page, idx) => {
      const text = page.trim();
      if (!text) return; // skip blank pages rather than emit empty chunks
      chunks.push({
        text,
        metadata: { type: 'pdf', page: idx + 1 },
      });
    });

    return chunks;
  }
}

// Demo: instantiate the chunker matching each document type.
const codeChunker = new CodeChunker();
const markdownChunker = new MarkdownChunker();
const pdfChunker = new PDFChunker();

const codeChunks = codeChunker.chunk('function hello() { return "world"; }');
const mdChunks = markdownChunker.chunk('## Introduction\nContent here.');

Chunk Metadata for Filtering

Add rich metadata to enable pre-filtering before vector search.

/**
 * Metadata attached to each chunk to support pre-filtering before vector
 * search and post-ranking of results.
 */
interface ChunkMetadata {
  /** Originating document (e.g. a file name). */
  source: string;
  /** Section or heading within the document, if applicable. */
  section?: string;
  /** Timestamp associated with the chunk — presumably ingestion time; confirm against caller. */
  timestamp?: Date;
  /** Document author, if known. */
  author?: string;
  /** Confidence in the chunk's content/extraction quality. */
  confidenceLevel?: 'high' | 'medium' | 'low';
  /** Free-form labels; filtering requires ALL requested tags to be present. */
  tags?: string[];
}

/**
 * A text chunk paired with its filtering metadata and, once computed, its
 * embedding vector.
 */
class ChunkWithMetadata {
  public id: string;
  public text: string;
  public metadata: ChunkMetadata;
  public embedding?: number[];

  constructor(id: string, text: string, metadata: ChunkMetadata, embedding?: number[]) {
    this.id = id;
    this.text = text;
    this.metadata = metadata;
    this.embedding = embedding;
  }

  /** Builds a chunk with a generated id (timestamp plus random suffix). */
  static create(text: string, metadata: ChunkMetadata): ChunkWithMetadata {
    const generatedId = `chunk_${Date.now()}_${Math.random()}`;
    return new ChunkWithMetadata(generatedId, text, metadata);
  }
}

/**
 * In-memory chunk store supporting exact-match metadata filtering before
 * any vector search is run.
 */
class MetadataIndexer {
  private chunks: Map<string, ChunkWithMetadata> = new Map();

  /** Registers a chunk keyed by its id; later adds with the same id overwrite. */
  addChunk(chunk: ChunkWithMetadata): void {
    this.chunks.set(chunk.id, chunk);
  }

  /**
   * Returns chunks matching every provided filter. Unset (falsy) filter
   * fields are ignored; `tags` requires all requested tags to be present.
   */
  filterByMetadata(filters: Partial<ChunkMetadata>): ChunkWithMetadata[] {
    const matchesFilters = (candidate: ChunkWithMetadata): boolean => {
      const meta = candidate.metadata;
      if (filters.source && meta.source !== filters.source) return false;
      if (filters.section && meta.section !== filters.section) return false;
      if (filters.confidenceLevel && meta.confidenceLevel !== filters.confidenceLevel) return false;
      if (filters.tags && !filters.tags.every((tag) => meta.tags?.includes(tag))) return false;
      return true;
    };

    return Array.from(this.chunks.values()).filter(matchesFilters);
  }

  /** All chunks originating from the given source document. */
  getChunksBySource(source: string): ChunkWithMetadata[] {
    const matching: ChunkWithMetadata[] = [];
    for (const candidate of this.chunks.values()) {
      if (candidate.metadata.source === source) matching.push(candidate);
    }
    return matching;
  }

  /** All chunks tagged with the given confidence level. */
  getChunksByConfidence(level: 'high' | 'medium' | 'low'): ChunkWithMetadata[] {
    const matching: ChunkWithMetadata[] = [];
    for (const candidate of this.chunks.values()) {
      if (candidate.metadata.confidenceLevel === level) matching.push(candidate);
    }
    return matching;
  }
}

// Demo: index one chunk and filter on two metadata fields at once.
const indexer = new MetadataIndexer();
const chunk = ChunkWithMetadata.create('Important information', {
  source: 'annual_report.pdf',
  section: 'Financial',
  confidenceLevel: 'high',
  tags: ['finance', 'revenue'],
});

indexer.addChunk(chunk);
const filtered = indexer.filterByMetadata({ confidenceLevel: 'high', source: 'annual_report.pdf' });

Parent-Child Chunking

Create small chunks for retrieval but maintain parent context for generation.

/**
 * Parent-child chunking: retrieval happens over small child chunks while
 * each child carries the id and full text of its larger parent chunk for
 * generation-time context.
 */
class ParentChildChunker {
  /**
   * Splits `text` into parent chunks of ~`parentChunkSize` characters, then
   * each parent into children of ~`smallChunkSize` characters. Returns one
   * row per child, linked to its parent.
   */
  chunkWithParent(
    text: string,
    smallChunkSize: number = 256,
    parentChunkSize: number = 1024
  ): Array<{ id: string; text: string; parentId: string; parentText: string }> {
    const parentChunks = this.createChunks(text, parentChunkSize);
    const results: Array<{ id: string; text: string; parentId: string; parentText: string }> = [];

    for (const parentChunk of parentChunks) {
      const parentId = `parent_${Date.now()}_${Math.random()}`;

      for (const child of this.createChunks(parentChunk, smallChunkSize)) {
        results.push({
          id: `child_${Date.now()}_${Math.random()}`,
          text: child,
          parentId,
          parentText: parentChunk,
        });
      }
    }

    return results;
  }

  /**
   * Greedy sentence-packing into chunks of at most ~`size` characters.
   * Text with no sentence terminator is kept whole (previously the regex
   * matched nothing and the content was silently dropped).
   */
  private createChunks(text: string, size: number): string[] {
    const sentences = text.match(/[^.!?]+[.!?]+/g) || (text.trim() ? [text] : []);
    const chunks: string[] = [];
    let current = '';

    for (const sentence of sentences) {
      if ((current + sentence).length > size) {
        if (current) chunks.push(current);
        current = sentence;
      } else {
        current += sentence;
      }
    }

    if (current) chunks.push(current);
    return chunks;
  }
}

// Demo: 128-char children nested under 512-char parents.
const parentChildChunker = new ParentChildChunker();
const chunks = parentChildChunker.chunkWithParent(
  'Document with multiple sentences. Each sentence is important. They form paragraphs together.',
  128,
  512
);

// Each child row carries its parent's full text for generation context.
chunks.forEach((chunk) => {
  console.log(`Child: ${chunk.text.slice(0, 50)}...`);
  console.log(`Parent: ${chunk.parentText.slice(0, 100)}...`);
});

Late Chunking With Long-Context Embeddings

Embed the full document with a long-context model up front, then defer chunking and chunk scoring until query time.

/**
 * Late chunking: embed the full document with a long-context model up
 * front, and only split/score chunks at query time.
 *
 * NOTE: calls the OpenAI embeddings API; requires OPENAI_API_KEY.
 */
class LateChunker {
  /** Embeds the whole document once and pre-computes query-time chunks. */
  async embedFullDocument(text: string): Promise<{ embedding: number[]; chunks: string[] }> {
    // With long-context embeddings, we can embed the full document.
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
      },
      body: JSON.stringify({
        model: 'text-embedding-3-large',
        input: text,
      }),
    });

    const data = (await response.json()) as { data: Array<{ embedding: number[] }> };
    const embedding = data.data[0].embedding;

    // Chunks are produced here but only scored at retrieval time.
    const chunks = this.chunkAtQueryTime(text, 256);

    return { embedding, chunks };
  }

  /** Greedy sentence-packing; unterminated text is kept as a single chunk. */
  private chunkAtQueryTime(text: string, chunkSize: number): string[] {
    const sentences = text.match(/[^.!?]+[.!?]+/g) || (text.trim() ? [text] : []);
    const chunks: string[] = [];
    let current = '';

    for (const sentence of sentences) {
      if ((current + sentence).length > chunkSize) {
        if (current) chunks.push(current);
        current = sentence;
      } else {
        current += sentence;
      }
    }

    if (current) chunks.push(current);
    return chunks;
  }

  /**
   * Ranks `chunks` against the query and returns the top 5.
   *
   * Each chunk is now embedded and scored individually: the old version
   * compared the query against the single document embedding, so every
   * chunk received the identical score and the sort was a no-op.
   * `documentEmbedding` is kept for interface compatibility.
   */
  async retrieveWithLateChunking(
    query: string,
    documentEmbedding: number[],
    chunks: string[]
  ): Promise<string[]> {
    const queryEmbedding = await this.getQueryEmbedding(query);
    const chunkEmbeddings = await Promise.all(chunks.map((c) => this.getQueryEmbedding(c)));

    const scored = chunks.map((chunk, i) => ({
      chunk,
      score: this.cosineSimilarity(queryEmbedding, chunkEmbeddings[i]),
    }));

    return scored.sort((a, b) => b.score - a.score).slice(0, 5).map((item) => item.chunk);
  }

  /** Embeds a short text with the small embedding model. */
  private async getQueryEmbedding(query: string): Promise<number[]> {
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
      },
      body: JSON.stringify({
        model: 'text-embedding-3-small',
        input: query,
      }),
    });

    const data = (await response.json()) as { data: Array<{ embedding: number[] }> };
    return data.data[0].embedding;
  }

  /** Cosine similarity; returns 0 (instead of NaN) for zero-magnitude input. */
  private cosineSimilarity(vecA: number[], vecB: number[]): number {
    const dotProduct = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
    const magnitudeA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
    const magnitudeB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));

    if (magnitudeA === 0 || magnitudeB === 0) return 0;
    return dotProduct / (magnitudeA * magnitudeB);
  }
}

// Demo: embed a full document once; chunks are only scored at query time.
const lateChunker = new LateChunker();
const fullDoc = 'Long document content that we want to embed in full context...';
const { embedding, chunks } = await lateChunker.embedFullDocument(fullDoc);

Evaluation With Recall@K and MRR

Measure chunking effectiveness with retrieval metrics.

/**
 * Retrieval-quality metrics (recall@k, MRR, nDCG) for comparing chunking
 * strategies against labeled relevant chunks.
 */
class ChunkingEvaluator {
  /**
   * Capped recall@k: fraction of the top-k results that are relevant,
   * normalized by min(k, |relevant|) so a perfect top-k scores 1 even when
   * k < |relevant|. Returns 0 for empty `relevant` or non-positive k
   * (previously divided by zero, yielding NaN).
   */
  recallAtK(retrieved: string[], relevant: string[], k: number): number {
    if (relevant.length === 0 || k <= 0) return 0;
    const topK = retrieved.slice(0, k);
    const matches = topK.filter((item) => relevant.includes(item)).length;
    return matches / Math.min(k, relevant.length);
  }

  /** Reciprocal rank of the first relevant hit; 0 when nothing relevant appears. */
  meanReciprocalRank(retrieved: string[], relevant: string[]): number {
    for (let i = 0; i < retrieved.length; i++) {
      if (relevant.includes(retrieved[i])) {
        return 1 / (i + 1);
      }
    }
    return 0;
  }

  /**
   * Binary-relevance nDCG@k. Returns 0 for empty `relevant` or
   * non-positive k (previously 0/0 yielded NaN).
   */
  ndcg(retrieved: string[], relevant: string[], k: number): number {
    if (relevant.length === 0 || k <= 0) return 0;

    let dcg = 0;
    retrieved.slice(0, k).forEach((item, i) => {
      if (relevant.includes(item)) dcg += 1 / Math.log2(i + 2);
    });

    // Ideal DCG: all relevant items ranked first.
    let idcg = 0;
    for (let i = 0; i < Math.min(relevant.length, k); i++) {
      idcg += 1 / Math.log2(i + 2);
    }

    return dcg / idcg;
  }

  /**
   * Averages recall@5, MRR and nDCG@5 over the test cases; the candidate
   * list for each case is produced by applying `chunkingFn` to its query.
   * An empty test set returns all-zero averages (previously 0/0 → NaN).
   */
  evaluateChunkingStrategy(
    testCases: Array<{ query: string; relevantChunks: string[] }>,
    chunkingFn: (text: string) => string[]
  ): { avgRecall5: number; avgMRR: number; avgNDCG: number } {
    if (testCases.length === 0) {
      return { avgRecall5: 0, avgMRR: 0, avgNDCG: 0 };
    }

    let totalRecall5 = 0;
    let totalMRR = 0;
    let totalNDCG = 0;

    for (const testCase of testCases) {
      const chunks = chunkingFn(testCase.query);
      totalRecall5 += this.recallAtK(chunks, testCase.relevantChunks, 5);
      totalMRR += this.meanReciprocalRank(chunks, testCase.relevantChunks);
      totalNDCG += this.ndcg(chunks, testCase.relevantChunks, 5);
    }

    const count = testCases.length;
    return {
      avgRecall5: totalRecall5 / count,
      avgMRR: totalMRR / count,
      avgNDCG: totalNDCG / count,
    };
  }
}

// Demo: score a trivial whitespace-split "strategy" on two labeled cases.
const evaluator = new ChunkingEvaluator();
const metrics = evaluator.evaluateChunkingStrategy(
  [
    { query: 'topic', relevantChunks: ['chunk1', 'chunk2'] },
    { query: 'other', relevantChunks: ['chunk3'] },
  ],
  (text) => text.split(' ')
);

console.log('Chunking metrics:', metrics);

Checklist

  • Profile your document types and choose strategies per type
  • Use sentence-boundary alignment for natural language documents
  • Use function/class boundaries for code documents
  • Implement parent-child chunking for better context
  • Add rich metadata for pre-filtering and post-ranking
  • Measure retrieval quality with recall@k and MRR
  • Test semantic chunking vs fixed-size for your domain
  • Use long-context embeddings to embed full documents when possible
  • Validate chunking on golden datasets quarterly
  • Monitor average chunk size distribution for consistency

Conclusion

Chunk strategy directly determines RAG quality. Start with semantic chunking based on natural boundaries (sentences, paragraphs, functions). Add parent-child structure for context, rich metadata for filtering, and measure quality with recall@k metrics. As you scale, experiment with late chunking using long-context embeddings for maximum performance.