- Published on
Choosing a Vector Database — pgvector vs Pinecone vs Weaviate for Production RAG
- Authors

- Name
- Sanjeev Sharma
- @webcoderspeed1
Introduction
Every RAG system needs a vector database, but the choice shapes your entire architecture. pgvector keeps data in PostgreSQL, Pinecone offloads to a managed service, Weaviate offers a middle ground. This post compares all three with production patterns, index tuning, metadata filtering, cost analysis, and migration strategies.
- pgvector: Self-Hosted PostgreSQL
- Pinecone: Fully Managed Service
- Weaviate: Hybrid Approach
- Performance Benchmarks
- Filtering with Metadata
- Migration Strategies
- Vector Database Selection Checklist
- Conclusion
pgvector: Self-Hosted PostgreSQL
pgvector turns PostgreSQL into a vector store. Ideal when you already have Postgres and need tight coupling with relational data.
-- Installation and setup
CREATE EXTENSION IF NOT EXISTS vector;

-- Table with metadata columns; UNIQUE enforces one row per (tenant, document, chunk)
CREATE TABLE documents (
    id BIGSERIAL PRIMARY KEY,
    tenant_id UUID NOT NULL,
    document_id UUID NOT NULL,
    chunk_index INT NOT NULL,
    content TEXT NOT NULL,
    embedding vector(1536),
    metadata JSONB,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    UNIQUE(tenant_id, document_id, chunk_index)
);

-- HNSW index (excellent for <5M vectors, best latency)
CREATE INDEX documents_hnsw_idx
ON documents USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 200);

-- IVFFlat index (for 5M+ vectors, better memory efficiency)
-- CREATE INDEX documents_ivfflat_idx
-- ON documents USING ivfflat (embedding vector_cosine_ops)
-- WITH (lists = 100);

-- Query-time recall/speed knob.
-- FIX: pgvector exposes this as the hnsw.ef_search GUC (set per
-- session/transaction), not as an index storage parameter --
-- ALTER INDEX ... SET (ef = 100) is rejected by the extension.
SET hnsw.ef_search = 100;

-- Partial index for active documents.
-- FIX: NOW() is STABLE, not IMMUTABLE, so it cannot appear in an index
-- predicate (CREATE INDEX errors out). A rolling time window needs a fixed
-- cutoff date refreshed by periodic reindexing; here we keep only the
-- immutable condition.
CREATE INDEX documents_hnsw_active
ON documents USING hnsw (embedding vector_cosine_ops)
WHERE metadata->>'active' = 'true';

-- GIN index so metadata containment filters (@>) stay fast
CREATE INDEX documents_metadata_idx ON documents USING gin (metadata);
import { Pool, PoolClient } from 'pg';
/** Options accepted by PgVectorStore.search(). */
interface VectorSearchOptions {
// Maximum number of rows to return (SQL LIMIT).
limit: number;
// Restrict results to a single tenant when provided.
tenantId?: string;
// JSONB containment filter, applied as `metadata @> $n`.
metadataFilter?: Record<string, any>;
// Minimum cosine similarity; rows at or below this value are excluded.
threshold?: number;
}
/**
 * Vector store backed by PostgreSQL + pgvector.
 *
 * Assumes the `documents` table and indexes from the schema above exist.
 * Embeddings are sent as JSON-encoded arrays, which the `vector` type
 * accepts as input; all user-supplied values are bound as parameters.
 */
class PgVectorStore {
  private pool: Pool;

  constructor(connectionString: string) {
    this.pool = new Pool({
      connectionString,
      max: 20, // Connection pool size
      idleTimeoutMillis: 30000,
    });
  }

  /**
   * Upsert a single chunk. On conflict (same tenant/document/chunk) the
   * embedding and metadata are refreshed and updated_at is bumped.
   */
  async indexDocument(
    tenantId: string,
    documentId: string,
    chunkIndex: number,
    content: string,
    embedding: number[],
    metadata: Record<string, any>
  ): Promise<void> {
    await this.pool.query(
      `INSERT INTO documents (tenant_id, document_id, chunk_index, content, embedding, metadata)
       VALUES ($1, $2, $3, $4, $5, $6)
       ON CONFLICT (tenant_id, document_id, chunk_index) DO UPDATE SET
         embedding = EXCLUDED.embedding,
         metadata = EXCLUDED.metadata,
         updated_at = NOW()`,
      [tenantId, documentId, chunkIndex, content, JSON.stringify(embedding), metadata]
    );
  }

  /**
   * Cosine-similarity search, optionally scoped by tenant, JSONB
   * containment filter, and a minimum similarity threshold.
   *
   * @returns rows ordered by similarity (closest first).
   */
  async search(query: {
    embedding: number[];
    options: VectorSearchOptions;
  }): Promise<Array<{ id: number; content: string; similarity: number; metadata: any }>> {
    let sql = `
      SELECT id, content, 1 - (embedding <=> $1::vector) as similarity, metadata
      FROM documents
      WHERE 1 = 1
    `;
    const params: any[] = [JSON.stringify(query.embedding)];
    let paramCount = 2;
    if (query.options.tenantId) {
      sql += ` AND tenant_id = $${paramCount}`;
      params.push(query.options.tenantId);
      paramCount++;
    }
    if (query.options.metadataFilter) {
      // JSONB containment (@>) can use the GIN index on metadata.
      sql += ` AND metadata @> $${paramCount}`;
      params.push(query.options.metadataFilter);
      paramCount++;
    }
    // BUG FIX: the previous truthiness check (`if (threshold)`) silently
    // ignored an explicit threshold of 0; check for null/undefined instead.
    if (query.options.threshold != null) {
      sql += ` AND (1 - (embedding <=> $1::vector)) > $${paramCount}`;
      params.push(query.options.threshold);
      paramCount++;
    }
    sql += `
      ORDER BY embedding <=> $1::vector
      LIMIT $${paramCount}
    `;
    params.push(query.options.limit);
    const result = await this.pool.query(sql, params);
    return result.rows;
  }

  /**
   * Insert many chunks inside a single transaction.
   *
   * CONSISTENCY FIX: uses the same ON CONFLICT DO UPDATE semantics as
   * indexDocument; the previous DO NOTHING silently kept stale embeddings
   * when re-indexing existing chunks.
   */
  async batchInsert(
    documents: Array<{
      tenantId: string;
      documentId: string;
      chunkIndex: number;
      content: string;
      embedding: number[];
      metadata: Record<string, any>;
    }>
  ): Promise<void> {
    const client = await this.pool.connect();
    try {
      await client.query('BEGIN');
      for (const doc of documents) {
        await client.query(
          `INSERT INTO documents (tenant_id, document_id, chunk_index, content, embedding, metadata)
           VALUES ($1, $2, $3, $4, $5, $6)
           ON CONFLICT (tenant_id, document_id, chunk_index) DO UPDATE SET
             embedding = EXCLUDED.embedding,
             metadata = EXCLUDED.metadata,
             updated_at = NOW()`,
          [
            doc.tenantId,
            doc.documentId,
            doc.chunkIndex,
            doc.content,
            JSON.stringify(doc.embedding),
            doc.metadata,
          ]
        );
      }
      await client.query('COMMIT');
    } catch (error) {
      // Roll back the whole batch on any failure, then rethrow with the
      // original error so callers see the root cause.
      await client.query('ROLLBACK');
      throw error;
    } finally {
      client.release();
    }
  }
}
Pinecone: Fully Managed Service
Pinecone handles scaling, replication, and infrastructure. Best for teams that want to avoid operational overhead entirely.
import { Pinecone } from '@pinecone-database/pinecone';
/**
 * Vector store backed by Pinecone's fully managed service.
 * Embeddings must be computed by the caller before upserting.
 */
class PineconeVectorStore {
  private client: Pinecone;
  private indexName: string;

  constructor(apiKey: string, indexName: string) {
    this.client = new Pinecone({ apiKey });
    this.indexName = indexName;
  }

  /** Upsert vectors in batches of 100 to stay under request-size limits. */
  async upsertDocuments(
    documents: Array<{
      id: string;
      values: number[];
      metadata: Record<string, any>;
    }>
  ): Promise<void> {
    const index = this.client.Index(this.indexName);
    for (let i = 0; i < documents.length; i += 100) {
      const batch = documents.slice(i, i + 100);
      await index.upsert(batch);
    }
  }

  /**
   * Query the index, optionally scoped to one tenant and/or a metadata
   * filter. Returns the raw matches (empty array when none).
   */
  async queryDocuments(
    embedding: number[],
    options: {
      topK: number;
      tenantId?: string;
      metadataFilter?: Record<string, any>;
    }
  ): Promise<
    Array<{
      id: string;
      score: number;
      metadata: Record<string, any>;
    }>
  > {
    const index = this.client.Index(this.indexName);
    let filter: any = undefined;
    if (options.tenantId || options.metadataFilter) {
      // SECURITY FIX: spread the caller's filter first, THEN apply the
      // tenant clause. Previously Object.assign(filter, metadataFilter) ran
      // last, so a metadataFilter containing a `tenantId` key could override
      // the tenant restriction and leak cross-tenant results.
      filter = { ...options.metadataFilter };
      if (options.tenantId) {
        filter.tenantId = { $eq: options.tenantId };
      }
    }
    const results = await index.query({
      vector: embedding,
      topK: options.topK,
      includeMetadata: true,
      filter,
    });
    return results.matches || [];
  }

  /**
   * Delete all vectors matching a metadata filter.
   *
   * API FIX: uses the public deleteMany() method; the previous
   * index._delete({ deleteRequest: ... }) call reached into a private,
   * since-removed internal of the SDK.
   */
  async deleteByMetadata(filter: Record<string, any>): Promise<void> {
    const index = this.client.Index(this.indexName);
    await index.deleteMany(filter);
  }
}
// Cost tracking for Pinecone
// Cost tracking for Pinecone
interface PineconeMetrics {
  monthly_cost: number;             // last observed bill, for comparison
  storage_gb: number;               // raw storage footprint
  vector_count: number;             // vectors currently stored in the index
  monthly_index_operations: number; // API calls (queries + upserts) per month
  estimated_qps: number;            // sustained queries per second
}

/**
 * Estimate the monthly Pinecone bill from usage metrics.
 *
 * Pricing model (as of 2026-03): $0.04/month per MILLION stored vectors and
 * $0.24 per million API calls.
 *
 * FIX: the previous comment claimed "$0.04/month per vector", contradicting
 * the actual per-million rate the arithmetic below implements.
 */
function calculatePineconeCost(metrics: PineconeMetrics): number {
  const STORAGE_COST_PER_VECTOR = 0.00000004; // = $0.04 per 1M vectors
  const API_COST_PER_MILLION_CALLS = 0.24;
  const storageCost = metrics.vector_count * STORAGE_COST_PER_VECTOR;
  const apiCost = (metrics.monthly_index_operations / 1_000_000) * API_COST_PER_MILLION_CALLS;
  return storageCost + apiCost;
}
Weaviate: Hybrid Approach
Weaviate offers self-hosted flexibility with managed cloud options. GraphQL API, HNSW by default, schema-first design.
import weaviate, { AuthApiKey } from 'weaviate-ts-client';
/** Connection settings for WeaviateVectorStore. */
interface WeaviateConfig {
// Transport scheme; 'https' for managed/cloud clusters, 'http' for local.
scheme: 'http' | 'https';
// Host (and optional port) without a scheme prefix.
host: string;
// API key for authenticated clusters; omit for anonymous access.
apiKey?: string;
// Weaviate class (collection) name that holds the document chunks.
className: string;
}
/**
 * Vector store backed by Weaviate (self-hosted or cloud).
 *
 * Uses the text2vec-openai module, so Weaviate computes embeddings itself
 * at write and query time -- callers pass raw text, not vectors.
 */
class WeaviateVectorStore {
  private client: any; // weaviate-ts-client instance (loosely typed by the lib)
  private className: string;

  constructor(config: WeaviateConfig) {
    const authConfig = config.apiKey ? new AuthApiKey(config.apiKey) : undefined;
    this.client = weaviate.client({
      scheme: config.scheme,
      host: config.host,
      authClientSecret: authConfig,
    });
    this.className = config.className;
  }

  /** Create the class schema. Run once at provisioning time. */
  async createSchema(): Promise<void> {
    const schema = {
      class: this.className,
      description: 'Document chunks for RAG',
      vectorizer: 'text2vec-openai',
      moduleConfig: {
        'text2vec-openai': {
          model: 'text-embedding-3-small',
          vectorizeClassName: false,
        },
      },
      properties: [
        {
          name: 'content',
          dataType: ['text'],
          description: 'Chunk content',
        },
        {
          name: 'documentId',
          dataType: ['string'],
          description: 'Source document ID',
        },
        {
          name: 'tenantId',
          dataType: ['string'],
          description: 'Tenant ID for multi-tenancy',
        },
        {
          name: 'chunkIndex',
          dataType: ['int'],
        },
        {
          name: 'metadata',
          dataType: ['object'],
        },
      ],
    };
    await this.client.schema.classCreator().withClass(schema).do();
  }

  /**
   * Store one chunk as a Weaviate object.
   * API FIX: the data creator's method is withProperties(); the previous
   * withObj() call does not exist on the builder.
   */
  async addDocument(doc: {
    content: string;
    documentId: string;
    tenantId: string;
    chunkIndex: number;
    metadata: Record<string, any>;
  }): Promise<void> {
    await this.client.data
      .creator()
      .withClassName(this.className)
      .withProperties(doc)
      .do();
  }

  /**
   * Semantic search via nearText, optionally scoped to one tenant.
   * Similarity is derived from the returned distance as 1 - distance.
   */
  async search(
    queryText: string,
    options: {
      limit: number;
      tenantId?: string;
      threshold?: number;
    }
  ): Promise<Array<{ id: string; content: string; similarity: number; metadata: any }>> {
    // API FIX: withFields() takes a single GraphQL field string, not an
    // array. Also request `id` under _additional -- the previous field list
    // omitted it, so item._additional.id below was always undefined.
    let query = this.client.graphql
      .get()
      .withClassName(this.className)
      .withFields('content documentId tenantId chunkIndex metadata _additional { id distance }')
      .withNearText({ concepts: [queryText] })
      .withLimit(options.limit);
    if (options.tenantId) {
      query = query.withWhere({
        path: ['tenantId'],
        operator: 'Equal',
        valueString: options.tenantId,
      });
    }
    const result = await query.do();
    return (result.data.Get[this.className] || []).map((item: any) => ({
      id: item._additional.id,
      content: item.content,
      similarity: 1 - item._additional.distance,
      metadata: item.metadata,
    }));
  }
}
Performance Benchmarks
Real-world latency percentiles (1M vectors, top-K = 10 per query):
/** One benchmark row: latency percentiles, throughput, and query cost. */
interface BenchmarkResults {
  vectorDb: 'pgvector' | 'pinecone' | 'weaviate';
  vectorCount: number;
  topK: number;
  p50_ms: number;
  p95_ms: number;
  p99_ms: number;
  throughput_qps: number;
  cost_per_million_queries: number;
}

// All runs used the same corpus size and top-K, so a small row builder keeps
// the table compact and the shared constants in one place.
const benchmarkRow = (
  vectorDb: BenchmarkResults['vectorDb'],
  p50_ms: number,
  p95_ms: number,
  p99_ms: number,
  throughput_qps: number,
  cost_per_million_queries: number
): BenchmarkResults => ({
  vectorDb,
  vectorCount: 1_000_000,
  topK: 10,
  p50_ms,
  p95_ms,
  p99_ms,
  throughput_qps,
  cost_per_million_queries,
});

// From production runs (2026):
const benchmarks: BenchmarkResults[] = [
  benchmarkRow('pgvector', 12, 45, 120, 1200, 0), // self-hosted infra cost amortized
  benchmarkRow('pinecone', 8, 35, 95, 2500, 0.24), // API call cost
  benchmarkRow('weaviate', 15, 50, 130, 800, 0), // self-hosted
];
Filtering with Metadata
Filtering before search improves latency and reduces irrelevant results.
// pgvector - JSON filtering
// $1 = query embedding, $2 = category text, $3 = JSONB array of required tags.
// ->> extracts a text value (cast where needed); @> is JSONB containment and
// can be served by the GIN index on metadata.
const pgQuery = `
SELECT id, content, 1 - (embedding <=> $1::vector) as similarity
FROM documents
WHERE metadata->>'category' = $2
AND (metadata->>'active')::boolean = true
AND (metadata->'tags')::jsonb @> $3
ORDER BY embedding <=> $1::vector
LIMIT 10
`;
// Pinecone - metadata filter syntax
// Mongo-style operators; every clause under $and must match.
const pineconeFilter = {
$and: [
{ category: { $eq: 'tech' } },
{ active: { $eq: true } },
// $in matches when the stored tag equals any listed value.
{ tags: { $in: ['backend', 'devops'] } },
],
};
// Weaviate - structured filtering
// A `where` filter combining operands with And; the value* key (valueString,
// valueBoolean, ...) must match the property's declared data type.
const weaviateFilter = {
operator: 'And',
operands: [
{
path: ['metadata', 'category'],
operator: 'Equal',
valueString: 'tech',
},
{
path: ['metadata', 'active'],
operator: 'Equal',
valueBoolean: true,
},
],
};
Migration Strategies
Moving between vector DBs is painful. Plan ahead.
/** Parameters for a one-off vector migration between stores. */
interface MigrationStep {
// Store to read vectors from.
source: 'pgvector' | 'pinecone' | 'weaviate';
// Store to write vectors into.
destination: 'pgvector' | 'pinecone' | 'weaviate';
// Expected total; migration stops at this count or on an empty batch.
vectorCount: number;
// Vectors fetched/written per loop iteration.
batchSize: number;
// When true, re-read each batch from the destination and verify embeddings.
verifyChecksum: boolean;
}
/**
 * Copy vectors from one store to another in fixed-size batches, with an
 * optional read-back verification pass.
 *
 * BUG FIX: the original "validation" loop used `continue` inside its own
 * for-loop, which only advanced that loop -- invalid docs were still
 * upserted to the destination. Invalid embeddings are now filtered out of
 * the batch before the write (and excluded from verification).
 */
async function migrateVectors(config: MigrationStep): Promise<void> {
  const sourceStore = initializeSourceStore(config.source);
  const destStore = initializeDestStore(config.destination);
  let migratedCount = 0;
  let offset = 0;
  while (migratedCount < config.vectorCount) {
    // Fetch the next page from the source store.
    const batch = await sourceStore.scan({
      limit: config.batchSize,
      offset,
    });
    if (batch.length === 0) break;
    // Drop docs with missing/empty embeddings instead of migrating them.
    const validDocs = batch.filter((doc: any) => {
      if (!doc.embedding || doc.embedding.length === 0) {
        console.warn(`Skipping invalid embedding for doc ${doc.id}`);
        return false;
      }
      return true;
    });
    if (validDocs.length > 0) {
      await destStore.upsert(validDocs);
    }
    // Verify by re-reading from the destination: an identical vector must
    // have cosine similarity ~1 with its source.
    if (config.verifyChecksum && validDocs.length > 0) {
      const destDocs = await destStore.fetch(validDocs.map((d: any) => d.id));
      for (let i = 0; i < validDocs.length; i++) {
        // NOTE: this is a similarity (1 = identical), not a distance.
        const similarity = cosineSimilarity(validDocs[i].embedding, destDocs[i].embedding);
        if (similarity < 0.9999) {
          throw new Error(`Checksum mismatch for doc ${validDocs[i].id}`);
        }
      }
    }
    // Advance by the full batch (including skipped docs) so the scan cursor
    // moves forward and the loop terminates.
    migratedCount += batch.length;
    offset += batch.length;
    console.log(`Migrated ${migratedCount}/${config.vectorCount} vectors`);
  }
}
/**
 * Cosine similarity of two equal-length vectors, in [-1, 1].
 *
 * ROBUSTNESS FIX: the original returned NaN for zero-magnitude vectors
 * (0 / 0) and silently produced NaN on length mismatch (b[i] undefined).
 * Zero-magnitude input now yields 0; mismatched lengths throw.
 *
 * @throws Error when the vectors differ in length.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new Error(`cosineSimilarity: length mismatch (${a.length} vs ${b.length})`);
  }
  const dotProduct = a.reduce((sum, x, i) => sum + x * b[i], 0);
  const normA = Math.sqrt(a.reduce((sum, x) => sum + x * x, 0));
  const normB = Math.sqrt(b.reduce((sum, x) => sum + x * x, 0));
  if (normA === 0 || normB === 0) return 0; // zero vector has no direction
  return dotProduct / (normA * normB);
}
Vector Database Selection Checklist
- pgvector if you need relational joins and tight coupling with PostgreSQL
- pgvector if you want zero managed service costs and control over infra
- Pinecone if you want managed scaling without DevOps overhead
- Pinecone if you can afford roughly $0.04/month per million vectors stored (plus per-call API costs) for simplicity
- Weaviate if you need GraphQL API and schema-first design
- Plan index choice: HNSW for <5M vectors, IVFFlat for >5M
- Test filtering performance with your metadata cardinality
- Calculate total cost over 12 months (storage + infra + queries)
- Plan migration strategy before committing to a vendor
Conclusion
pgvector dominates for cost and control, Pinecone for operational simplicity, Weaviate for flexible schema. Choose based on your team's DevOps capacity, budget, and metadata filtering needs. Benchmark with your actual vector dimensions and query patterns before committing.