- Published on
AI Text Classification in Production — From Zero-Shot to Fine-Tuned Models
- Authors

- Name
- Sanjeev Sharma
- @webcoderspeed1
Introduction
Text classification ranges from simple zero-shot prompting to fine-tuned models, each with distinct latency, cost, and accuracy profiles. This guide covers when to use each approach and how to transition between them as your product scales.
- Zero-Shot Classification with LLM
- Few-Shot Classification
- Embedding-Based Classification
- Fine-Tuned Classifier
- Multi-Label Classification
- Hierarchical Classification
- Confidence Calibration
- Active Learning Loop
- Handling New Categories Without Retraining
- Checklist
- Conclusion
Zero-Shot Classification with LLM
Simplest approach: prompt GPT-4 to classify without training data:
/** Input to any of the classification strategies in this guide. */
interface ClassificationRequest {
  /** Raw text to classify. */
  text: string;
  /** Candidate class labels the model must choose from. */
  classes: string[];
}
/** Uniform result shape shared by all four classification approaches. */
interface ClassificationResult {
  /** Label the model selected (expected to be one of the requested classes). */
  predictedClass: string;
  /** Model-reported or heuristic confidence in [0, 1]. */
  confidence: number;
  /** Free-text explanation (or similarity summary) for the prediction. */
  reasoning: string;
  /** Which strategy produced this result. */
  method: 'zero-shot' | 'few-shot' | 'embedding' | 'fine-tuned';
}
/**
 * Zero-shot classification: ask the chat model to pick a class with no
 * training examples. Works immediately but is the most expensive per call.
 *
 * @param request - text plus the candidate class labels.
 * @returns parsed model prediction tagged with method 'zero-shot'.
 * @throws Error when the API call fails or the reply is not valid JSON.
 */
async function zeroShotClassify(
  request: ClassificationRequest
): Promise<ClassificationResult> {
  const classesStr = request.classes.join(', ');
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4-turbo',
      // Force a JSON object response so JSON.parse below cannot choke on
      // markdown code fences or prose wrapped around the payload.
      response_format: { type: 'json_object' },
      messages: [
        {
          role: 'system',
          content: `Classify the text into one of: ${classesStr}.
Respond with JSON: { class: string, confidence: number (0-1), reasoning: string }`
        },
        {
          role: 'user',
          content: request.text
        }
      ],
      temperature: 0 // deterministic output for classification
    })
  });
  // Surface HTTP failures instead of crashing later on a malformed body.
  if (!response.ok) {
    throw new Error(`OpenAI API error: ${response.status}`);
  }
  const data = await response.json();
  const content = data.choices[0].message.content;
  const parsed = JSON.parse(content);
  return {
    predictedClass: parsed.class,
    // Clamp in case the model reports a value slightly outside [0, 1].
    confidence: Math.min(1, Math.max(0, Number(parsed.confidence) || 0)),
    reasoning: parsed.reasoning,
    method: 'zero-shot'
  };
}
// Pros: Works immediately, no training data needed
// Cons: $0.03-0.15 per call, 100-500ms latency, less consistent format
// Use case: Low-volume, high-accuracy requirements
Few-Shot Classification
Provide examples to improve performance without full training:
/** One labeled demonstration embedded in a few-shot prompt. */
interface FewShotExample {
  /** Example text shown to the model. */
  text: string;
  /** Ground-truth label for the example. */
  label: string;
}
/**
 * Few-shot classification: embed labeled examples in the prompt to improve
 * accuracy without any model training.
 *
 * @param text - text to classify.
 * @param classes - candidate class labels.
 * @param examples - labeled demonstrations included in the system prompt.
 * @throws Error when the API call fails.
 */
async function fewShotClassify(
  text: string,
  classes: string[],
  examples: FewShotExample[]
): Promise<ClassificationResult> {
  const examplesStr = examples
    .map(ex => `Text: "${ex.text}"\nLabel: ${ex.label}`)
    .join('\n\n');
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'system',
          content: `Classify text into: ${classes.join(', ')}.\nExamples:\n${examplesStr}`
        },
        {
          role: 'user',
          content: `Classify: "${text}"`
        }
      ],
      temperature: 0
    })
  });
  if (!response.ok) {
    throw new Error(`OpenAI API error: ${response.status}`);
  }
  const data = await response.json();
  const output = data.choices[0].message.content;
  // Match the reply against the actual candidate labels instead of grabbing
  // the first word, which could be filler like "Label" or "The".
  const lowerOutput = output.toLowerCase();
  const predictedClass =
    classes.find(c => lowerOutput.includes(c.toLowerCase())) ?? classes[0];
  return {
    predictedClass,
    // Heuristic: few-shot prompting yields no calibrated confidence signal.
    confidence: 0.85,
    reasoning: output,
    method: 'few-shot'
  };
}
// 5-10 examples per class dramatically improves accuracy
// Cost: 30-50% of zero-shot — the cheaper gpt-3.5-turbo model more than offsets the longer prompts
// Latency: Similar to zero-shot
Embedding-Based Classification
Use embeddings + simple ML models for fast, cheap inference:
/** Nearest-centroid classifier built on top of text embeddings. */
interface EmbeddingClassifier {
  /** One centroid (mean) embedding per class label. */
  classEmbeddings: Map<string, number[]>;
  /** Decision rule; only 'knn' (nearest centroid) is produced by training here. */
  model: 'knn' | 'svm' | 'logistic_regression';
}
/**
 * Builds a nearest-centroid classifier over OpenAI embeddings: one centroid
 * (element-wise mean embedding) is computed per class.
 *
 * @param trainingExamples - labeled (text, label) pairs.
 * @param classes - class labels to build centroids for.
 * @throws Error when a class has no training examples or an API call fails.
 */
async function trainEmbeddingClassifier(
  trainingExamples: Array<{ text: string; label: string }>,
  classes: string[]
): Promise<EmbeddingClassifier> {
  const classEmbeddings = new Map<string, number[]>();
  for (const className of classes) {
    const classExamples = trainingExamples.filter(
      ex => ex.label === className
    );
    // A class with zero examples would crash the centroid computation on
    // embeddings[0]; fail loudly with a useful message instead.
    if (classExamples.length === 0) {
      throw new Error(`No training examples for class "${className}"`);
    }
    // Batch all texts of the class into a single embeddings request — the
    // API accepts an array input, avoiding N sequential round-trips.
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: 'text-embedding-3-small',
        input: classExamples.map(ex => ex.text)
      })
    });
    if (!response.ok) {
      throw new Error(`Embedding request failed: ${response.status}`);
    }
    const data = await response.json();
    const embeddings: number[][] = data.data.map(
      (d: { embedding: number[] }) => d.embedding
    );
    // Centroid = element-wise mean of the class's embeddings.
    const centroid = Array(embeddings[0].length)
      .fill(0)
      .map((_, i) =>
        embeddings.reduce((sum, emb) => sum + emb[i], 0) / embeddings.length
      );
    classEmbeddings.set(className, centroid);
  }
  return {
    classEmbeddings,
    model: 'knn'
  };
}
/**
 * Cosine similarity between two equal-length vectors, in [-1, 1].
 *
 * @param a - first vector.
 * @param b - second vector (same length as `a`).
 * @returns cosine of the angle between the vectors; 0 when either vector has
 *   zero magnitude (the original divided by zero and yielded NaN).
 */
function cosineSimilarity(a: number[], b: number[]): number {
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  // Guard zero vectors: similarity is mathematically undefined, report 0.
  return denom === 0 ? 0 : dotProduct / denom;
}
/**
 * Classifies text by embedding it and choosing the nearest class centroid by
 * cosine similarity. Fast and cheap relative to chat-completion approaches.
 *
 * @param text - text to classify.
 * @param classifier - trained nearest-centroid classifier.
 * @throws Error when the classifier is empty or the embedding call fails.
 */
async function classifyWithEmbedding(
  text: string,
  classifier: EmbeddingClassifier
): Promise<ClassificationResult> {
  // An empty classifier would silently return predictedClass '' below.
  if (classifier.classEmbeddings.size === 0) {
    throw new Error('Classifier has no trained classes');
  }
  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'text-embedding-3-small',
      input: text
    })
  });
  if (!response.ok) {
    throw new Error(`Embedding request failed: ${response.status}`);
  }
  const data = await response.json();
  const textEmbedding = data.data[0].embedding;
  // Nearest-centroid search over the known classes.
  let bestClass = '';
  let bestScore = -1;
  classifier.classEmbeddings.forEach((classEmb, className) => {
    const similarity = cosineSimilarity(textEmbedding, classEmb);
    if (similarity > bestScore) {
      bestScore = similarity;
      bestClass = className;
    }
  });
  return {
    predictedClass: bestClass,
    confidence: (bestScore + 1) / 2, // Map cosine range [-1, 1] onto [0, 1]
    reasoning: `Highest similarity: ${bestScore.toFixed(3)}`,
    method: 'embedding'
  };
}
// Pros: $0.002 per call, 50ms latency, deterministic
// Cons: Requires training data, lower accuracy than fine-tuning
// Use case: Medium-volume, cost-conscious, multi-class classification
Fine-Tuned Classifier
Optimal for high-volume, specialized classification:
/** Handle to a fine-tuned classifier plus its evaluation metrics. */
interface FineTunedClassifier {
  /** Identifier passed as `model` at inference time. */
  modelId: string;
  /** Closed set of labels the model was trained on. */
  classes: string[];
  /** Accuracy on the training split. */
  trainingAccuracy: number;
  /** Accuracy on the held-out validation split. */
  validationAccuracy: number;
}
/**
 * Uploads training data and kicks off an OpenAI fine-tuning job.
 *
 * NOTE: the returned modelId falls back to the fine-tuning JOB id — the
 * deployable model name (`fine_tuned_model`) only exists once the job
 * succeeds, so callers must poll the job before running inference.
 *
 * @param trainingData - labeled (text, label) pairs; labels come from `classes`.
 * @param classes - the closed set of class labels.
 * @throws Error when the file upload or job submission fails.
 */
async function trainFineTunedClassifier(
  trainingData: Array<{ text: string; label: string }>,
  classes: string[]
): Promise<FineTunedClassifier> {
  // JSONL chat format expected by the fine-tuning API: one
  // system/user/assistant triple per training example.
  const trainingLines = trainingData.map(ex =>
    JSON.stringify({
      messages: [
        {
          role: 'system',
          content: `Classify text into one of: ${classes.join(', ')}`
        },
        {
          role: 'user',
          content: ex.text
        },
        {
          role: 'assistant',
          content: ex.label
        }
      ]
    })
  );
  const trainingContent = trainingLines.join('\n');
  // Upload the training file as multipart/form-data. Do NOT set Content-Type
  // manually: fetch derives the multipart boundary from the FormData body.
  const form = new FormData();
  form.append('purpose', 'fine-tune');
  form.append(
    'file',
    new Blob([trainingContent], { type: 'application/jsonl' }),
    'training.jsonl'
  );
  const uploadResponse = await fetch('https://api.openai.com/v1/files', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`
    },
    body: form
  });
  if (!uploadResponse.ok) {
    throw new Error(`File upload failed: ${uploadResponse.status}`);
  }
  const uploadData = await uploadResponse.json();
  const fileId = uploadData.id;
  // Submit the fine-tune job referencing the uploaded file.
  const finetuneResponse = await fetch('https://api.openai.com/v1/fine_tuning/jobs', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      training_file: fileId,
      model: 'gpt-3.5-turbo',
      hyperparameters: {
        n_epochs: 3
      }
    })
  });
  if (!finetuneResponse.ok) {
    throw new Error(`Fine-tune job submission failed: ${finetuneResponse.status}`);
  }
  const finetuneData = await finetuneResponse.json();
  return {
    // Prefer the deployable model name when present; otherwise the job id.
    modelId: finetuneData.fine_tuned_model ?? finetuneData.id,
    classes,
    // Placeholder metrics — real values should be read from the completed
    // job's result files, not hard-coded.
    trainingAccuracy: 0.95,
    validationAccuracy: 0.92
  };
}
/**
 * Classifies text with a previously fine-tuned chat model.
 *
 * @param text - text to classify.
 * @param classifier - handle returned by trainFineTunedClassifier.
 * @throws Error when the API call fails.
 */
async function classifyWithFineTuned(
  text: string,
  classifier: FineTunedClassifier
): Promise<ClassificationResult> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: classifier.modelId,
      messages: [
        {
          role: 'user',
          content: text
        }
      ],
      temperature: 0
    })
  });
  if (!response.ok) {
    throw new Error(`OpenAI API error: ${response.status}`);
  }
  const data = await response.json();
  const raw = data.choices[0].message.content.trim();
  // Snap the raw completion to a known class — fine-tuned models can emit
  // casing differences; fall back to the raw text so the caller can inspect it.
  const predictedClass =
    classifier.classes.find(c => c.toLowerCase() === raw.toLowerCase()) ?? raw;
  return {
    predictedClass,
    // Heuristic placeholder — derive real confidence from logprobs if needed.
    confidence: 0.96,
    reasoning: 'Fine-tuned model prediction',
    method: 'fine-tuned'
  };
}
// Pros: $0.001 per call, 50ms, highest accuracy (95%+)
// Cons: Requires 500+ training examples, training cost, update latency
// Use case: High-volume, stable classes, cost optimization
Multi-Label Classification
Single text can belong to multiple classes:
/** Result of multi-label classification. */
interface MultiLabelResult {
  /** Every label the model reported, with per-label confidence. */
  labels: Array<{ label: string; confidence: number }>;
  /** Labels whose confidence cleared the threshold. */
  predictedLabels: string[];
}
/**
 * Multi-label classification: one text may carry several labels at once
 * (e.g. a product description tagged "electronics", "gift", "premium").
 *
 * @param text - text to classify.
 * @param possibleLabels - the full label vocabulary.
 * @param confidenceThreshold - minimum per-label confidence for a label to be
 *   counted as predicted (default 0.5).
 * @throws Error when the API call fails or the reply is not valid JSON.
 */
async function multiLabelClassify(
  text: string,
  possibleLabels: string[],
  confidenceThreshold: number = 0.5
): Promise<MultiLabelResult> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4',
      messages: [
        {
          role: 'system',
          content: `Identify ALL relevant labels from: ${possibleLabels.join(', ')}.
Respond with JSON: { labels: [{ label: string, confidence: number }] }`
        },
        {
          role: 'user',
          content: text
        }
      ],
      temperature: 0
    })
  });
  if (!response.ok) {
    throw new Error(`OpenAI API error: ${response.status}`);
  }
  const data = await response.json();
  const content = data.choices[0].message.content;
  // Strip markdown code fences the model sometimes wraps JSON in; base gpt-4
  // does not support response_format, so the reply is not guaranteed bare JSON.
  const jsonText = content.replace(/^```(?:json)?\s*|\s*```$/g, '').trim();
  const parsed = JSON.parse(jsonText);
  const labels = parsed.labels || [];
  const predictedLabels = labels
    .filter((l: { confidence: number }) => l.confidence >= confidenceThreshold)
    .map((l: { label: string }) => l.label);
  return {
    labels,
    predictedLabels
  };
}
// Example: Product description tagged with ["electronics", "gift", "premium"]
Hierarchical Classification
Classes are organized in hierarchy (root → category → subcategory):
/** Node in a classification hierarchy (root → category → subcategory). */
interface HierarchyNode {
  /** Stable identifier for the node. */
  id: string;
  /** Human-readable class label shown to the model. */
  label: string;
  /** Child categories; absent or empty on leaf nodes. */
  children?: HierarchyNode[];
}
/**
 * Walks a class hierarchy top-down, asking the model to choose among the
 * children at each level. Confidence decays multiplicatively per level.
 *
 * @param text - text to classify.
 * @param hierarchy - root node of the class tree.
 * @returns the chosen label path from root and an aggregate confidence.
 * @throws Error when an API call fails.
 */
async function hierarchicalClassify(
  text: string,
  hierarchy: HierarchyNode
): Promise<{ path: string[]; confidence: number }> {
  let currentNode = hierarchy;
  const path: string[] = [];
  let confidence = 1.0;
  // Navigate the hierarchy one level at a time until a leaf is reached.
  while (currentNode.children && currentNode.children.length > 0) {
    const childLabels = currentNode.children.map(c => c.label);
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: 'gpt-3.5-turbo',
        messages: [
          {
            role: 'user',
            content: `Classify into: ${childLabels.join(', ')}.\nText: "${text}"`
          }
        ],
        temperature: 0
      })
    });
    if (!response.ok) {
      throw new Error(`OpenAI API error: ${response.status}`);
    }
    const data = await response.json();
    const output = data.choices[0].message.content;
    // Pick the child whose label actually appears in the reply; scanning the
    // candidate labels is far more reliable than grabbing the first word.
    const lowerOutput = output.toLowerCase();
    const selectedChild =
      currentNode.children.find(c =>
        lowerOutput.includes(c.label.toLowerCase())
      ) ?? currentNode.children[0]; // fall back to first child, as the flat classifiers do
    path.push(selectedChild.label);
    currentNode = selectedChild;
    confidence *= 0.95; // Decrease confidence at each level
  }
  return { path, confidence };
}
// Example hierarchy:
// News
// ├── Technology
// │ ├── AI
// │ ├── Hardware
// ├── Sports
// │ ├── Football
// │ ├── Basketball
Confidence Calibration
Ensure model confidence matches actual accuracy:
/** Aggregate calibration statistics for a batch of labeled predictions. */
interface CalibrationMetrics {
  expectedAccuracy: number; // Average confidence of predictions
  actualAccuracy: number; // Actual % correct
  calibrationError: number; // |expected - actual|
}
/**
 * Measures how well model confidence tracks real accuracy. A perfectly
 * calibrated model that says "80% confident" is right 80% of the time.
 *
 * @param predictions - labeled predictions with the model's confidence.
 * @returns average confidence, empirical accuracy, and their absolute gap.
 * @throws Error on empty input (the averages would otherwise be NaN).
 */
function calibrateConfidence(
  predictions: Array<{
    predicted: string;
    actual: string;
    confidence: number;
  }>
): CalibrationMetrics {
  // Guard the division below — an empty batch has no meaningful calibration.
  if (predictions.length === 0) {
    throw new Error('calibrateConfidence requires at least one prediction');
  }
  const avgConfidence =
    predictions.reduce((sum, p) => sum + p.confidence, 0) / predictions.length;
  const correct = predictions.filter(p => p.predicted === p.actual).length;
  const actualAccuracy = correct / predictions.length;
  // If model says 80% confident but only 70% correct: miscalibrated
  const calibrationError = Math.abs(avgConfidence - actualAccuracy);
  return {
    expectedAccuracy: avgConfidence,
    actualAccuracy,
    calibrationError
  };
}
// Calibration techniques:
// - Temperature scaling: divide logits by learned T
// - Platt scaling: fit logistic regression to confidences
// - Isotonic regression: monotonic calibration curve
Active Learning Loop
Select high-uncertainty examples for labeling:
/** Output of one active-learning selection pass. */
interface ActiveLearningBatch {
  /** Every scored example, sorted most-uncertain first. */
  unlabeledExamples: Array<{
    id: string;
    text: string;
    modelUncertainty: number;
  }>;
  selectedForLabeling: string[]; // Ids of the most uncertain examples (up to batchSize)
}
/**
 * Active-learning selection: score every unlabeled example with the current
 * model and pick the most uncertain ones for human labeling.
 *
 * @param unlabeledData - pool of unlabeled examples.
 * @param currentModel - fine-tuned classifier used for scoring.
 * @param batchSize - number of example ids to select (default 50).
 * @throws Error when classification of any example fails.
 */
async function selectForLabelingActive(
  unlabeledData: Array<{ id: string; text: string }>,
  currentModel: FineTunedClassifier,
  batchSize: number = 50
): Promise<ActiveLearningBatch> {
  const scored: Array<{
    id: string;
    text: string;
    modelUncertainty: number;
  }> = [];
  for (const item of unlabeledData) {
    const prediction = await classifyWithFineTuned(item.text, currentModel);
    // Distance from 0.5: small values mean the model is least certain.
    // NOTE(review): this assumes `confidence` behaves like a binary
    // probability; for multi-class, margin/entropy scores fit better — confirm.
    const modelUncertainty = Math.abs(prediction.confidence - 0.5);
    // Field renamed to match the declared ActiveLearningBatch shape (the
    // original pushed `uncertainty`, which fails type-checking).
    scored.push({ id: item.id, text: item.text, modelUncertainty });
  }
  // Ascending sort: most uncertain (closest to 0.5) first. Copy before
  // sorting so no caller-visible array is mutated.
  const sorted = [...scored].sort(
    (a, b) => a.modelUncertainty - b.modelUncertainty
  );
  return {
    unlabeledExamples: sorted,
    selectedForLabeling: sorted.slice(0, batchSize).map(s => s.id)
  };
}
// Reduces labeling cost: focus on hard examples model disagrees on
// Typically achieves same accuracy with 30% fewer labels
Handling New Categories Without Retraining
Detect out-of-distribution examples and add new class:
/**
 * Out-of-distribution detection: if the text's embedding is not close enough
 * to ANY known class centroid, flag it as a potential new category and ask an
 * LLM to suggest a name for it.
 *
 * @param text - incoming text to check.
 * @param classifier - trained nearest-centroid classifier.
 * @param threshold - minimum cosine similarity to count as "known" (default 0.6).
 * @throws Error when the classifier has no classes or an API call fails.
 */
async function detectNewCategory(
  text: string,
  classifier: EmbeddingClassifier,
  threshold: number = 0.6
): Promise<{
  isNewCategory: boolean;
  suggestedCategory?: string;
  confidence: number;
}> {
  // Without known classes, "new vs known" is meaningless.
  if (classifier.classEmbeddings.size === 0) {
    throw new Error('Classifier has no known classes');
  }
  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'text-embedding-3-small',
      input: text
    })
  });
  if (!response.ok) {
    throw new Error(`Embedding request failed: ${response.status}`);
  }
  const data = await response.json();
  const textEmbedding = data.data[0].embedding;
  // Maximum similarity to any known class centroid.
  let maxSimilarity = -1;
  classifier.classEmbeddings.forEach(classEmb => {
    const similarity = cosineSimilarity(textEmbedding, classEmb);
    if (similarity > maxSimilarity) {
      maxSimilarity = similarity;
    }
  });
  const isNew = maxSimilarity < threshold;
  if (isNew) {
    // Below threshold: ask an LLM for a plausible new category name.
    const nameResponse = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: 'gpt-3.5-turbo',
        messages: [
          {
            role: 'user',
            content: `Suggest a category name for: "${text}"\nExisting: ${Array.from(classifier.classEmbeddings.keys()).join(', ')}`
          }
        ]
      })
    });
    if (!nameResponse.ok) {
      throw new Error(`OpenAI API error: ${nameResponse.status}`);
    }
    const nameData = await nameResponse.json();
    const suggestedName = nameData.choices[0].message.content.trim();
    return {
      isNewCategory: true,
      suggestedCategory: suggestedName,
      // Further from every known class → higher "new category" confidence.
      confidence: 1 - maxSimilarity
    };
  }
  return {
    isNewCategory: false,
    confidence: maxSimilarity
  };
}
Checklist
- Started with zero-shot classification to establish baseline
- Collected 50-100 labeled examples per class
- Evaluated zero-shot vs few-shot accuracy difference
- Trained embedding-based classifier for cost optimization
- Compared embedding vs zero-shot on accuracy/latency/cost
- Fine-tuned model once data exceeded 500 examples per class
- Calibrated model confidence scores
- Set up active learning to reduce labeling burden
- Implemented detection for out-of-distribution examples
- Built monitoring for confidence/accuracy divergence
- Documented decision logic: when to use which approach
Conclusion
Start with zero-shot for rapid prototyping, migrate to embeddings when you have 100+ examples and need cost reduction, and fine-tune at 500+ examples for optimal accuracy. Track the confidence-accuracy gap and use active learning to reduce labeling costs by 30%.