HuggingFace Transformers Complete Guide 2026: NLP, Vision, and Audio
HuggingFace Transformers 2026: The ML Engineer's Swiss Army Knife
HuggingFace has become the GitHub of AI. Over 500,000 models, datasets, and spaces — all accessible through a unified API. This guide covers everything from quick inference to production deployment.
- Installation and Setup
- The Pipeline API: Inference in 3 Lines
- Text Classification with Fine-Tuning
- Named Entity Recognition (Production)
- Text Generation with GPT-2 / Llama
- Sentence Embeddings for Semantic Search
- Image Classification with Vision Transformers
- Audio: Speech Recognition with Whisper
- Deploy to Production with FastAPI
- Push Your Model to HuggingFace Hub
- Model Comparison: Which to Use
Installation and Setup
pip install transformers datasets accelerate tokenizers
pip install torch # or tensorflow, or jax
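To confirm the install, a quick sanity check prints the library version and shows whether a GPU is visible (this sketch assumes the PyTorch backend installed above):

import torch
import transformers

# Verify the install and check for CUDA
print(transformers.__version__)   # e.g. 4.x
print(torch.cuda.is_available())  # True if a GPU is set up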
The Pipeline API: Inference in 3 Lines
from transformers import pipeline
# Text classification (sentiment)
classifier = pipeline("sentiment-analysis")
result = classifier("HuggingFace Transformers makes NLP incredibly easy!")
# [{'label': 'POSITIVE', 'score': 0.9998}]
# Named Entity Recognition
ner = pipeline("ner", aggregation_strategy="simple")  # merges sub-tokens into whole entities
entities = ner("Apple Inc. was founded by Steve Jobs in Cupertino, California.")
# [{'entity_group': 'ORG', 'word': 'Apple Inc.'}, {'entity_group': 'PER', 'word': 'Steve Jobs'}, ...]
# Summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summary = summarizer(long_article, max_length=130, min_length=30)  # long_article: any long document string
# Translation
translator = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
result = translator("Machine learning is transforming every industry.")
# Question Answering
qa = pipeline("question-answering")
result = qa(
    question="What year was Python created?",
    context="Python was created by Guido van Rossum and first released in 1991."
)
# {'answer': '1991', 'score': 0.998}
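Pipelines also accept a list of inputs and can batch them on the device. A minimal sketch, reusing the sentiment classifier from above:

# Batched inference: pass a list and let the pipeline handle batching
texts = ["Great library!", "Terrible documentation.", "It works fine."]
batch_results = classifier(texts, batch_size=8)
# [{'label': 'POSITIVE', ...}, {'label': 'NEGATIVE', ...}, {'label': 'POSITIVE', ...}]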
Text Classification with Fine-Tuning
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score
# Load pre-trained model
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# Load dataset
dataset = load_dataset("imdb")
def tokenize(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)

tokenized = dataset.map(tokenize, batched=True)
# Training
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=-1)
    return {"accuracy": accuracy_score(labels, preds)}
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    eval_strategy="epoch",  # named evaluation_strategy in transformers < 4.41
    save_strategy="epoch",
    load_best_model_at_end=True,
    logging_dir="./logs",
    report_to="none",
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"].select(range(5000)),
    eval_dataset=tokenized["test"].select(range(1000)),
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.save_model("./my-sentiment-model")
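To sanity-check the result, the saved checkpoint loads like any Hub model. A minimal sketch; since the tokenizer was not saved alongside the model above, it is passed explicitly here:

# Reload the fine-tuned checkpoint for inference
from transformers import pipeline

clf = pipeline("text-classification", model="./my-sentiment-model", tokenizer=model_name)
print(clf("This movie was a complete waste of time."))
# Labels print as LABEL_0/LABEL_1 unless id2label was configured before training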
Named Entity Recognition (Production)
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
# Use a production-grade NER model
model_name = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

def extract_entities(text: str) -> dict:
    """Extract and organize named entities from text."""
    entities = ner_pipeline(text)
    result = {"PER": [], "ORG": [], "LOC": [], "MISC": []}
    for entity in entities:
        group = entity["entity_group"]
        if group in result and entity["score"] > 0.85:
            result[group].append({
                "text": entity["word"],
                "confidence": round(float(entity["score"]), 3),  # cast from numpy float for JSON safety
            })
    return result
text = "Elon Musk's Tesla and SpaceX are headquartered in Austin, Texas and Hawthorne, California."
print(extract_entities(text))
# {'PER': [{'text': 'Elon Musk', 'confidence': 0.999}],
# 'ORG': [{'text': 'Tesla', ...}, {'text': 'SpaceX', ...}],
# 'LOC': [{'text': 'Austin, Texas', ...}, ...], ...}
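If entities come back split into word pieces (e.g. "Haw" + "##thorne"), try a different merging mode: aggregation_strategy also accepts "first", "average", and "max", which resolve tag disagreements between sub-tokens differently. A quick comparison, reusing the model, tokenizer, and text from above:

# Compare sub-token merging strategies on the same sentence
for strategy in ("simple", "max"):
    p = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy=strategy)
    print(strategy, [(e["word"], e["entity_group"]) for e in p(text)])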
Text Generation with GPT-2 / Llama
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load a smaller model for demos
model_name = "gpt2-medium" # or "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def generate_text(prompt: str, max_new_tokens: int = 200) -> str:
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Slice off the prompt tokens, then decode only the new ones
    # (slicing the decoded string by len(prompt) breaks when tokenization normalizes whitespace)
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
print(generate_text("The future of artificial intelligence is"))
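For interactive use, tokens can be streamed to stdout as they are produced instead of waiting for the full output. A minimal sketch using transformers' TextStreamer with the model and tokenizer loaded above:

from transformers import TextStreamer

# Print tokens as they are generated
streamer = TextStreamer(tokenizer, skip_prompt=True)
inputs = tokenizer("Once upon a time", return_tensors="pt")
model.generate(
    **inputs,
    streamer=streamer,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)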
Sentence Embeddings for Semantic Search
from sentence_transformers import SentenceTransformer
import numpy as np
# sentence-transformers is built on HuggingFace
model = SentenceTransformer("all-MiniLM-L6-v2") # fast + accurate
sentences = [
"Machine learning algorithms improve with more data",
"Deep learning uses neural networks with many layers",
"Python is the most popular language for data science",
"The stock market closed higher today",
]
# Encode all sentences
embeddings = model.encode(sentences, normalize_embeddings=True)
def semantic_search(query: str, corpus_embeddings: np.ndarray, sentences: list, top_k: int = 3):
    query_embedding = model.encode([query], normalize_embeddings=True)
    # With normalized embeddings, dot product equals cosine similarity
    scores = np.dot(corpus_embeddings, query_embedding.T).flatten()
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [(sentences[i], float(scores[i])) for i in top_indices]
results = semantic_search("neural network training", embeddings, sentences)
for sentence, score in results:
print(f"{score:.3f}: {sentence}")
Image Classification with Vision Transformers
from transformers import ViTForImageClassification, ViTImageProcessor
from PIL import Image
import torch
model_name = "google/vit-base-patch16-224"
processor = ViTImageProcessor.from_pretrained(model_name)
model = ViTForImageClassification.from_pretrained(model_name)
def classify_image(image_path: str) -> list[dict]:
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probs = torch.softmax(logits, dim=-1)[0]
    top5 = torch.topk(probs, 5)
    return [
        {
            "label": model.config.id2label[idx.item()],
            "confidence": round(prob.item(), 4),
        }
        for prob, idx in zip(top5.values, top5.indices)
    ]
results = classify_image("dog.jpg")
# [{'label': 'golden retriever', 'confidence': 0.9834}, ...]
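The same model works on images fetched over HTTP. A small variant, assuming the requests package is installed and the URL (a placeholder here) points at a valid image:

import io
import requests

# Fetch an image over HTTP and classify it with the same model
url = "https://example.com/dog.jpg"  # hypothetical URL
image = Image.open(io.BytesIO(requests.get(url, timeout=10).content)).convert("RGB")
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[int(logits.argmax(-1))])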
Audio: Speech Recognition with Whisper
from transformers import pipeline
import torch
# Whisper via HuggingFace (offline, no API key needed)
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device="cuda" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
# Transcribe audio file
result = pipe("audio.mp3", return_timestamps=True)
print(result["text"])
print(result["chunks"]) # Word-level timestamps
# For long audio files (>30s), use chunking
result = pipe(
    "long_lecture.mp3",
    chunk_length_s=30,
    batch_size=8,
    return_timestamps=True,
)
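Whisper is multilingual, and the source language and task can be forced through generate_kwargs. A sketch, assuming a French-language recording (the filename is a placeholder):

# Force the source language and translate the transcript into English
result = pipe(
    "french_interview.mp3",
    generate_kwargs={"language": "french", "task": "translate"},
)
print(result["text"])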
Deploy to Production with FastAPI
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
import torch

models = {}

# Load models once at startup via FastAPI's lifespan hook
# (@app.on_event("startup") is deprecated in current FastAPI releases)
@asynccontextmanager
async def lifespan(app: FastAPI):
    device = 0 if torch.cuda.is_available() else -1
    models["sentiment"] = pipeline("sentiment-analysis", device=device)
    models["ner"] = pipeline("ner", aggregation_strategy="simple", device=device)
    models["summarize"] = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
    yield
    models.clear()

app = FastAPI(title="NLP API", lifespan=lifespan)
class TextRequest(BaseModel):
    text: str

@app.post("/sentiment")
async def sentiment(req: TextRequest):
    result = models["sentiment"](req.text[:512])
    return result[0]

@app.post("/ner")
async def ner(req: TextRequest):
    entities = models["ner"](req.text[:512])
    # Cast numpy floats so FastAPI can JSON-encode the response
    for e in entities:
        e["score"] = float(e["score"])
    return entities

@app.post("/summarize")
async def summarize(req: TextRequest):
    if len(req.text) < 50:
        raise HTTPException(400, "Text too short to summarize")
    result = models["summarize"](req.text[:1024], max_length=130, min_length=30)
    return {"summary": result[0]["summary_text"]}
Push Your Model to HuggingFace Hub
# Login first
# huggingface-cli login
# Push the trained model. Set hub_model_id="your-username/my-sentiment-model" in
# TrainingArguments first; trainer.push_to_hub() takes a commit message, not a repo id.
trainer.push_to_hub()
# Or manually
model.push_to_hub("your-username/my-model")
tokenizer.push_to_hub("your-username/my-model")
# Load your model anywhere
from transformers import pipeline
classifier = pipeline("sentiment-analysis", model="your-username/my-sentiment-model")
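Repositories are public by default; to keep a model to yourself, push_to_hub accepts a private flag:

# Create the Hub repo as private instead of public
model.push_to_hub("your-username/my-model", private=True)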
Model Comparison: Which to Use
| Task | Best Model 2026 | Size | Speed |
|---|---|---|---|
| Sentiment | distilbert-base-uncased-finetuned-sst-2-english | 67M | Fast |
| NER | dslim/bert-base-NER | 110M | Fast |
| Summarization | facebook/bart-large-cnn | 406M | Medium |
| Translation | Helsinki-NLP/opus-mt-* | 74M | Fast |
| Embeddings | all-MiniLM-L6-v2 | 22M | Very Fast |
| Image Class. | google/vit-base-patch16-224 | 86M | Fast |
| Speech-to-Text | openai/whisper-base | 74M | Medium |
HuggingFace gives you enterprise-grade NLP, vision, and audio capabilities with zero API costs — just your compute.