Python Data Classes & Type Hints — Modern Python Guide
Python's type system has evolved dramatically. Dataclasses, type hints, and runtime validation with Pydantic have transformed how we model data in Python. This guide covers everything from basic @dataclass to advanced generics and Protocol-based interfaces — with real code you'll use in production.
Dataclasses — The Foundation
Basic dataclass
from dataclasses import dataclass, field
from datetime import datetime, timezone
@dataclass
class User:
    """A user account record.

    ``created_at`` is bookkeeping metadata and is excluded from ``==``, so
    two users with the same name, email, age and active flag compare equal
    even though they were created at different instants.
    """

    name: str
    email: str
    age: int
    is_active: bool = True
    # default_factory runs once per instance, so every user gets its own
    # timestamp. compare=False keeps that timestamp out of __eq__.
    created_at: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc),
        compare=False,
    )


# Auto-generated: __init__, __repr__, __eq__
user = User(name="Alice", email="alice@example.com", age=30)
print(user)
# User(name='Alice', email='alice@example.com', age=30, is_active=True, created_at=...)

# Equality based on field values (created_at is excluded from the comparison)
user2 = User(name="Alice", email="alice@example.com", age=30)
print(user == user2)  # True (same field values)
Frozen (immutable) dataclasses
import math


@dataclass(frozen=True)
class Point:
    """An immutable 2-D point."""

    x: float
    y: float

    @property
    def magnitude(self) -> float:
        """Return the Euclidean distance from the origin.

        math.hypot avoids the OverflowError that squaring very large
        coordinates would raise.
        """
        return math.hypot(self.x, self.y)


p = Point(3.0, 4.0)
print(p.magnitude)  # 5.0
# p.x = 10  # ❌ FrozenInstanceError — can't mutate!

# Frozen dataclasses are hashable — can use as dict keys / set members
points = {Point(0, 0): "origin", Point(1, 1): "diagonal"}
unique = {Point(1, 2), Point(1, 2), Point(3, 4)}
print(len(unique))  # 2
__post_init__ — computed fields
from dataclasses import dataclass, field
@dataclass
class Order:
    """An order whose totals are derived from its line items.

    Attributes:
        items: Line items; each dict must provide "price" and "quantity".
        tax_rate: Fraction of the subtotal charged as tax.
        subtotal: Sum of price * quantity over all items.
        tax: subtotal * tax_rate.
        total: subtotal + tax.

    The three derived values are computed once in ``__post_init__``; they
    are not refreshed if ``items`` is mutated afterwards.
    """

    items: list[dict]
    tax_rate: float = 0.21
    # Computed fields: set in __post_init__
    subtotal: float = field(init=False)
    tax: float = field(init=False)
    total: float = field(init=False)

    def __post_init__(self) -> None:
        # Start from 0.0 so an empty order still yields a float subtotal
        # (a bare sum() over no items would return the int 0).
        self.subtotal = sum(
            (item["price"] * item["quantity"] for item in self.items),
            0.0,
        )
        self.tax = self.subtotal * self.tax_rate
        self.total = self.subtotal + self.tax


order = Order(
    items=[
        {"name": "Widget", "price": 10.0, "quantity": 3},
        {"name": "Gadget", "price": 25.0, "quantity": 1},
    ]
)
print(f"Subtotal: ${order.subtotal:.2f}")  # $55.00
print(f"Tax: ${order.tax:.2f}")  # $11.55
print(f"Total: ${order.total:.2f}")  # $66.55
Slots for performance
# Python 3.10+
@dataclass(slots=True)
class SensorReading:
sensor_id: str
value: float
timestamp: float
# 20-30% faster attribute access, less memory
# No __dict__ — can't add arbitrary attributes
reading = SensorReading("temp-1", 23.5, 1711454700.0)
# reading.extra = "nope" # ❌ AttributeError
Type Hints — The Full Toolkit
Modern syntax (Python 3.10+)
# Old style (pre-3.10)
from typing import Optional, Union, List, Dict, Tuple
def old_style(
    name: Optional[str],
    items: List[int],
    mapping: Dict[str, Union[int, str]],
) -> Tuple[bool, str]:
    """Signature-only example of annotations spelled with ``typing`` aliases."""
    ...
# Modern syntax — cleaner, no imports needed
def modern_style(
name: str | None,
items: list[int],
mapping: dict[str, int | str],
) -> tuple[bool, str]:
...
# Type aliases (Python 3.12+)
# JSON is recursive: it refers to itself on the right-hand side to describe
# arbitrarily nested dicts and lists.
type JSON = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None
# Plain aliases that give a domain name to an existing type.
type UserID = int
type Headers = dict[str, str]
TypedDict — typed dictionaries
from typing import TypedDict, NotRequired
class APIResponse(TypedDict):
status: int
data: dict
message: str
errors: NotRequired[list[str]] # Optional key
def handle_response(resp: APIResponse) -> None:
print(f"Status: {resp['status']}")
if "errors" in resp:
for err in resp["errors"]:
print(f"Error: {err}")
# Type checker validates the dict structure
response: APIResponse = {
"status": 200,
"data": {"user_id": 1},
"message": "OK",
}
handle_response(response) # ✅
Generics — type-safe containers
from typing import Generic, TypeVar
T = TypeVar("T")


class Stack(Generic[T]):
    """Type-safe stack."""

    def __init__(self) -> None:
        # The top of the stack is the end of the list
        self._items: list[T] = []

    def push(self, item: T) -> None:
        """Place ``item`` on top of the stack."""
        self._items.append(item)

    def pop(self) -> T:
        """Remove and return the top item.

        Raises:
            IndexError: If the stack is empty.
        """
        if not self._items:
            raise IndexError("Stack is empty")
        return self._items.pop()

    def peek(self) -> T:
        """Return the top item without removing it.

        Raises:
            IndexError: If the stack is empty.
        """
        if not self._items:
            raise IndexError("Stack is empty")
        return self._items[-1]

    def __len__(self) -> int:
        return len(self._items)


# Type checker knows the element types
int_stack: Stack[int] = Stack()
int_stack.push(42)
int_stack.push(99)
value: int = int_stack.pop()  # ✅ int

str_stack: Stack[str] = Stack()
str_stack.push("hello")
# str_stack.push(42)  # ❌ mypy error: expected str, got int
Python 3.12+ generics (new syntax)
# Python 3.12+ — cleaner generic syntax
class Result[T, E]:
"""Rust-inspired Result type."""
def __init__(self, value: T | None = None, error: E | None = None):
self._value = value
self._error = error
@classmethod
def ok(cls, value: T) -> "Result[T, E]":
return cls(value=value)
@classmethod
def err(cls, error: E) -> "Result[T, E]":
return cls(error=error)
def is_ok(self) -> bool:
return self._error is None
def unwrap(self) -> T:
if self._error is not None:
raise ValueError(f"Called unwrap on Err: {self._error}")
return self._value
# Usage
def divide(a: float, b: float) -> Result[float, str]:
if b == 0:
return Result.err("Division by zero")
return Result.ok(a / b)
result = divide(10, 3)
if result.is_ok():
print(f"Result: {result.unwrap():.2f}") # 3.33
Protocol — Structural Typing (Duck Typing Done Right)
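Protocol lets you define interfaces based on what an object can do, not what it inherits from. It is the structural-typing counterpart to abstract base classes: a class satisfies a Protocol simply by having the right methods, with no inheritance required. See also the Python Design Patterns article for more examples built on Protocol.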
from typing import Protocol, runtime_checkable
@runtime_checkable
class Drawable(Protocol):
"""Anything with a draw() method."""
def draw(self, x: int, y: int) -> None: ...
@runtime_checkable
class Serializable(Protocol):
"""Anything that can serialize to dict."""
def to_dict(self) -> dict: ...
# These classes DON'T inherit from Drawable — they just implement draw()
class Circle:
def __init__(self, radius: float):
self.radius = radius
def draw(self, x: int, y: int) -> None:
print(f"Drawing circle (r={self.radius}) at ({x}, {y})")
def to_dict(self) -> dict:
return {"type": "circle", "radius": self.radius}
class Square:
def __init__(self, side: float):
self.side = side
def draw(self, x: int, y: int) -> None:
print(f"Drawing square (s={self.side}) at ({x}, {y})")
def to_dict(self) -> dict:
return {"type": "square", "side": self.side}
# Type checker accepts any Drawable — no inheritance needed!
def render_all(shapes: list[Drawable], offset_x: int = 0) -> None:
for i, shape in enumerate(shapes):
shape.draw(offset_x + i * 100, 50)
def save_all(items: list[Serializable]) -> list[dict]:
return [item.to_dict() for item in items]
# Works with any object that has the right methods
shapes = [Circle(25), Square(40), Circle(10)]
render_all(shapes)
data = save_all(shapes)
# Runtime check works too
print(isinstance(Circle(5), Drawable)) # True
print(isinstance("not a shape", Drawable)) # False
Pydantic — Runtime Validation
Dataclasses don't validate data. Pydantic does — and it's the backbone of FastAPI.
from pydantic import BaseModel, Field, field_validator, model_validator
from pydantic import EmailStr
from datetime import datetime
from enum import Enum
class Role(str, Enum):
    """Permission level of a user account.

    The ``str`` mixin makes every member compare equal to its plain string
    value.
    """

    admin = "admin"
    user = "user"
    viewer = "viewer"
class CreateUser(BaseModel):
    """Validated user creation schema."""

    name: str = Field(..., min_length=1, max_length=100)
    email: EmailStr
    age: int = Field(..., ge=13, le=150)
    role: Role = Role.user
    # max_length on a list field limits the number of items
    tags: list[str] = Field(default_factory=list, max_length=10)
    password: str = Field(..., min_length=8)

    @field_validator("name")
    @classmethod
    def name_must_be_titlecase(cls, v: str) -> str:
        """Strip surrounding whitespace and title-case the name.

        Raises:
            ValueError: If the name is blank once stripped. The min_length
                check runs before this validator, so a whitespace-only
                name would otherwise slip through and be stored as "".
        """
        cleaned = v.strip()
        if not cleaned:
            raise ValueError("Name must not be blank")
        return cleaned.title()

    @field_validator("tags")
    @classmethod
    def tags_lowercase(cls, v: list[str]) -> list[str]:
        """Lower-case every tag and strip surrounding whitespace."""
        return [tag.lower().strip() for tag in v]

    @model_validator(mode="after")
    def validate_admin_age(self):
        """Reject admin accounts for users younger than 18."""
        if self.role == Role.admin and self.age < 18:
            raise ValueError("Admins must be 18+")
        return self
from pydantic import ValidationError

# ✅ Valid
user = CreateUser(
    name="alice smith",
    email="alice@example.com",
    age=25,
    password="securepass123",
    tags=["Python", "FastAPI"],
)
print(user.name)  # "Alice Smith" (auto-titlecased)
print(user.tags)  # ["python", "fastapi"] (auto-lowercased)

# ❌ Validation errors
try:
    CreateUser(
        name="",  # too short
        email="not-email",  # invalid email
        age=5,  # under 13
        password="short",  # under 8 chars
    )
except ValidationError as e:
    # Catch only validation failures; any other exception is a real bug
    # and should propagate.
    print(e)
    # 4 validation errors:
    # name: String should have at least 1 character
    # email: value is not a valid email address
    # age: Input should be >= 13
    # password: String should have at least 8 characters
Pydantic + JSON serialization
import json
# To dict (pass by_alias= / exclude= to control the output)
data = user.model_dump()
print(data)

# JSON Schema describing the model — a dict, not a JSON string
json_schema = CreateUser.model_json_schema()

# To JSON string
json_data = user.model_dump_json()  # Serialized JSON

# From JSON / dict
user_from_dict = CreateUser.model_validate({"name": "bob", "email": "bob@b.com", "age": 20, "password": "12345678"})
user_from_json = CreateUser.model_validate_json('{"name":"carol","email":"carol@c.com","age":30,"password":"pass12345"}')
Pydantic Settings — config from environment
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """App config from environment variables."""

    app_name: str = "My App"
    debug: bool = False
    database_url: str  # no default: must be provided
    redis_url: str = "redis://localhost:6379"
    secret_key: str  # no default: must be provided
    allowed_origins: list[str] = ["http://localhost:3000"]

    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
    }


# Reads from environment / .env file
# DATABASE_URL=postgresql://... SECRET_KEY=abc123
settings = Settings()
print(settings.database_url)
Dataclass vs Pydantic vs NamedTuple vs attrs
| Feature | dataclass | Pydantic | NamedTuple | attrs |
|---|---|---|---|---|
| Runtime validation | ❌ | ✅ | ❌ | ✅ (validators) |
| JSON serialization | Manual | ✅ Built-in | ❌ | Via cattrs |
| Immutability | frozen=True | frozen=True | Always | frozen=True |
| Performance | ⚡ Fast | ⚡ Fast (v2) | ⚡⚡ Fastest | ⚡ Fast |
| Slots | slots=True | ❌ | Always | slots=True |
| Stdlib | ✅ | ❌ (pip) | ✅ | ❌ (pip) |
| Best for | Internal models | API / config | Simple tuples | Advanced models |
- API request/response → Pydantic (validation + serialization)
- Internal data structures → dataclass (simple, stdlib)
- Return multiple values → NamedTuple (lightweight)
- Complex validation + ORM-like → attrs or Pydantic
Advanced Patterns
Dataclass inheritance
from dataclasses import dataclass, field
from datetime import datetime, timezone
@dataclass
class BaseEntity:
"""Common fields for all database entities."""
id: int | None = None
created_at: datetime = field(
default_factory=lambda: datetime.now(timezone.utc)
)
updated_at: datetime = field(
default_factory=lambda: datetime.now(timezone.utc)
)
@dataclass
class Product(BaseEntity):
name: str = ""
price: float = 0.0
stock: int = 0
@dataclass
class Customer(BaseEntity):
name: str = ""
email: str = ""
tier: str = "free"
product = Product(name="Widget", price=29.99, stock=100)
print(product.created_at) # Inherited from BaseEntity
Dataclass as a lightweight ORM
from dataclasses import dataclass, fields, asdict
import sqlite3
@dataclass
class Model:
"""Base model with save/load capabilities."""
@classmethod
def table_name(cls) -> str:
return cls.__name__.lower() + "s"
@classmethod
def from_row(cls, row: dict) -> "Model":
field_names = {f.name for f in fields(cls)}
filtered = {k: v for k, v in row.items() if k in field_names}
return cls(**filtered)
def to_dict(self) -> dict:
return asdict(self)
def save(self, conn: sqlite3.Connection) -> None:
data = self.to_dict()
cols = ", ".join(data.keys())
placeholders = ", ".join("?" * len(data))
conn.execute(
f"INSERT OR REPLACE INTO {self.table_name()} ({cols}) VALUES ({placeholders})",
list(data.values()),
)
conn.commit()
@dataclass
class Task(Model):
id: int | None = None
title: str = ""
completed: bool = False
# Usage
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE tasks (id INTEGER PRIMARY KEY, title TEXT, completed BOOLEAN)")
task = Task(title="Write docs", completed=False)
task.save(conn) # Saved to SQLite
Converters with cattrs
# pip install cattrs
import cattrs
from dataclasses import dataclass
@dataclass
class Address:
    """A postal address."""

    street: str
    city: str
    zip_code: str


@dataclass
class Person:
    """A person with a nested Address."""

    name: str
    age: int
    address: Address
# Automatic structuring from nested dicts
converter = cattrs.Converter()
# Plain-dict input whose keys mirror the fields of Person; the "address"
# entry mirrors the fields of the nested Address dataclass.
raw = {
"name": "Alice",
"age": 30,
"address": {
"street": "123 Main St",
"city": "Springfield",
"zip_code": "62701",
},
}
# structure() turns the raw dict into a Person; the nested dict is then
# reachable as person.address
person = converter.structure(raw, Person)
print(person.address.city) # Springfield
# Back to dict
data = converter.unstructure(person)
print(data) # Nested dict
Type Checking in Practice
# pyproject.toml
[tool.mypy]
# Language version mypy should assume when checking the code
python_version = "3.12"
# Enable mypy's bundle of strictness checks
strict = true
# NOTE(review): strict mode is documented to include these two warnings already;
# they are spelled out here explicitly — confirm against the mypy docs before removing.
warn_return_any = true
warn_unused_ignores = true
# Per-module overrides
# Relax the "every function must be annotated" rule for modules matching tests.*
[[tool.mypy.overrides]]
module = "tests.*"
disallow_untyped_defs = false
# Run mypy
mypy app/ --strict
# Or use pyright (faster)
pyright app/
# In CI (GitHub Actions)
# - name: Type check
# run: mypy app/ --strict
🚀 Want production-ready Python templates with proper typing, dataclasses, and automation scripts?
Related Articles
- Build a REST API with FastAPI — Pydantic schemas for API validation
- Python Design Patterns — patterns using Protocol and dataclasses
- Python Testing Guide — test typed code with mypy and pytest
- Python Packaging Guide — distribute typed packages with py.typed
- Python Database Operations — SQLAlchemy models vs dataclasses
Need help structuring a Python project with proper typing? I build well-typed APIs, automation tools, and data pipelines. Reach out on Telegram →