Untested code is broken code you haven't noticed yet. This guide covers everything from writing your first test to building a full CI pipeline — with patterns you'll actually use in production.
We'll use pytest throughout (the de facto standard), but all concepts apply to unittest as well.
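For instance, a bare pytest assert maps onto unittest's assertion methods. A minimal sketch of the same kind of check written with the standard-library unittest (class and test names here are illustrative):

# Equivalent test in unittest style (sketch)
import unittest

class TestAddition(unittest.TestCase):
    def test_add(self):
        self.assertEqual(2 + 3, 5)

if __name__ == "__main__":
    unittest.main()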
Install and run your first test in under a minute.
# Install
pip install pytest pytest-cov
# Project structure
my_project/
├── src/
│   └── calculator.py
├── tests/
│   ├── conftest.py          # Shared fixtures
│   ├── test_calculator.py
│   └── test_integration.py
├── pyproject.toml
└── pytest.ini
# src/calculator.py
class Calculator:
    def __init__(self):
        self.history = []

    def add(self, a: float, b: float) -> float:
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        return result

    def divide(self, a: float, b: float) -> float:
        if b == 0:
            raise ValueError("Cannot divide by zero")
        result = a / b
        self.history.append(f"{a} / {b} = {result}")
        return result

    def average(self, numbers: list) -> float:
        if not numbers:
            raise ValueError("Cannot average empty list")
        return sum(numbers) / len(numbers)
# tests/test_calculator.py
import pytest
from src.calculator import Calculator
class TestCalculator:
    def setup_method(self):
        """Fresh calculator for each test."""
        self.calc = Calculator()

    def test_add_positive(self):
        assert self.calc.add(2, 3) == 5

    def test_add_negative(self):
        assert self.calc.add(-1, -1) == -2

    def test_add_float(self):
        assert self.calc.add(0.1, 0.2) == pytest.approx(0.3)

    def test_divide_normal(self):
        assert self.calc.divide(10, 3) == pytest.approx(3.333, rel=1e-3)

    def test_divide_by_zero(self):
        with pytest.raises(ValueError, match="Cannot divide by zero"):
            self.calc.divide(10, 0)

    def test_history_tracking(self):
        self.calc.add(1, 2)
        self.calc.divide(10, 5)
        assert len(self.calc.history) == 2
        assert "1 + 2 = 3" in self.calc.history[0]
Run with pytest -v for verbose output, pytest -x to stop on first failure, or pytest -k "divide" to filter by name.
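Those flags combine freely. A few invocations you'll reach for constantly (paths assume the layout above):

pytest -v                                   # verbose: show each test name and outcome
pytest -x                                   # stop at the first failure
pytest -k "divide"                          # run only tests whose names match "divide"
pytest tests/test_calculator.py::TestCalculator::test_divide_by_zero   # run a single test by node ID
pytest --lf                                 # re-run only the tests that failed last time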
Fixtures replace boilerplate setup/teardown code with composable, reusable components.
# tests/conftest.py — shared across all test files
import pytest
import tempfile
import json
from pathlib import Path
@pytest.fixture
def tmp_dir():
    """Temporary directory, auto-cleaned after test."""
    with tempfile.TemporaryDirectory() as d:
        yield Path(d)

@pytest.fixture
def sample_config(tmp_dir):
    """Write a sample config file and return its path."""
    config = {
        "database": {"host": "localhost", "port": 5432, "name": "testdb"},
        "api": {"key": "test-key-123", "timeout": 30},
        "debug": True,
    }
    path = tmp_dir / "config.json"
    path.write_text(json.dumps(config))
    return path

@pytest.fixture
def db_connection():
    """In-memory SQLite database seeded with test users, closed after the test."""
    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)")
    conn.execute("INSERT INTO users VALUES (1, 'Alice', 'alice@test.com')")
    conn.execute("INSERT INTO users VALUES (2, 'Bob', 'bob@test.com')")
    conn.commit()
    yield conn
    conn.close()

@pytest.fixture(scope="session")
def expensive_resource():
    """Created once per test session (not per test)."""
    print("Setting up expensive resource...")
    resource = {"initialized": True, "data": list(range(10000))}
    yield resource
    print("Tearing down expensive resource...")
# tests/test_config.py
import json

def test_load_config(sample_config):
    """Fixtures inject automatically by name."""
    config = json.loads(sample_config.read_text())
    assert config["database"]["host"] == "localhost"
    assert config["debug"] is True

def test_db_has_users(db_connection):
    cursor = db_connection.execute("SELECT COUNT(*) FROM users")
    assert cursor.fetchone()[0] == 2

def test_temp_file_operations(tmp_dir):
    file = tmp_dir / "output.txt"
    file.write_text("hello")
    assert file.read_text() == "hello"
    # tmp_dir is auto-cleaned after test — no manual cleanup needed
Fixture scopes control lifetime: function (default, per-test), class, module, or session (once per entire run). Use narrower scopes for isolation, wider for expensive resources.
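As a rough sketch of what scope changes (fixture names here are illustrative, not from the project above):

import pytest

@pytest.fixture  # function scope (default): rebuilt for every test
def fresh_list():
    return []

@pytest.fixture(scope="module")  # built once per test module, shared by its tests
def shared_lookup_table():
    return {"a": 1, "b": 2}

@pytest.fixture(scope="session")  # built once for the entire run
def settings():
    return {"env": "test"}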
Instead of writing separate tests for each input, generate them dynamically.
import pytest
@pytest.mark.parametrize("input_val, expected", [
("hello", "HELLO"),
("", ""),
("Hello World", "HELLO WORLD"),
("123abc", "123ABC"),
("já está", "JÁ ESTÁ"), # Unicode
])
def test_uppercase(input_val, expected):
assert input_val.upper() == expected
@pytest.mark.parametrize("a, b, expected", [
(2, 3, 5),
(-1, 1, 0),
(0, 0, 0),
(1.5, 2.5, 4.0),
(1_000_000, 1, 1_000_001),
])
def test_add(a, b, expected):
assert a + b == expected
# Combine multiple parametrize decorators (cartesian product)
@pytest.mark.parametrize("x", [1, 2, 3])
@pytest.mark.parametrize("y", [10, 20])
def test_multiply_combinations(x, y):
"""Runs 6 tests: (1,10), (1,20), (2,10), (2,20), (3,10), (3,20)"""
result = x * y
assert result == x * y
# Parametrize with explicit IDs for readable output
@pytest.mark.parametrize("url, expected_status", [
    pytest.param("https://httpbin.org/get", 200, id="success"),
    pytest.param("https://httpbin.org/status/404", 404, id="not-found"),
    pytest.param("https://httpbin.org/status/500", 500, id="server-error"),
])
def test_http_status(url, expected_status):
    # Note: this hits a live endpoint; the next section covers mocking so you don't have to.
    import httpx

    response = httpx.get(url)
    assert response.status_code == expected_status
Mock external dependencies (APIs, databases, file systems) so tests are fast, reliable, and don't hit real services.
# src/weather.py
import httpx
class WeatherService:
    def __init__(self, api_key: str):
        self.api_key = api_key
        self.base_url = "https://api.weather.com/v1"

    def get_temperature(self, city: str) -> float:
        response = httpx.get(
            f"{self.base_url}/current",
            params={"city": city, "key": self.api_key}
        )
        response.raise_for_status()
        return response.json()["temperature"]

    def is_freezing(self, city: str) -> bool:
        return self.get_temperature(city) <= 0
# tests/test_weather.py
import httpx
import pytest
from unittest.mock import patch, MagicMock

from src.weather import WeatherService

def test_get_temperature():
    """Mock the HTTP call, test the logic."""
    service = WeatherService("fake-key")
    mock_response = MagicMock()
    mock_response.json.return_value = {"temperature": 22.5}
    mock_response.raise_for_status = MagicMock()

    with patch("src.weather.httpx.get", return_value=mock_response) as mock_get:
        temp = service.get_temperature("Buenos Aires")
        assert temp == 22.5
        mock_get.assert_called_once()
        # Verify the URL and params
        call_kwargs = mock_get.call_args
        assert "Buenos Aires" in str(call_kwargs)

def test_is_freezing_true():
    service = WeatherService("fake-key")
    with patch.object(service, "get_temperature", return_value=-5.0):
        assert service.is_freezing("Moscow") is True

def test_is_freezing_false():
    service = WeatherService("fake-key")
    with patch.object(service, "get_temperature", return_value=25.0):
        assert service.is_freezing("Miami") is False

def test_api_error_handling():
    """Verify error handling when API fails."""
    service = WeatherService("fake-key")
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Server Error", request=MagicMock(), response=MagicMock(status_code=500)
    )
    with patch("src.weather.httpx.get", return_value=mock_response):
        with pytest.raises(httpx.HTTPStatusError):
            service.get_temperature("Anywhere")
from unittest.mock import patch, MagicMock, PropertyMock, AsyncMock

# 1. Patch a function
with patch("module.function", return_value=42):
    assert module.function() == 42

# 2. Patch an attribute
with patch.object(obj, "attribute", new="mocked_value"):
    assert obj.attribute == "mocked_value"

# 3. Patch a property
with patch.object(MyClass, "prop", new_callable=PropertyMock, return_value=99):
    assert instance.prop == 99

# 4. Side effects (different return per call)
mock = MagicMock(side_effect=[1, 2, 3])
assert mock() == 1
assert mock() == 2
assert mock() == 3

# 5. Side effect as function
mock = MagicMock(side_effect=lambda x: x * 2)
assert mock(5) == 10

# 6. Async mock (for async functions)
mock = AsyncMock(return_value={"data": "test"})
result = await mock()  # must run inside an async test function
assert result == {"data": "test"}

# 7. Assert call patterns
mock.assert_called_once()
mock.assert_called_with(expected_arg)
mock.assert_not_called()
assert mock.call_count == 3
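To actually await an AsyncMock you need an async test runner. A minimal sketch, assuming the pytest-asyncio plugin is installed (test name is illustrative):

import pytest
from unittest.mock import AsyncMock

@pytest.mark.asyncio  # provided by pytest-asyncio
async def test_fetch_data():
    fetch = AsyncMock(return_value={"data": "test"})
    result = await fetch()
    assert result == {"data": "test"}
    fetch.assert_awaited_once()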
Every script in the AI Agent Toolkit follows these testing patterns. Get 50+ scripts covering automation, APIs, data processing, and more — all with proper error handling and documented interfaces.
Get the Toolkit — $19
Test code that reads/writes files without touching real directories. Combine tmp_path (pytest built-in) with fixtures.
# src/file_processor.py
import csv
import json
from pathlib import Path
class FileProcessor:
    def csv_to_json(self, csv_path: str, json_path: str) -> int:
        rows = []
        with open(csv_path, newline='') as f:
            reader = csv.DictReader(f)
            rows = list(reader)
        with open(json_path, 'w') as f:
            json.dump(rows, f, indent=2)
        return len(rows)

    def count_lines(self, path: str, skip_empty: bool = True) -> int:
        lines = Path(path).read_text().splitlines()
        if skip_empty:
            return len([line for line in lines if line.strip()])
        return len(lines)
# tests/test_file_processor.py
import json

from src.file_processor import FileProcessor

def test_csv_to_json(tmp_path):
    # Arrange — create test CSV
    csv_file = tmp_path / "data.csv"
    csv_file.write_text("name,age,city\nAlice,30,NYC\nBob,25,LA\n")
    json_file = tmp_path / "output.json"

    # Act
    processor = FileProcessor()
    count = processor.csv_to_json(str(csv_file), str(json_file))

    # Assert
    assert count == 2
    assert json_file.exists()
    data = json.loads(json_file.read_text())
    assert len(data) == 2
    assert data[0]["name"] == "Alice"
    assert data[1]["city"] == "LA"

def test_count_lines(tmp_path):
    file = tmp_path / "test.txt"
    file.write_text("line 1\n\nline 3\n \nline 5\n")
    processor = FileProcessor()
    assert processor.count_lines(str(file), skip_empty=True) == 3
    assert processor.count_lines(str(file), skip_empty=False) == 5
The tmp_path fixture (built into pytest) gives you a unique temp directory per test. No cleanup needed — pytest handles it. For more file automation patterns, see our file automation guide.
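Combining the two is straightforward: a custom fixture can take tmp_path as an argument and build on it. A small sketch (fixture and file names here are hypothetical):

import pytest

@pytest.fixture
def populated_dir(tmp_path):
    """A temp directory pre-filled with a couple of text files."""
    for name in ("a.txt", "b.txt"):
        (tmp_path / name).write_text(f"contents of {name}\n")
    return tmp_path

def test_counts_only_txt_files(populated_dir):
    assert len(list(populated_dir.glob("*.txt"))) == 2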
Measure how much of your code is actually tested, and run tests automatically on every push.
# pyproject.toml — pytest configuration
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-v --tb=short --strict-markers"
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks integration tests",
]
[tool.coverage.run]
source = ["src"]
omit = ["tests/*", "*/__pycache__/*"]
[tool.coverage.report]
fail_under = 80
show_missing = true
exclude_lines = [
    "pragma: no cover",
    "if __name__",
    "raise NotImplementedError",
]
# Run tests with coverage
pytest --cov=src --cov-report=term-missing
# Generate HTML coverage report
pytest --cov=src --cov-report=html
# Open htmlcov/index.html in browser
# Run only fast tests
pytest -m "not slow"
# Run with parallel execution (pip install pytest-xdist)
pytest -n auto
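For -m "not slow" to have anything to deselect, the markers declared in pyproject.toml have to be applied in test code. A sketch (test names and bodies are placeholders):

import time
import pytest

@pytest.mark.slow
def test_generate_full_report():
    time.sleep(2)  # stand-in for genuinely slow work
    assert True

@pytest.mark.integration
def test_against_staging_api():
    ...  # deselect with: pytest -m "not integration"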
# .github/workflows/test.yml — GitHub Actions CI
name: Tests
on: [push, pull_request]
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install -e ".[test]"
      - name: Run tests
        run: |
          pytest --cov=src --cov-report=xml -v
      - name: Upload coverage
        if: matrix.python-version == '3.12'
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
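The pip install -e ".[test]" step assumes a "test" extra is declared in pyproject.toml. A minimal sketch of that extra (exact package pins are up to you):

# pyproject.toml (excerpt)
[project.optional-dependencies]
test = ["pytest", "pytest-cov", "pytest-xdist"]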
Keep each test focused on one behavior: test_user_creation should test user creation, not also email sending and database writes. Use descriptive names: test_divide_by_zero_raises_error beats test_divide_3. Mark slow tests with @pytest.mark.slow and run them separately.

# Example: Testing edge cases systematically
@pytest.mark.parametrize("input_val, expected_error", [
(None, TypeError),
([], ValueError),
("not a number", TypeError),
(float("inf"), ValueError),
(float("nan"), ValueError),
])
def test_average_edge_cases(input_val, expected_error):
calc = Calculator()
with pytest.raises(expected_error):
calc.average(input_val)
The AI Agent Toolkit includes 50+ scripts built with these patterns — proper error handling, clear interfaces, and real-world reliability. Plus 30+ AI prompts for code review, debugging, and architecture.
Get the Toolkit — $19