Skip to main content

JSON, YAML, ENV, and Config

What You'll Learn

How to work with the most common configuration and data formats: JSON, YAML, .env files, and environment variables.

JSON — Python's Most Common Data Format

JSON support is built into Python's standard library (the json module) — no install needed.

Reading JSON

import json
from pathlib import Path

# From a file: read the whole file as text, then parse it
data = json.loads(Path("config.json").read_text(encoding="utf-8"))

# From a string
text = '{"name": "Alice", "age": 30}'
data = json.loads(text)

# Using open(): json.load() parses directly from the open file object
with open("config.json", encoding="utf-8") as f:
    data = json.load(f)

Writing JSON

import json
from pathlib import Path

data = {"name": "Alice", "age": 30, "active": True}

# To a file: serialize to a string first, then write it out as UTF-8.
# ensure_ascii=False keeps non-ASCII characters readable instead of \uXXXX escapes.
Path("output.json").write_text(
    json.dumps(data, indent=2, ensure_ascii=False),
    encoding="utf-8",
)

# Using open(): json.dump() writes straight to the open file object
with open("output.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# To a string
text = json.dumps(data, indent=2)

JSON Type Mapping

Python | JSON
dict | object {}
list, tuple | array []
str | string
int, float | number
True / False | true / false
None | null

Handling JSON Errors

import json
import sys

def load_json_safe(path: str) -> dict | None:
try:
with open(path, encoding="utf-8") as f:
return json.load(f)
except FileNotFoundError:
print(f"File not found: {path}", file=sys.stderr)
except json.JSONDecodeError as e:
print(f"Invalid JSON in {path}: {e}", file=sys.stderr)
return None

Custom JSON Serialization

import json
from datetime import datetime
from pathlib import Path

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also understands ``datetime`` and ``Path`` values."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``datetime`` values become ISO 8601 strings and ``Path`` values become
        plain strings. Anything else is passed to the base class, which raises
        ``TypeError`` for unsupported types.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, Path):
            return str(obj)
        return super().default(obj)

# Demo: neither value below is serializable by the stock encoder, so
# json.dumps() hands each one to CustomEncoder.default().
data = {
    "created": datetime.now(),
    "path": Path("/tmp/file.txt"),
}
serialized = json.dumps(data, cls=CustomEncoder, indent=2)
print(serialized)

YAML — Human-Friendly Config

YAML is more readable than JSON for configuration files. Install first:

pip install pyyaml

config.yaml:

database:
  host: localhost
  port: 5432
  name: myapp

server:
  host: 0.0.0.0
  port: 8080
  debug: false

allowed_origins:
  - https://myapp.com
  - https://api.myapp.com

Reading YAML

import yaml
from pathlib import Path

with open("config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)  # always use safe_load, not load()

# Nested mappings become dicts and sequences become lists
print(config["database"]["host"])  # localhost
print(config["allowed_origins"])  # ['https://myapp.com', ...]

Writing YAML

import yaml
from pathlib import Path

data = {"name": "myapp", "version": "1.0", "features": ["auth", "api"]}

# default_flow_style=False writes block style (one key/item per line);
# allow_unicode=True keeps non-ASCII characters unescaped.
with open("config.yaml", "w", encoding="utf-8") as f:
    yaml.dump(data, f, default_flow_style=False, allow_unicode=True)

Environment Variables

Environment variables are the standard way to pass secrets and config to programs (especially in Docker/servers):

import os

# Get an env var (returns None if not set)
db_url = os.environ.get("DATABASE_URL")

# Get with a default. Environment values are always strings, so convert explicitly.
port = int(os.environ.get("PORT", "8080"))
debug = os.environ.get("DEBUG", "false").lower() == "true"

# Require a variable (fail fast if missing or empty)
api_key = os.environ.get("API_KEY")
if not api_key:
    raise RuntimeError("API_KEY environment variable is required")

Setting env vars in the shell:

export DATABASE_URL="postgresql://localhost/myapp"
export API_KEY="secret123"
python3 app.py

.env Files with python-dotenv

.env files store environment variables for local development:

pip install python-dotenv

.env file (never commit to git!):

DATABASE_URL=postgresql://localhost/myapp
API_KEY=secret123
DEBUG=true
PORT=8080

Loading in Python:

from dotenv import load_dotenv
import os

# Copy the entries of the local .env file into os.environ
load_dotenv()

# From here on, the values are read like any other environment variable
db_url = os.getenv("DATABASE_URL")
debug_flag = os.getenv("DEBUG", "false")
debug = debug_flag.lower() == "true"

Add .env to .gitignore:

.env
.env.local
.env.*.local

Building a Config Class

Centralize all config in one place:

import os
from dataclasses import dataclass, field
from pathlib import Path


@dataclass(frozen=True)
class Config:
    """Immutable application settings sourced from environment variables.

    Every default comes from a ``default_factory``, so the environment is read
    each time a ``Config`` is instantiated rather than once when the class is
    defined. This lets variables set after import (for example by
    ``load_dotenv()``) take effect, and keeps a malformed ``PORT`` from
    crashing at import time. Any field can still be passed explicitly.
    """

    # Database
    db_url: str = field(
        default_factory=lambda: os.environ.get("DATABASE_URL", "sqlite:///local.db")
    )

    # Server
    host: str = field(default_factory=lambda: os.environ.get("HOST", "127.0.0.1"))
    port: int = field(default_factory=lambda: int(os.environ.get("PORT", "8080")))

    # App
    debug: bool = field(
        default_factory=lambda: os.environ.get("DEBUG", "false").lower() == "true"
    )
    log_level: str = field(
        default_factory=lambda: os.environ.get("LOG_LEVEL", "INFO").upper()
    )

    # Paths
    data_dir: Path = field(
        default_factory=lambda: Path(os.environ.get("DATA_DIR", "data"))
    )
    output_dir: Path = field(
        default_factory=lambda: Path(os.environ.get("OUTPUT_DIR", "output"))
    )

    def validate(self) -> None:
        """Fail fast on invalid config at startup.

        Raises:
            ValueError: If ``db_url`` is empty, ``port`` is outside 1–65535,
                or ``log_level`` is not DEBUG, INFO, WARNING or ERROR.
        """
        if not self.db_url:
            raise ValueError("DATABASE_URL must be set")
        if not 1 <= self.port <= 65535:
            raise ValueError(f"PORT must be 1–65535, got {self.port}")
        if self.log_level not in ("DEBUG", "INFO", "WARNING", "ERROR"):
            raise ValueError(f"Invalid LOG_LEVEL: {self.log_level}")


def get_config() -> Config:
    """Load ``.env``, build a ``Config``, validate it, and return it.

    python-dotenv is imported inside the function, so it is only needed when
    this function is actually called.

    Raises:
        ValueError: Propagated from ``Config.validate()`` on invalid settings.
    """
    from dotenv import load_dotenv

    load_dotenv()
    cfg = Config()
    cfg.validate()
    return cfg

Layered Config (Defaults → File → Env Vars)

A robust pattern for production systems:

import os
import json
from pathlib import Path
from dataclasses import dataclass

@dataclass
class Config:
    """Mutable settings holder pre-populated with built-in defaults."""

    host: str = "localhost"
    port: int = 8080
    debug: bool = False

def load_config(config_file: str = "config.json") -> Config:
    """Load config with priority: defaults < file < environment.

    Args:
        config_file: Path to an optional JSON file holding ``host``, ``port``
            and/or ``debug`` keys. A missing file is skipped.

    Returns:
        A ``Config`` whose fields reflect the highest-priority source that
        supplied each value.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
        ValueError: If a port from the file or from ``PORT`` is not an integer.
    """
    cfg = Config()  # start with defaults

    # Override with file if it exists
    path = Path(config_file)
    if path.exists():
        data = json.loads(path.read_text(encoding="utf-8"))
        cfg.host = data.get("host", cfg.host)
        # Coerce like the env-var branch does, so a quoted port ("8080")
        # in the file still yields an int.
        cfg.port = int(data.get("port", cfg.port))
        cfg.debug = data.get("debug", cfg.debug)

    # Override with environment variables (highest priority)
    if "HOST" in os.environ:
        cfg.host = os.environ["HOST"]
    if "PORT" in os.environ:
        cfg.port = int(os.environ["PORT"])
    if "DEBUG" in os.environ:
        cfg.debug = os.environ["DEBUG"].lower() == "true"

    return cfg

Common Mistakes

Mistake | Fix
json.load() with invalid JSON | Wrap in try/except json.JSONDecodeError
yaml.load() instead of yaml.safe_load() | Always use safe_load
Hardcoding secrets in code | Use env vars + .env
Committing .env to git | Add .env to .gitignore
os.environ["KEY"] when key may not exist | Use .get("KEY", default)

Quick Reference

# JSON read (path is a pathlib.Path)
import json
data = json.loads(path.read_text(encoding="utf-8"))

# JSON write
path.write_text(json.dumps(data, indent=2), encoding="utf-8")

# YAML read (pip install pyyaml)
import yaml
with open("config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Env vars
import os
value = os.environ.get("KEY", "default")

# .env file (pip install python-dotenv)
from dotenv import load_dotenv
load_dotenv()

# Config class
from dataclasses import dataclass, field

@dataclass(frozen=True)
class Config:
    # default_factory reads PORT when Config() is created (i.e. after
    # load_dotenv()), not once when the class is defined.
    port: int = field(default_factory=lambda: int(os.environ.get("PORT", "8080")))

What's Next

Lesson 3: argparse and CLI Design