Scheduled and Event-Driven Jobs
What You'll Learn
How to schedule Python scripts to run automatically — using cron for simple scheduling, systemd for reliable service management, and how to react to events.
cron — The Classic Scheduler
cron runs commands on a schedule. Edit your cron schedule:
crontab -e # opens your personal crontab
Cron Syntax
MIN HOUR DAY MONTH WEEKDAY command
* * * * * /path/to/command
# Examples (fields: minute hour day-of-month month day-of-week):
# Every minute
* * * * * /usr/bin/python3 /home/alice/scripts/check.py
# Every day at 2am
0 2 * * * /usr/bin/python3 /home/alice/scripts/backup.py
# Every Monday at 9am (day-of-week: 0 or 7 = Sunday, 1 = Monday, ... 6 = Saturday)
0 9 * * 1 /usr/bin/python3 /home/alice/scripts/report.py
# Every 15 minutes (*/N = every N units)
*/15 * * * * /usr/bin/python3 /home/alice/scripts/sync.py
# First day of each month at midnight
0 0 1 * * /usr/bin/python3 /home/alice/scripts/monthly.py
Use crontab.guru to verify your expression.
Cron Best Practices
# Always use absolute paths — cron runs jobs with a minimal PATH, not your shell's
* * * * * /usr/bin/python3 /home/alice/scripts/task.py >> /var/log/task.log 2>&1
# Use the venv's Python so the job sees the project's installed packages
0 2 * * * /home/alice/scripts/venv/bin/python3 /home/alice/scripts/backup.py
# Set env vars cron doesn't have (alternatively, define NAME=value lines at the top of the crontab)
0 2 * * * export DATABASE_URL="..." && /usr/bin/python3 /home/alice/scripts/sync.py
# Redirect stdout AND stderr to a log file — otherwise cron mails or discards the output
0 2 * * * /usr/bin/python3 /home/alice/scripts/sync.py >> /var/log/sync.log 2>&1
Locking: Prevent Overlapping Runs
import fcntl
import sys
from pathlib import Path

# NOTE(review): /tmp is world-writable; in production prefer a per-user
# directory (e.g. ~/.cache) or /run so other users can't tamper with the lock.
LOCK_FILE = Path("/tmp/my-script.lock")


def acquire_lock():
    """Prevent multiple instances from running simultaneously.

    Returns the open lock-file handle; keep it alive for the duration of
    the run.  The kernel releases the flock automatically if the process
    dies, so no stale-lock cleanup is ever needed.  Exits with status 0
    when another instance already holds the lock.
    """
    lock = open(LOCK_FILE, "w")
    try:
        # LOCK_NB makes flock fail immediately instead of blocking.
        fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
        return lock
    except OSError:
        lock.close()  # fix: don't leak the file handle on contention
        print("Another instance is already running", file=sys.stderr)
        sys.exit(0)


def main() -> int:
    lock = acquire_lock()
    try:
        # ... do work ...
        return 0
    finally:
        fcntl.flock(lock, fcntl.LOCK_UN)
        lock.close()
        # NOTE: the lock file is deliberately NOT unlinked.  Deleting it
        # opens a race where a waiting instance holds the old inode while
        # a newer one locks a freshly created file — both would then run.
        # An empty leftover lock file is harmless.
systemd Timers — Modern Scheduling
For production Linux servers, systemd timers offer several advantages over cron:
- Automatic logging via journalctl
- Dependency management
- Missed run handling (set Persistent=true on calendar timers to run jobs missed while powered off)
- Better error reporting
Create a systemd Service
/etc/systemd/system/data-sync.service:
[Unit]
Description=Sync data from source API
# Order this unit after the network stack has been brought up.
After=network.target
[Service]
# oneshot: the process runs to completion on each activation — the right
# type for timer-driven batch jobs (no long-lived daemon).
Type=oneshot
# Run as an unprivileged deploy user, not root.
User=deploy
WorkingDirectory=/opt/myapp
# Use the project venv's interpreter so installed packages are available.
ExecStart=/opt/myapp/venv/bin/python3 -m myapp.sync
# KEY=value pairs loaded into the process environment (keep secrets out of the unit file).
EnvironmentFile=/opt/myapp/.env
# Send stdout/stderr to the journal — view with `journalctl -u data-sync.service`.
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
Create a systemd Timer
/etc/systemd/system/data-sync.timer:
[Unit]
Description=Run data-sync every 15 minutes
# Pull in the service unit whenever the timer is active.
Requires=data-sync.service
[Timer]
# First run: 5 minutes after boot.
OnBootSec=5min
# Then: 15 minutes after each previous activation of the service.
OnUnitActiveSec=15min
# Fire within 1s of the scheduled moment (default is 1min of slack).
AccuracySec=1s
[Install]
WantedBy=timers.target
Enable and Start
sudo systemctl daemon-reload
sudo systemctl enable data-sync.timer
sudo systemctl start data-sync.timer
# Check status
sudo systemctl status data-sync.timer
sudo systemctl list-timers
# View logs
sudo journalctl -u data-sync.service -n 50
sudo journalctl -u data-sync.service --since "1 hour ago"
APScheduler — Python-Native Scheduling
For jobs within a running Python application:
pip install apscheduler
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger
import logging

log = logging.getLogger(__name__)


def sync_users():
    """Placeholder job: refresh user records from the upstream source."""
    log.info("Starting user sync")
    # ... do work ...
    log.info("User sync complete")


def generate_report():
    """Placeholder job: build the daily report."""
    log.info("Generating daily report")
    # ... do work ...


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    sched = BlockingScheduler()
    # Interval trigger: fires every 15 minutes from startup.
    sched.add_job(sync_users, "interval", minutes=15)
    # Cron trigger: fires once a day at 06:00.
    sched.add_job(generate_report, CronTrigger(hour=6, minute=0))

    log.info("Scheduler started")
    try:
        sched.start()  # blocks this thread until interrupted
    except KeyboardInterrupt:
        sched.shutdown()
Watchdog — React to File Changes
pip install watchdog
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time
import logging
log = logging.getLogger(__name__)
class UploadHandler(FileSystemEventHandler):
    """Route filesystem events to the appropriate processing hook."""

    def on_created(self, event):
        """Hand newly dropped .csv files to the CSV processor."""
        if event.is_directory:
            return
        path = event.src_path
        if path.endswith(".csv"):
            log.info("New file detected: %s", path)
            process_csv(path)

    def on_modified(self, event):
        """Reload configuration whenever a .json file changes."""
        if event.is_directory:
            return
        path = event.src_path
        if path.endswith(".json"):
            log.info("Config modified: %s", path)
            reload_config(path)
def watch_directory(path: str) -> None:
    """Watch *path* (non-recursively) and dispatch events to UploadHandler.

    Blocks forever; Ctrl-C (KeyboardInterrupt) stops the observer cleanly.
    """
    observer = Observer()
    observer.schedule(UploadHandler(), path, recursive=False)
    observer.start()
    log.info("Watching: %s", path)
    try:
        # The observer runs in its own thread; keep the main thread alive.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()
Script Health Check
Make scheduled scripts report their health:
import time
import logging
from pathlib import Path
from datetime import datetime
log = logging.getLogger(__name__)
HEALTH_FILE = Path("/var/run/myapp/last_run.json")
def record_health(status: str, details: dict) -> None:
import json
HEALTH_FILE.parent.mkdir(parents=True, exist_ok=True)
HEALTH_FILE.write_text(json.dumps({
"timestamp": datetime.utcnow().isoformat() + "Z",
"status": status,
**details,
}), encoding="utf-8")
Quick Reference
# cron
crontab -e
# MIN HOUR DAY MON DOW command
# 0 2 * * * /path/to/venv/bin/python3 /path/to/script.py >> /var/log/script.log 2>&1
# systemd timer
sudo systemctl enable my-timer.timer
sudo systemctl start my-timer.timer
sudo systemctl list-timers
sudo journalctl -u my-service.service -n 50
# APScheduler
from apscheduler.schedulers.blocking import BlockingScheduler
s = BlockingScheduler()
s.add_job(fn, "interval", minutes=15)
s.add_job(fn, "cron", hour=6)
s.start()
# Watchdog
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler