Skip to main content

Scheduled and Event-Driven Jobs

What You'll Learn

How to schedule Python scripts to run automatically — using cron for simple scheduling, systemd for reliable service management, and how to react to events.

cron — The Classic Scheduler

cron runs commands on a schedule. Edit your cron schedule:

crontab -e # opens your personal crontab

Cron Syntax

MIN HOUR DAY MONTH WEEKDAY command
* * * * * /path/to/command

# Examples:
# Every minute
* * * * * /usr/bin/python3 /home/alice/scripts/check.py

# Every day at 2am
0 2 * * * /usr/bin/python3 /home/alice/scripts/backup.py

# Every Monday at 9am
0 9 * * 1 /usr/bin/python3 /home/alice/scripts/report.py

# Every 15 minutes
*/15 * * * * /usr/bin/python3 /home/alice/scripts/sync.py

# First day of each month at midnight
0 0 1 * * /usr/bin/python3 /home/alice/scripts/monthly.py

Use crontab.guru to verify your expression.

Cron Best Practices

# Always use absolute paths — cron has minimal PATH
* * * * * /usr/bin/python3 /home/alice/scripts/task.py >> /var/log/task.log 2>&1

# Use the venv's Python
0 2 * * * /home/alice/scripts/venv/bin/python3 /home/alice/scripts/backup.py

# Set env vars cron doesn't have (you can also put plain VAR=value lines at the top of the crontab)
0 2 * * * export DATABASE_URL="..." && /usr/bin/python3 /home/alice/scripts/sync.py

# Redirect output to a log file
0 2 * * * /usr/bin/python3 /home/alice/scripts/sync.py >> /var/log/sync.log 2>&1

Locking: Prevent Overlapping Runs

import fcntl
import sys
from pathlib import Path


# NOTE: /tmp is world-writable; on shared machines prefer a per-user
# directory (e.g. XDG_RUNTIME_DIR or ~/.cache) for the lock file.
LOCK_FILE = Path("/tmp/my-script.lock")


def acquire_lock():
    """Prevent multiple instances from running simultaneously.

    Returns the open lock-file handle. Keep a reference to it for the
    lifetime of the script: closing the handle releases the lock.
    Exits with status 0 if another instance already holds the lock.
    """
    lock = open(LOCK_FILE, "w")
    try:
        # LOCK_NB: raise OSError immediately instead of blocking when
        # another process already holds the exclusive lock.
        fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
        return lock
    except OSError:
        lock.close()  # don't leak the fd on the bail-out path
        print("Another instance is already running", file=sys.stderr)
        sys.exit(0)


def main() -> int:
    """Run the script body under the lock; always release it on exit."""
    lock = acquire_lock()
    try:
        # ... do work ...
        return 0
    finally:
        fcntl.flock(lock, fcntl.LOCK_UN)
        lock.close()
        # Deliberately do NOT unlink LOCK_FILE here: deleting the path
        # while another process is mid-acquire would let a third process
        # lock a fresh inode under the same name, breaking the mutual
        # exclusion this lock exists to provide.

systemd Timers — Modern Scheduling

For production Linux servers, systemd timers are better than cron:

  • Automatic logging via journalctl
  • Dependency management
  • Missed run handling (via Persistent=true — applies to OnCalendar= schedules)
  • Better error reporting

Create a systemd Service

/etc/systemd/system/data-sync.service:

[Unit]
Description=Sync data from source API
After=network.target

[Service]
Type=oneshot
User=deploy
WorkingDirectory=/opt/myapp
ExecStart=/opt/myapp/venv/bin/python3 -m myapp.sync
EnvironmentFile=/opt/myapp/.env
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target

Create a systemd Timer

/etc/systemd/system/data-sync.timer:

[Unit]
Description=Run data-sync every 15 minutes
Requires=data-sync.service

[Timer]
OnBootSec=5min
OnUnitActiveSec=15min
AccuracySec=1s

[Install]
WantedBy=timers.target

Enable and Start

sudo systemctl daemon-reload
sudo systemctl enable data-sync.timer
sudo systemctl start data-sync.timer

# Check status
sudo systemctl status data-sync.timer
sudo systemctl list-timers

# View logs
sudo journalctl -u data-sync.service -n 50
sudo journalctl -u data-sync.service --since "1 hour ago"

APScheduler — Python-Native Scheduling

For jobs within a running Python application:

pip install apscheduler
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger
import logging

log = logging.getLogger(__name__)


def sync_users():
    """Periodic job: refresh user data from the upstream source."""
    log.info("Starting user sync")
    # ... do work ...
    log.info("User sync complete")


def generate_report():
    """Daily job: build and emit the report."""
    log.info("Generating daily report")
    # ... do work ...


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    sched = BlockingScheduler()

    # Run every 15 minutes
    sched.add_job(sync_users, "interval", minutes=15)

    # Run at 6am every day
    sched.add_job(generate_report, CronTrigger(hour=6, minute=0))

    log.info("Scheduler started")
    try:
        # start() blocks forever; Ctrl-C triggers a clean shutdown.
        sched.start()
    except KeyboardInterrupt:
        sched.shutdown()

Watchdog — React to File Changes

pip install watchdog
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time
import logging

log = logging.getLogger(__name__)


class UploadHandler(FileSystemEventHandler):
    """Route filesystem events to the appropriate processing function."""

    def on_created(self, event):
        # Only react to newly created .csv files, never to directories.
        if event.is_directory or not event.src_path.endswith(".csv"):
            return
        log.info("New file detected: %s", event.src_path)
        process_csv(event.src_path)

    def on_modified(self, event):
        # Reload configuration whenever a .json file changes.
        if event.is_directory or not event.src_path.endswith(".json"):
            return
        log.info("Config modified: %s", event.src_path)
        reload_config(event.src_path)


def watch_directory(path: str) -> None:
    """Watch *path* (non-recursively) for events and block until Ctrl-C."""
    observer = Observer()
    observer.schedule(UploadHandler(), path, recursive=False)
    observer.start()
    log.info("Watching: %s", path)
    try:
        # The observer runs in its own thread; just idle in the main
        # thread until the user interrupts.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()

Script Health Check

Make scheduled scripts report their health:

import time
import logging
from pathlib import Path
from datetime import datetime

log = logging.getLogger(__name__)
HEALTH_FILE = Path("/var/run/myapp/last_run.json")


def record_health(status: str, details: dict) -> None:
import json
HEALTH_FILE.parent.mkdir(parents=True, exist_ok=True)
HEALTH_FILE.write_text(json.dumps({
"timestamp": datetime.utcnow().isoformat() + "Z",
"status": status,
**details,
}), encoding="utf-8")

Quick Reference

# cron
crontab -e
# MIN HOUR DAY MON DOW command
# 0 2 * * * /path/to/venv/bin/python3 /path/to/script.py >> /var/log/script.log 2>&1

# systemd timer
sudo systemctl enable my-timer.timer
sudo systemctl start my-timer.timer
sudo systemctl list-timers
sudo journalctl -u my-service.service -n 50
# APScheduler
from apscheduler.schedulers.blocking import BlockingScheduler
s = BlockingScheduler()
s.add_job(fn, "interval", minutes=15)
s.add_job(fn, "cron", hour=6)
s.start()

# Watchdog
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

What's Next

Lesson 3: Bulk Operations and Reporting