Python Task Scheduling | Automate Jobs with schedule, APScheduler, and cron

Python Task Scheduling | Automate Jobs with schedule, APScheduler, and cron

Key points of this article

Design recurring tasks in Python: compare schedule vs APScheduler vs OS cron, with runnable examples for backups, scraping, and email reports.

Introduction

For batch scripts or long-running services, pairing periodic work with a scheduler library or the OS scheduler is usually safer than a bare while loop. By the end of this article you can compare options for recurring execution in Python and adapt the examples to your own projects.

“Run jobs automatically”

Task scheduling means running work at defined times or intervals without manual intervention.


1. schedule library

Install

pip install schedule

Basics

import schedule
import time

def job():
    """Print a fixed message so every scheduled run is visible on stdout."""
    message = "Job running!"
    print(message)

# Build the four cadences first, then attach the same job to each of them
# in the order they are listed.
for plan in (
    schedule.every(10).seconds,           # every 10 seconds
    schedule.every(1).minutes,            # every minute
    schedule.every().day.at("09:00"),     # daily at 9:00 AM
    schedule.every().monday.at("10:00"),  # every Monday at 10:00 AM
):
    plan.do(job)

# Run loop: poll once per second and execute whatever has come due.
while True:
    schedule.run_pending()
    time.sleep(1)

2. Real-world example

Automated backup

import schedule
import time
from datetime import datetime
import shutil
from pathlib import Path

def backup_files():
    """Snapshot ``./data`` into a new timestamped folder under ``./backups``.

    The destination is named ``backup_<YYYYmmdd_HHMMSS>``. ``./backups`` is
    created when it does not exist yet; a completion line is printed to
    stdout once the copy has finished.
    """
    destination_root = Path('./backups')
    destination_root.mkdir(exist_ok=True)

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"backup_{stamp}"

    # copytree creates the destination folder itself and copies recursively.
    shutil.copytree(Path('./data'), destination_root / backup_name)

    print(f"[{datetime.now()}] Backup complete: {backup_name}")

# Two triggers share the same backup job.
nightly = schedule.every().day.at("00:00")    # daily at midnight
weekly = schedule.every().sunday.at("23:00")  # every Sunday at 11:00 PM
nightly.do(backup_files)
weekly.do(backup_files)

print("Backup scheduler started...")

# Check for due jobs once a minute.
while True:
    schedule.run_pending()
    time.sleep(60)

3. APScheduler

Install

pip install apscheduler

Advanced scheduling

from apscheduler.schedulers.blocking import BlockingScheduler
from datetime import datetime

# One shared scheduler instance; the add_job() calls and start() further down
# all operate on this object.
scheduler = BlockingScheduler()

def job1():
    """Print a "Job 1" line prefixed with the current local timestamp."""
    stamp = datetime.now()
    print(f"[{stamp}] Job 1")

def job2():
    """Print a "Job 2" line prefixed with the current local timestamp."""
    stamp = datetime.now()
    print(f"[{stamp}] Job 2")

# Cron-style trigger: job1 daily at 9:00.
scheduler.add_job(job1, trigger='cron', hour=9, minute=0)

# Interval trigger: job2 every 30 minutes.
scheduler.add_job(job2, trigger='interval', minutes=30)

# Cron-style trigger restricted to weekdays: job1 Monday-Friday at 9:00.
# NOTE(review): job1 is now registered under two overlapping cron rules, so it
# presumably fires twice at 9:00 on weekdays - confirm that is intended.
weekday_morning = dict(day_of_week='mon-fri', hour=9, minute=0)
scheduler.add_job(job1, 'cron', **weekday_morning)

# Hand control to the scheduler.
scheduler.start()

4. Automating web scraping

Periodic data collection

import schedule
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

def scrape_prices():
    """Collect product prices and append them to ``price_history.csv``.

    Each URL is fetched and parsed independently: a failure on one URL is
    reported on stdout and the remaining URLs are still processed. The
    successfully scraped rows are appended to the CSV without a header row
    or index, in the column order ``url``, ``price``, ``timestamp``. When
    no price could be collected, the CSV is left untouched.
    """
    urls = [
        'https://shop1.com/product/123',
        'https://shop2.com/product/456'
    ]

    prices = []

    for url in urls:
        try:
            response = requests.get(url, timeout=10)
            # Treat 4xx/5xx responses as failures instead of parsing an
            # error page as if it were the product page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            price_tag = soup.select_one('.price')
            if price_tag is None:
                # Report the real cause rather than an opaque
                # "'NoneType' object has no attribute 'text'".
                raise ValueError("no element matches selector '.price'")

            # Strip thousands separators and the won suffix before parsing.
            price = int(price_tag.text.replace(',', '').replace('원', ''))

            prices.append({
                'url': url,
                'price': price,
                'timestamp': datetime.now()
            })
        except Exception as e:
            # Deliberately broad: this is a best-effort job and one bad URL
            # must not abort the rest of the run.
            print(f"Error: {url} - {e}")

    # Skip the append when every URL failed, so an empty frame never adds
    # stray content to the history file.
    if prices:
        df = pd.DataFrame(prices)
        df.to_csv('price_history.csv', mode='a', header=False, index=False)
    print(f"[{datetime.now()}] Price scrape finished")

# The polling loop below calls time.sleep(), but this example's import list
# does not include `time`; import it here so the snippet runs on its own.
import time

# Every hour
schedule.every(1).hours.do(scrape_prices)

# Check for due jobs once a minute.
while True:
    schedule.run_pending()
    time.sleep(60)

5. Email notifications

Daily report email

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
import schedule

def send_daily_report():
    """Send the daily HTML report by email through ``smtp.gmail.com``.

    The body comes from ``generate_report()`` and is attached as HTML. The
    message is submitted on port 587 after upgrading the connection with
    STARTTLS. Any exception raised while connecting, authenticating or
    sending is caught and reported on stdout; exceptions raised while
    building the message are not caught.
    """
    report = generate_report()

    # NOTE(review): the addresses and password in this function look like
    # placeholders (the addresses appear mangled by email obfuscation) -
    # replace them with real values, ideally loaded from configuration.
    msg = MIMEMultipart()
    msg['From'] = '[email protected]'
    msg['To'] = '[email protected]'
    msg['Subject'] = f"Daily report - {datetime.now().strftime('%Y-%m-%d')}"

    msg.attach(MIMEText(report, 'html'))

    try:
        # The context manager issues QUIT and closes the socket even when
        # starttls/login/send_message raises, so a failed send cannot leak
        # the connection. The timeout keeps a stalled server from blocking
        # the calling scheduler indefinitely.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()
            server.login('[email protected]', 'password')
            server.send_message(msg)
        print("Report sent")
    except Exception as e:
        print(f"Send failed: {e}")

def generate_report():
    """Return the report body as a static HTML document string.

    The markup is fixed: a heading followed by the revenue and
    new-customer figures.
    """
    html = """
    <html>
        <body>
            <h1>Daily report</h1>
            <p>Total revenue: 1,000,000 KRW</p>
            <p>New customers: 50</p>
        </body>
    </html>
    """
    return html

# `time` is not in this example's import list; the polling loop needs it.
import time

# Every day at 8:00 AM
schedule.every().day.at("08:00").do(send_daily_report)

# Registering the job does not run it: without a polling loop the script
# would exit immediately and the report would never be sent.
while True:
    schedule.run_pending()
    time.sleep(60)

6. Running in the background

Windows Task Scheduler

# Register a Python script with Windows Task Scheduler
# 1. Open Task Scheduler
# 2. Create Basic Task
# 3. Program: python.exe
# 4. Arguments: C:\path\to\script.py

Linux cron

# Edit crontab
crontab -e

# Daily at 9:00 AM
0 9 * * * /usr/bin/python3 /path/to/script.py

# Every hour
0 * * * * /usr/bin/python3 /path/to/script.py

# Every Monday at 10:00 AM
0 10 * * 1 /usr/bin/python3 /path/to/script.py

Practical tips

Scheduling checklist

# ✅ Logging
import logging

# Configure the root logger: records at INFO level go to scheduler.log,
# each line formatted as "<timestamp> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
    filename='scheduler.log',
)

def job():
    """Run the unit of work, logging a marker before and after it."""
    log = logging.info
    log("Job start")
    # ... the actual work goes here ...
    log("Job done")

# ✅ Wrap errors
def safe_job():
    """Run ``job()`` and log any exception instead of letting it propagate.

    Intended as the callable handed to the scheduler, so that one failing
    run is recorded in the log and does not escape to the caller.
    """
    try:
        job()
    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback;
        # a bare message would hide where the failure happened.
        logging.exception("Error: %s", e)

# ✅ Timeouts (Unix)
import signal

def timeout_handler(signum, frame):
    """Signal handler that aborts the running code with TimeoutError.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeoutError: Always.
    """
    raise TimeoutError("Job timed out")


signal.signal(signal.SIGALRM, timeout_handler)


def run_with_timeout(func, seconds=300):
    """Call ``func()`` under a SIGALRM deadline and return its result.

    The alarm is armed immediately before the call and cancelled
    afterwards, so the deadline covers exactly this one run. Arming it once
    at import time would instead fire ``seconds`` after startup whether or
    not a job is running, and would not protect later runs.

    Args:
        func: Zero-argument callable to execute.
        seconds: Deadline in whole seconds (default 300, a 5 minute cap).

    Returns:
        Whatever ``func()`` returns.

    Raises:
        TimeoutError: Raised by ``timeout_handler`` if the alarm fires
            before ``func()`` returns.
    """
    signal.alarm(seconds)
    try:
        return func()
    finally:
        # alarm(0) cancels any pending alarm, on success and on failure.
        signal.alarm(0)

Summary

Key takeaways

  1. schedule: simple, readable timers
  2. APScheduler: cron-like and interval jobs in process
  3. cron: system scheduler on Linux and macOS
  4. Automation: backups, scraping, reports
  5. Operations: logging and error handling

Common use cases

  • Automated backups
  • Price monitoring
  • Data collection
  • Report generation
  • Alerts and notifications