Python Task Scheduling | Automate Jobs with schedule, APScheduler, and cron
Key points of this article
Design recurring tasks in Python: compare schedule vs APScheduler vs OS cron, with runnable examples for backups, scraping, and email reports.
Introduction
For batch scripts or long-running services, pairing periodic work with a scheduler library or the OS scheduler is usually safer than a bare while loop. By the end of this article you can compare options for recurring execution in Python and adapt the examples to your own projects.
“Run jobs automatically”
Task scheduling means running work at defined times or intervals without manual intervention.
1. schedule library
Install
pip install schedule
Basics
import schedule
import time
def job():
    """Print a fixed message; stands in for the real work of a scheduled task."""
    print("Job running!")
# Every 10 seconds
schedule.every(10).seconds.do(job)
# Every minute
schedule.every(1).minutes.do(job)
# Daily at 9:00 AM
schedule.every().day.at("09:00").do(job)
# Every Monday at 10:00 AM
schedule.every().monday.at("10:00").do(job)

# Run loop: poll for due jobs once per second so the 10-second job above
# is picked up close to its due time.
while True:
    schedule.run_pending()
    time.sleep(1)
2. Real-world example
Automated backup
import schedule
import time
from datetime import datetime
import shutil
from pathlib import Path
def backup_files(source='./data', backup_dir='./backups'):
    """Copy a data folder into a new timestamped directory under the backup folder.

    Args:
        source: Directory to copy. Defaults to ``./data``.
        backup_dir: Directory that collects the backups; created (including
            missing parents) when absent. Defaults to ``./backups``.

    Raises:
        FileNotFoundError: If ``source`` does not exist (raised by
            ``shutil.copytree``).
        FileExistsError: If a backup directory with the same
            second-resolution timestamp already exists.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"backup_{timestamp}"
    source = Path(source)
    backup_dir = Path(backup_dir)
    # parents=True lets callers point backup_dir at a nested location.
    backup_dir.mkdir(parents=True, exist_ok=True)
    backup_path = backup_dir / backup_name
    shutil.copytree(source, backup_path)
    print(f"[{datetime.now()}] Backup complete: {backup_name}")
# Daily at midnight
schedule.every().day.at("00:00").do(backup_files)
# Every Sunday at 11:00 PM
schedule.every().sunday.at("23:00").do(backup_files)

print("Backup scheduler started...")
# Both jobs are pinned to whole minutes, so polling once a minute is enough.
while True:
    schedule.run_pending()
    time.sleep(60)
3. APScheduler
Install
pip install apscheduler
Advanced scheduling
from apscheduler.schedulers.blocking import BlockingScheduler
from datetime import datetime
# Scheduler instance that the add_job() calls and the final start() below operate on.
scheduler = BlockingScheduler()
def job1():
    """Print a timestamped "Job 1" line."""
    print(f"[{datetime.now()}] Job 1")
def job2():
    """Print a timestamped "Job 2" line."""
    print(f"[{datetime.now()}] Job 2")
# Cron trigger: run job1 every day at 09:00.
scheduler.add_job(job1, trigger='cron', hour=9, minute=0)

# Interval trigger: run job2 once every 30 minutes.
scheduler.add_job(job2, trigger='interval', minutes=30)

# Cron trigger limited to Monday through Friday, again at 09:00.
scheduler.add_job(job1, trigger='cron', day_of_week='mon-fri', hour=9, minute=0)

# Start the scheduler last, after every job has been registered.
scheduler.start()
4. Automating web scraping
Periodic data collection
import schedule
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
def scrape_prices():
    """Collect product prices and append them to ``price_history.csv``.

    Each URL is fetched and parsed independently: a failure on one URL is
    printed and skipped so the remaining URLs are still collected. Rows are
    appended without a header, with the fields ``url``, ``price`` and
    ``timestamp``. Nothing is written when no price could be collected.
    """
    urls = [
        'https://shop1.com/product/123',
        'https://shop2.com/product/456',
    ]
    prices = []
    for url in urls:
        try:
            response = requests.get(url, timeout=10)
            # Treat 4xx/5xx responses as failures instead of parsing an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            price_tag = soup.select_one('.price')
            if price_tag is None:
                # Report a readable reason rather than an AttributeError on None.
                raise ValueError("no element matches '.price'")
            # Strip thousands separators and the currency suffix before converting.
            price = int(price_tag.text.replace(',', '').replace('원', ''))
            prices.append({
                'url': url,
                'price': price,
                'timestamp': datetime.now(),
            })
        except Exception as e:
            # Best effort per URL: report the problem and continue with the next one.
            print(f"Error: {url} - {e}")
    if prices:
        df = pd.DataFrame(prices)
        df.to_csv('price_history.csv', mode='a', header=False, index=False)
    print(f"[{datetime.now()}] Price scrape finished")
import time  # needed by the polling loop; this example's import list omits it

# Every hour
schedule.every(1).hours.do(scrape_prices)

# An hourly job does not need finer polling than once a minute.
while True:
    schedule.run_pending()
    time.sleep(60)
5. Email notifications
Daily report email
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
import schedule
def send_daily_report():
    """Send a daily HTML report by email.

    Builds the body with ``generate_report()`` and sends it over SMTP with
    STARTTLS. Delivery failures are printed rather than raised, so a loop
    that calls this job keeps running.
    """
    # Placeholder account details: replace them with real ones, and load the
    # password from configuration (for example an environment variable)
    # instead of hard-coding it in the script.
    sender = 'sender@example.com'
    recipient = 'recipient@example.com'

    report = generate_report()
    msg = MIMEMultipart()
    msg['From'] = sender
    msg['To'] = recipient
    msg['Subject'] = f"Daily report - {datetime.now().strftime('%Y-%m-%d')}"
    msg.attach(MIMEText(report, 'html'))
    try:
        # The context manager closes the connection even when starttls, login
        # or send_message raises; the timeout keeps an unresponsive server
        # from blocking the caller indefinitely.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()
            server.login(sender, 'password')
            server.send_message(msg)
        print("Report sent")
    except Exception as e:
        print(f"Send failed: {e}")
def generate_report():
"""Build simple HTML for the report."""
return """
<html>
<body>
<h1>Daily report</h1>
<p>Total revenue: 1,000,000 KRW</p>
<p>New customers: 50</p>
</body>
</html>
"""
import time  # needed by the polling loop; this example's import list omits it

# Every day at 8:00 AM
schedule.every().day.at("08:00").do(send_daily_report)

# Same polling loop as the other schedule examples: without it the job is
# registered but never executed.
while True:
    schedule.run_pending()
    time.sleep(60)
6. Running in the background
Windows Task Scheduler
# Register a Python script with Windows Task Scheduler
# 1. Open Task Scheduler
# 2. Create Basic Task
# 3. Program: python.exe
# 4. Arguments: C:\path\to\script.py
Linux cron
# Edit crontab
crontab -e
# Daily at 9:00 AM
0 9 * * * /usr/bin/python3 /path/to/script.py
# Every hour
0 * * * * /usr/bin/python3 /path/to/script.py
# Every Monday at 10:00 AM
0 10 * * 1 /usr/bin/python3 /path/to/script.py
Practical tips
Scheduling checklist
# ✅ Logging
import logging
# Write INFO-and-above records to scheduler.log, each prefixed with a timestamp.
LOG_FORMAT = '%(asctime)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, filename='scheduler.log')
def job():
    """Run the task, logging one line before and one after the work."""
    logging.info("Job start")
    # work here
    logging.info("Job done")
# ✅ Wrap errors
def safe_job():
    """Run ``job()`` and log any exception instead of letting it propagate.

    Returns normally even when ``job()`` fails, so the caller is never
    interrupted by an error raised inside the job.
    """
    try:
        job()
    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback,
        # which a plain logging.error(...) message would drop.
        logging.exception("Error: %s", e)
# ✅ Timeouts (Unix)
import signal
def timeout_handler(signum, frame):
    """Signal handler that raises ``TimeoutError`` to abort the running job.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeoutError: Always, with the message "Job timed out".
    """
    raise TimeoutError("Job timed out")
# Install the handler for SIGALRM first, then arm the alarm.
JOB_TIMEOUT_SECONDS = 300  # 5 minute cap
signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(JOB_TIMEOUT_SECONDS)
# NOTE(review): nothing here disarms the alarm once the job finishes;
# presumably signal.alarm(0) should follow the job call - confirm.
Summary
Key takeaways
- schedule: simple, readable timers
- APScheduler: cron-style and interval jobs that run in-process
- cron: system scheduler on Linux and macOS
- Automation: backups, scraping, reports
- Operations: logging and error handling
Common use cases
- Automated backups
- Price monitoring
- Data collection
- Report generation
- Alerts and notifications