Python File Automation | Organize, Rename, and Back Up Files
이 글의 핵심
Automate file workflows in Python: find and rename files, organize by extension, backups with shutil, duplicate detection, and log cleanup—patterns and code you can reuse.
Introduction
“Automate the repetitive work”
Automating file operations in Python saves a lot of time in real workflows.
1. Finding files
Files with a given extension
from pathlib import Path
def find_files(directory, extension):
    """Recursively find regular files under a directory by extension.

    Args:
        directory: Root directory to search (str or path-like).
        extension: Extension to match, with or without the leading dot;
            'pdf' and '.pdf' are treated the same.

    Returns:
        A list of Path objects for every matching regular file, in the
        order the recursive glob yields them.
    """
    # Accept '.pdf' as well as 'pdf'; otherwise the pattern becomes '*..pdf'.
    suffix = extension[1:] if extension.startswith('.') else extension
    path = Path(directory)
    # glob matches on names only, so a directory called "reports.pdf" would
    # match too; keep regular files only, as the function name promises.
    return [match for match in path.glob(f'**/*.{suffix}') if match.is_file()]
# Usage
pdf_files = find_files('.', 'pdf')
for file in pdf_files:
print(file)
Conditional search
import os
from datetime import datetime, timedelta
def find_old_files(directory, days=30):
    """Collect files whose modification time is more than `days` days ago.

    Walks `directory` recursively with os.walk and compares each file's
    mtime (as a naive local datetime) against "now minus `days`".

    Args:
        directory: Root directory to scan.
        days: Age threshold in days (default 30).

    Returns:
        A list of Path objects, in os.walk order.
    """
    threshold = datetime.now() - timedelta(days=days)

    def _is_stale(candidate):
        # True when the file was last modified before the threshold.
        modified = datetime.fromtimestamp(candidate.stat().st_mtime)
        return modified < threshold

    stale = []
    for root, _subdirs, names in os.walk(directory):
        folder = Path(root)
        stale.extend(
            entry for entry in (folder / name for name in names)
            if _is_stale(entry)
        )
    return stale
# Usage
old_files = find_old_files('.', days=90)
print(f"{len(old_files)} old file(s)")
2. Renaming files
Batch rename
from pathlib import Path
def rename_files(directory, old_pattern, new_pattern):
    """Batch rename entries in a directory by substring replacement.

    Every direct child of `directory` whose name contains `old_pattern`
    is renamed with all occurrences replaced by `new_pattern`. An entry is
    skipped (and reported) when the new name is already taken, so existing
    files are never overwritten.

    Args:
        directory: Directory whose direct children are renamed.
        old_pattern: Non-empty substring to look for in each name.
        new_pattern: Replacement text.

    Raises:
        ValueError: If `old_pattern` is empty.
    """
    if not old_pattern:
        # '' is contained in every name and str.replace('', x) would insert
        # x between all characters, mangling every entry in the directory.
        raise ValueError("old_pattern must not be empty")

    path = Path(directory)
    # Snapshot the listing first: renaming while the glob iterator is still
    # running could make it yield freshly renamed entries again.
    for file in sorted(path.glob('*')):
        if old_pattern not in file.name:
            continue
        new_name = file.name.replace(old_pattern, new_pattern)
        if new_name == file.name:
            continue
        target = file.parent / new_name
        if target.exists():
            # Path.rename silently replaces an existing file on POSIX and
            # raises on Windows; skipping is safe on both.
            print(f"Skipped: {file.name} ({new_name} already exists)")
            continue
        file.rename(target)
        print(f"{file.name} → {new_name}")
# Usage
rename_files('.', 'old_', 'new_')
Adding sequence numbers
def add_numbers(directory, extension):
    """Prepend a three-digit running number to files of one extension.

    Direct children of `directory` matching `*.<extension>` are processed
    in sorted order, so `photo.jpg` becomes `001_photo.jpg`, the next
    file gets `002_`, and so on.

    Args:
        directory: Directory containing the files.
        extension: Extension without the leading dot (e.g. 'jpg').
    """
    matches = list(Path(directory).glob(f'*.{extension}'))
    matches.sort()
    i = 0
    for file in matches:
        i += 1
        new_name = f"{i:03d}_{file.name}"
        # Same directory, new file name.
        file.rename(file.with_name(new_name))
        print(f"{file.name} → {new_name}")
# Usage
add_numbers('./images', 'jpg')
# photo.jpg → 001_photo.jpg
3. Organizing files
Sort into folders by extension
import shutil
from pathlib import Path
def organize_files(directory):
    """Move files into subfolders named after their extension.

    Each regular file directly inside `directory` that has an extension is
    moved to `<directory>/<ext>/`, creating the folder when needed. Files
    without an extension and subdirectories are left alone. A file is
    skipped (and reported) when the destination already holds an entry of
    the same name, so nothing is overwritten.

    Args:
        directory: Directory to organize.
    """
    path = Path(directory)
    # Snapshot the listing: the loop creates folders and moves entries in
    # the very directory it is iterating.
    for file in sorted(path.iterdir()):
        if not file.is_file():
            continue
        # Extension without dot
        ext = file.suffix[1:]  # .jpg → jpg
        if not ext:
            continue

        target_dir = path / ext
        if target_dir.exists() and not target_dir.is_dir():
            # A regular file is named like the extension; mkdir would raise.
            print(f"Skipped: {file.name} ({ext} is not a folder)")
            continue
        target_dir.mkdir(exist_ok=True)

        destination = target_dir / file.name
        if destination.exists():
            # shutil.move may silently overwrite an existing file.
            print(f"Skipped: {file.name} (already exists in {ext}/)")
            continue

        shutil.move(str(file), str(destination))
        print(f"{file.name} → {ext}/")
# Usage
organize_files('./downloads')
4. Automated backups
Backup script
import shutil
from pathlib import Path
from datetime import datetime
def backup_directory(source, backup_root):
    """Create a timestamped zip backup of a directory tree.

    The tree is first copied to `<backup_root>/backup_<YYYYmmdd_HHMMSS>`,
    that copy is zipped next to itself, and the uncompressed copy is then
    removed, leaving only `backup_<timestamp>.zip`.

    Args:
        source: Directory to back up.
        backup_root: Directory that receives the archive.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = Path(backup_root, f"backup_{timestamp}")

    # Stage a plain copy of the tree.
    shutil.copytree(source, backup_path)
    print(f"Backup done: {backup_path}")

    # Compress the staged copy, then drop the uncompressed folder.
    shutil.make_archive(str(backup_path), format='zip', root_dir=backup_path)
    shutil.rmtree(backup_path)
    print(f"Archive created: {backup_path}.zip")
# Usage
backup_directory('./project', './backups')
Pruning old backups
def cleanup_old_backups(backup_dir, keep_count=5):
    """Delete all but the N most recently modified backup archives.

    Only files matching `backup_*.zip` directly inside `backup_dir` are
    considered; they are ranked by modification time.

    Args:
        backup_dir: Directory holding the backup archives.
        keep_count: Number of newest archives to keep. 0 deletes them all.

    Raises:
        ValueError: If `keep_count` is negative.
    """
    if keep_count < 0:
        raise ValueError("keep_count must be >= 0")

    path = Path(backup_dir)
    backups = sorted(path.glob('backup_*.zip'), key=lambda x: x.stat().st_mtime)

    # Do not slice with backups[:-keep_count]: for keep_count == 0 that is
    # backups[:0] (empty), so nothing would be deleted.
    surplus = max(len(backups) - keep_count, 0)
    for backup in backups[:surplus]:
        backup.unlink()
        print(f"Deleted: {backup.name}")
# Usage
cleanup_old_backups('./backups', keep_count=5)
5. Finding duplicates
Hash-based duplicate detection
import hashlib
from collections import defaultdict
def find_duplicates(directory):
    """Find files with identical content using MD5 hashes.

    Every regular file under `directory` (recursively) is hashed; files
    sharing a digest are reported as one duplicate group, which is also
    printed. MD5 is used here only as a content fingerprint, not for
    security.

    Args:
        directory: Root directory to scan.

    Returns:
        A dict mapping each MD5 hex digest that occurs more than once to
        the list of Path objects with that content.
    """
    chunk_size = 1024 * 1024  # hash in 1 MiB pieces to bound memory use
    hashes = defaultdict(list)

    for file in Path(directory).rglob('*'):
        if not file.is_file():
            continue
        digest = hashlib.md5()
        with open(file, 'rb') as f:
            # Feed the hash incrementally instead of reading the whole
            # file at once; the resulting digest is identical.
            for chunk in iter(lambda: f.read(chunk_size), b''):
                digest.update(chunk)
        hashes[digest.hexdigest()].append(file)

    duplicates = {h: files for h, files in hashes.items() if len(files) > 1}

    for hash_val, files in duplicates.items():
        print(f"\nDuplicate group ({hash_val[:8]}...):")
        for file in files:
            print(f" - {file}")

    return duplicates
# Usage
duplicates = find_duplicates('./documents')
6. Real-world example
Log cleanup script
from pathlib import Path
import gzip
from datetime import datetime, timedelta
def cleanup_logs(log_dir, archive_days=7, delete_days=30):
    """Rotate `*.log` files in a directory according to their age.

    Age is the whole number of days since the file's modification time.

    - age >= delete_days: the log is deleted.
    - archive_days <= age < delete_days: the log is gzip-compressed to
      `<name>.log.gz` and the original is removed.
    - younger logs are left untouched.

    Args:
        log_dir: Directory containing the log files.
        archive_days: Minimum age in days before a log is compressed.
        delete_days: Minimum age in days before a log is deleted.
    """
    reference = datetime.now()

    for log_file in Path(log_dir).glob('*.log'):
        modified = datetime.fromtimestamp(log_file.stat().st_mtime)
        age = (reference - modified).days

        # Deletion takes precedence over archiving.
        if age >= delete_days:
            log_file.unlink()
            print(f"Deleted: {log_file.name} ({age} days)")
            continue

        if age < archive_days:
            continue

        gz_path = log_file.with_suffix('.log.gz')
        with open(log_file, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
            f_out.writelines(f_in)
        log_file.unlink()
        print(f"Compressed: {log_file.name} → {gz_path.name}")
# Usage
cleanup_logs('./logs', archive_days=7, delete_days=30)
Practical tips
File automation checklist
# ✅ Safer file operations
# 1. Back up first
# 2. Dry-run mode (preview before destructive steps)
# 3. Logging
# ✅ Error handling
try:
shutil.move(src, dst)
except PermissionError:
print("Permission denied")
except FileNotFoundError:
print("File not found")
# ✅ Progress feedback
from tqdm import tqdm
for file in tqdm(files, desc="Processing"):
process(file)
Summary
Key takeaways
- Finding files: `glob`, `rglob`
- Renaming: `rename()`
- Moving files: `shutil.move()`
- Backups: `copytree()`, `make_archive()`
- Duplicates: compare hashes
Next steps
- [Web scraping](/en/blog/python-series-22-web-scraping/)
- [Task scheduling](/en/blog/python-series-23-task-scheduling/)
Related posts
- [Python file handling | Read, write, CSV, JSON](/en/blog/python-series-07-file-handling/)
자주 묻는 질문 (FAQ)
Q. 이 내용을 실무에서 언제 쓰나요?
A. Automate file workflows in Python: find and rename files, organize by extension, backups with shutil, duplicate detection, and log cleanup. 실무에서는 위 본문의 예제와 선택 가이드를 참고해 적용하면 됩니다.
Q. 선행으로 읽으면 좋은 글은?
A. 각 글 하단의 이전 글 또는 관련 글 링크를 따라가면 순서대로 배울 수 있습니다. Python 시리즈 목차에서 전체 흐름을 확인할 수 있습니다.
Q. 더 깊이 공부하려면?
A. Python 공식 문서(docs.python.org)와 해당 라이브러리 공식 문서를 참고하세요. 글 말미의 참고 자료 링크도 활용하면 좋습니다.
같이 보면 좋은 글 (내부 링크)
이 주제와 연결되는 다른 글입니다.
- [Python File Handling | Read, Write, CSV, JSON, and pathlib](/en/blog/python-series-07-file-handling/)
- [Python Web Scraping | BeautifulSoup and Selenium Explained](/en/blog/python-series-22-web-scraping/)
- [Python Task Scheduling | Automate Jobs with schedule](/en/blog/python-series-23-task-scheduling/)
이 글에서 다루는 키워드 (관련 검색어)
Python, Automation, File Processing, os, pathlib, shutil, Scripting 등으로 검색하시면 이 글이 도움이 됩니다.