When is file automation useful?

For repetitive tasks: bulk cleanup, backups, renames, format conversion, and log rotation.

Compared with os.path, pathlib is more modern and readable. Prefer pathlib for new projects.

shutil is a high-level module for copying, moving, and removing files and directories.

Python File Automation | Organize, Rename, and Back Up Files

2026년 3월 28일 · 18분 읽기 · 수정 2026년 3월 28일 Intermediate Tutorial

이 글의 핵심

Automate file workflows in Python: find and rename files, organize by extension, backups with shutil, duplicate detection, and log cleanup—patterns and code you can reuse.

Introduction

“Automate the repetitive work”

Automating file operations in Python saves a lot of time in real workflows.

1. Finding files

Files with a given extension

from pathlib import Path
def find_files(directory, extension):
    """Recursively collect paths under ``directory`` ending in ``.extension``.

    ``extension`` is given without the leading dot (``'pdf'``, not ``'.pdf'``).
    Returns a list of ``Path`` objects.
    """
    pattern = f'*.{extension}'
    # rglob(pattern) is the recursive form of glob('**/' + pattern).
    return [match for match in Path(directory).rglob(pattern)]
# Usage
pdf_files = find_files('.', 'pdf')
for file in pdf_files:
    print(file)

Conditional search

import os
from datetime import datetime, timedelta
def find_old_files(directory, days=30):
    """Find files older than N days.

    Walks ``directory`` recursively and compares each file's modification
    time with ``now - days``.

    Args:
        directory: Root directory to scan.
        days: Minimum age in days, measured from the modification time.

    Returns:
        A list of ``Path`` objects whose mtime is earlier than the cutoff.
        Entries that cannot be stat'ed (dangling symlinks, files removed
        while scanning, permission errors) are skipped.
    """
    cutoff = datetime.now() - timedelta(days=days)
    old_files = []

    for root, _dirs, files in os.walk(directory):
        for name in files:
            filepath = Path(root) / name
            try:
                mtime = datetime.fromtimestamp(filepath.stat().st_mtime)
            except OSError:
                # os.walk also lists dangling symlinks and files that vanish
                # mid-scan; one such entry must not abort the whole search.
                continue

            if mtime < cutoff:
                old_files.append(filepath)

    return old_files
# Usage
old_files = find_old_files('.', days=90)
print(f"{len(old_files)} old file(s)")

2. Renaming files

Batch rename

from pathlib import Path
def rename_files(directory, old_pattern, new_pattern):
    """Batch rename entries in a directory.

    Every entry directly inside ``directory`` whose name contains
    ``old_pattern`` is renamed with each occurrence replaced by
    ``new_pattern``. Subdirectories are not descended into.

    Args:
        directory: Directory whose entries are renamed.
        old_pattern: Substring to look for in each name.
        new_pattern: Replacement substring.

    An entry is skipped (and reported) when the new name is already taken,
    so existing files are never overwritten.
    """
    path = Path(directory)

    # Snapshot the listing first so renames cannot affect the iteration.
    for file in sorted(path.glob('*')):
        if old_pattern not in file.name:
            continue

        new_name = file.name.replace(old_pattern, new_pattern)
        if new_name == file.name:
            continue  # nothing to change

        target = file.parent / new_name
        # Path.rename() silently replaces an existing file on POSIX, which
        # would destroy data. samefile() still allows case-only renames on
        # case-insensitive filesystems, where the target "exists" as itself.
        if target.exists() and not target.samefile(file):
            print(f"Skipped: {file.name} ({new_name} already exists)")
            continue

        file.rename(target)
        print(f"{file.name} → {new_name}")
# Usage
rename_files('.', 'old_', 'new_')

Adding sequence numbers

def add_numbers(directory, extension):
    """Rename ``*.extension`` files to ``NNN_<name>`` in sorted order.

    Numbering starts at 001 and is zero-padded to three digits. Only entries
    directly inside ``directory`` are considered.
    """
    candidates = list(Path(directory).glob(f'*.{extension}'))
    candidates.sort()

    for offset, original in enumerate(candidates):
        numbered = f"{offset + 1:03d}_{original.name}"
        original.rename(original.with_name(numbered))
        print(f"{original.name} → {numbered}")
# Usage
add_numbers('./images', 'jpg')
# photo.jpg → 001_photo.jpg

3. Organizing files

Sort into folders by extension

import shutil
from pathlib import Path
def organize_files(directory):
    """Move files into subfolders named by extension.

    Each regular file directly inside ``directory`` that has an extension is
    moved to ``directory/<ext>/<name>`` (``photo.jpg`` -> ``jpg/photo.jpg``).
    Files without an extension and subdirectories are left in place.

    Args:
        directory: Directory to organize.

    A file is skipped (and reported) when the destination name is already
    taken or when the extension folder cannot be created, so nothing is
    overwritten and one conflict does not abort the run.
    """
    path = Path(directory)

    # Snapshot the listing: folders are created and files moved during the loop.
    for file in sorted(path.iterdir()):
        if not file.is_file():
            continue

        # Extension without dot
        ext = file.suffix[1:]  # .jpg → jpg
        if not ext:
            continue

        target_dir = path / ext
        if target_dir.exists() and not target_dir.is_dir():
            # mkdir(exist_ok=True) still raises when a regular file has the
            # same name as the extension folder.
            print(f"Skipped: {file.name} ({ext} is not a folder)")
            continue
        target_dir.mkdir(exist_ok=True)

        destination = target_dir / file.name
        if destination.exists():
            # shutil.move() would silently replace the earlier file.
            print(f"Skipped: {file.name} (already exists in {ext}/)")
            continue

        shutil.move(str(file), str(destination))
        print(f"{file.name} → {ext}/")
# Usage
organize_files('./downloads')

4. Automated backups

Backup script

import shutil
from pathlib import Path
from datetime import datetime
def backup_directory(source, backup_root):
    """Back up a directory tree as a timestamped zip archive.

    The tree is first copied to ``backup_root/backup_<YYYYmmdd_HHMMSS>``,
    that copy is zipped next to itself, and the uncompressed copy is removed.

    Args:
        source: Directory to back up.
        backup_root: Directory that receives the archive (created if missing).

    Returns:
        ``Path`` of the created ``backup_<timestamp>.zip`` file.

    Raises:
        FileExistsError: If a backup folder with the same timestamp exists.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"backup_{timestamp}"
    backup_path = Path(backup_root) / backup_name
    archive_path = backup_path.with_name(f"{backup_name}.zip")

    # Copy tree. Deliberately outside the try block: if this fails because
    # backup_path already exists, that folder is not ours to delete.
    shutil.copytree(source, backup_path)
    print(f"Backup done: {backup_path}")

    # Zip
    try:
        shutil.make_archive(str(backup_path), 'zip', backup_path)
    finally:
        # Remove the unzipped folder even when archiving fails, so a failed
        # run does not leave a full uncompressed copy behind.
        shutil.rmtree(backup_path)
    print(f"Archive created: {backup_path}.zip")
    return archive_path
# Usage
backup_directory('./project', './backups')

Pruning old backups

def cleanup_old_backups(backup_dir, keep_count=5):
    """Keep only the N most recent backups.

    Backups are the ``backup_*.zip`` files directly inside ``backup_dir``,
    ordered by modification time; the oldest ones beyond ``keep_count`` are
    deleted.

    Args:
        backup_dir: Directory containing the backup archives.
        keep_count: Number of newest archives to keep. ``0`` deletes all.

    Raises:
        ValueError: If ``keep_count`` is negative.
    """
    if keep_count < 0:
        raise ValueError("keep_count must be >= 0")

    path = Path(backup_dir)
    backups = sorted(path.glob('backup_*.zip'), key=lambda x: x.stat().st_mtime)

    # Use an explicit end index: backups[:-keep_count] becomes backups[:0]
    # (empty) when keep_count is 0, so nothing would ever be deleted.
    surplus = max(len(backups) - keep_count, 0)

    for backup in backups[:surplus]:
        backup.unlink()
        print(f"Deleted: {backup.name}")
# Usage
cleanup_old_backups('./backups', keep_count=5)

5. Finding duplicates

Hash-based duplicate detection

import hashlib
from collections import defaultdict
def find_duplicates(directory):
    """Find duplicate files using MD5 hashes.

    Every regular file under ``directory`` (recursively) is hashed; files
    sharing a digest are reported as one duplicate group. MD5 is used here
    only as a content fingerprint, not for security.

    Args:
        directory: Root directory to scan.

    Returns:
        A dict mapping MD5 hex digest to the list of ``Path`` objects with
        that content, containing only digests shared by two or more files.
    """
    chunk_size = 1024 * 1024
    hashes = defaultdict(list)

    for file in Path(directory).rglob('*'):
        if not file.is_file():
            continue

        # Hash in fixed-size chunks so large files are never loaded whole.
        digest = hashlib.md5()
        with open(file, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                digest.update(chunk)
        hashes[digest.hexdigest()].append(file)

    duplicates = {h: files for h, files in hashes.items() if len(files) > 1}

    for hash_val, files in duplicates.items():
        print(f"\nDuplicate group ({hash_val[:8]}...):")
        for file in files:
            print(f"  - {file}")

    return duplicates
# Usage
duplicates = find_duplicates('./documents')

6. Real-world example

Log cleanup script

from pathlib import Path
import gzip
from datetime import datetime, timedelta
def cleanup_logs(log_dir, archive_days=7, delete_days=30):
    """
    Log maintenance:
    - Older than archive_days: gzip (``name.log`` -> ``name.log.gz``)
    - Older than delete_days: delete (plain logs and ``.log.gz`` archives)

    Args:
        log_dir: Directory containing the ``*.log`` files.
        archive_days: Age in days at which a plain log is compressed.
        delete_days: Age in days at which a log or its archive is deleted.

    Age is measured from the file's modification time. A compressed archive
    inherits the mtime of the log it came from, so it still ages out at
    ``delete_days`` instead of living forever.
    """
    import os
    import shutil

    path = Path(log_dir)
    now = datetime.now()

    # Snapshot both patterns up front: '*.log' does not match '.log.gz', so
    # without the second glob, archives would never be considered for deletion.
    candidates = sorted(path.glob('*.log')) + sorted(path.glob('*.log.gz'))

    for log_file in candidates:
        info = log_file.stat()
        mtime = datetime.fromtimestamp(info.st_mtime)
        age = (now - mtime).days

        if age >= delete_days:
            log_file.unlink()
            print(f"Deleted: {log_file.name} ({age} days)")

        elif log_file.suffix == '.log' and age >= archive_days:
            gz_path = log_file.with_suffix('.log.gz')

            # copyfileobj streams fixed-size chunks; iterating a binary file
            # by "lines" can buffer arbitrarily large pieces.
            with open(log_file, 'rb') as f_in:
                with gzip.open(gz_path, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

            # Keep the original timestamps so the archive's age keeps counting
            # from when the log was last written, not from compression time.
            os.utime(gz_path, (info.st_atime, info.st_mtime))

            log_file.unlink()
            print(f"Compressed: {log_file.name} → {gz_path.name}")
# Usage
cleanup_logs('./logs', archive_days=7, delete_days=30)

Practical tips

File automation checklist

# ✅ Safer file operations
# 1. Back up first
# 2. Dry-run mode (preview before destructive steps)
# 3. Logging
# ✅ Error handling
try:
    shutil.move(src, dst)
except PermissionError:
    print("Permission denied")
except FileNotFoundError:
    print("File not found")
# ✅ Progress feedback
from tqdm import tqdm
for file in tqdm(files, desc="Processing"):
    process(file)

Summary

Key takeaways

Finding files: glob, rglob
Renaming: rename()
Moving files: shutil.move()
Backups: copytree(), make_archive()
Duplicates: compare hashes

Next steps

[Web scraping](/en/blog/python-series-22-web-scraping/)
[Task scheduling](/en/blog/python-series-23-task-scheduling/)

[Python file handling | Read, write, CSV, JSON](/en/blog/python-series-07-file-handling/)

자주 묻는 질문 (FAQ)

Q. 이 내용을 실무에서 언제 쓰나요?

A. Automate file workflows in Python: find and rename files, organize by extension, backups with shutil, duplicate detection, and log cleanup. 실무에서는 위 본문의 예제와 선택 가이드를 참고해 적용하면 됩니다.

Q. 선행으로 읽으면 좋은 글은?

A. 각 글 하단의 이전 글 또는 관련 글 링크를 따라가면 순서대로 배울 수 있습니다. Python 시리즈 목차에서 전체 흐름을 확인할 수 있습니다.

Q. 더 깊이 공부하려면?

A. Python 공식 문서(docs.python.org)의 pathlib·shutil·os 항목과 해당 라이브러리 공식 문서를 참고하세요. 글 말미의 참고 자료 링크도 활용하면 좋습니다.

같이 보면 좋은 글 (내부 링크)

이 주제와 연결되는 다른 글입니다.

[Python File Handling | Read, Write, CSV, JSON, and pathlib](/en/blog/python-series-07-file-handling/)
[Python Web Scraping | BeautifulSoup and Selenium Explained](/en/blog/python-series-22-web-scraping/)
[Python Task Scheduling | Automate Jobs with schedule](/en/blog/python-series-23-task-scheduling/)

이 글에서 다루는 키워드 (관련 검색어)

Python, Automation, File Processing, os, pathlib, shutil, Scripting 등으로 검색하시면 이 글이 도움이 됩니다.

이 글이 도움이 되셨나요?

여러분의 피드백은 더 나은 콘텐츠를 만드는 데 도움이 됩니다

문제가 있거나 개선 제안이 있으시면 연락처로 알려주세요.

Keyboard Shortcuts