Python File Automation | Organize, Rename, and Back Up Files

Python File Automation | Organize, Rename, and Back Up Files

Key takeaways of this article

A hands-on guide to Python file automation: organizing folders, batch renaming, backups, and practical scripts with clear examples.

Introduction

“Automate the repetitive work”

Automating file operations in Python saves a lot of time in real workflows.


1. Finding files

Files with a given extension

from pathlib import Path

def find_files(directory, extension):
    """Recursively find files with a specific extension.

    Args:
        directory: Root directory to search (str or Path).
        extension: File extension, with or without a leading dot
            ('pdf' and '.pdf' are both accepted).

    Returns:
        list[Path]: All matching files under *directory*, recursively.
    """
    # Normalize so callers may pass '.pdf' as well as 'pdf' —
    # otherwise the pattern would become '**/*..pdf' and match nothing.
    suffix = extension.lstrip('.')
    path = Path(directory)
    return list(path.glob(f'**/*.{suffix}'))

# Usage
pdf_files = find_files('.', 'pdf')
for file in pdf_files:
    print(file)
import os
from datetime import datetime, timedelta

def find_old_files(directory, days=30):
    """Recursively find files last modified more than *days* ago.

    Args:
        directory: Root directory to search.
        days: Age threshold in days (default 30).

    Returns:
        list[Path]: Files whose modification time is before the cutoff.
    """
    cutoff = datetime.now() - timedelta(days=days)
    old_files = []

    for filepath in Path(directory).rglob('*'):
        if not filepath.is_file():
            continue
        try:
            mtime = datetime.fromtimestamp(filepath.stat().st_mtime)
        except OSError:
            # File vanished (or became unreadable) between listing and
            # stat(); skip it instead of crashing mid-scan.
            continue
        if mtime < cutoff:
            old_files.append(filepath)

    return old_files

# Usage
old_files = find_old_files('.', days=90)
print(f"{len(old_files)} old file(s)")

2. Renaming files

Batch rename

from pathlib import Path

def rename_files(directory, old_pattern, new_pattern):
    """Batch rename files in a directory by substring replacement.

    Args:
        directory: Directory whose immediate files are renamed
            (non-recursive).
        old_pattern: Substring to search for in each file name.
        new_pattern: Replacement substring.
    """
    path = Path(directory)

    for file in path.glob('*'):
        # Only rename regular files — the original also matched (and
        # renamed) subdirectories.
        if file.is_file() and old_pattern in file.name:
            new_name = file.name.replace(old_pattern, new_pattern)
            file.rename(file.parent / new_name)
            # Arrow separates old and new names in the log line.
            print(f"{file.name} → {new_name}")

# Usage
rename_files('.', 'old_', 'new_')

Adding sequence numbers

def add_numbers(directory, extension):
    """Prefix matching files with a zero-padded sequence number.

    Files are processed in sorted name order so numbering is
    deterministic, e.g. photo.jpg → 001_photo.jpg.

    Args:
        directory: Directory to scan (non-recursive).
        extension: File extension, with or without a leading dot.
    """
    # Accept 'jpg' and '.jpg' alike, consistent with find_files().
    suffix = extension.lstrip('.')
    path = Path(directory)
    files = sorted(path.glob(f'*.{suffix}'))

    for i, file in enumerate(files, 1):
        new_name = f"{i:03d}_{file.name}"
        file.rename(file.parent / new_name)
        # Arrow separates old and new names in the log line.
        print(f"{file.name} → {new_name}")

# Usage
add_numbers('./images', 'jpg')
# photo.jpg → 001_photo.jpg

3. Organizing files

Sort into folders by extension

import shutil
from pathlib import Path

def organize_files(directory):
    """Move each file into a subfolder named after its extension.

    e.g. report.pdf → pdf/report.pdf. Files without an extension are
    left in place, as are subdirectories.

    Args:
        directory: Directory to organize (non-recursive).
    """
    path = Path(directory)

    for file in path.iterdir():
        if not file.is_file():
            continue

        ext = file.suffix[1:]  # '.jpg' → 'jpg'
        if not ext:
            continue  # no extension → leave where it is

        # Create the per-extension folder on first use.
        target_dir = path / ext
        target_dir.mkdir(exist_ok=True)

        shutil.move(str(file), str(target_dir / file.name))
        # Arrow shows where the file was moved.
        print(f"{file.name} → {ext}/")

# Usage
organize_files('./downloads')

4. Automated backups

Backup script

import shutil
from pathlib import Path
from datetime import datetime

def backup_directory(source, backup_root):
    """Back up a directory tree into a timestamped zip archive.

    The archive is built directly from *source*; the original version
    copied the whole tree, zipped the copy, then deleted it — doubling
    disk I/O and temporarily doubling disk usage for no benefit.

    Args:
        source: Directory to back up.
        backup_root: Directory where the .zip archive is written.

    Returns:
        Path: Path to the created archive (backup_<timestamp>.zip).
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"backup_{timestamp}"
    backup_path = Path(backup_root) / backup_name

    # Make sure the destination directory exists before archiving.
    backup_path.parent.mkdir(parents=True, exist_ok=True)

    # make_archive appends '.zip' to the base name and archives the
    # *contents* of source — same result as the copy-then-zip approach.
    archive = shutil.make_archive(str(backup_path), 'zip', source)
    print(f"Archive created: {archive}")
    return Path(archive)

# Usage
backup_directory('./project', './backups')

Pruning old backups

def cleanup_old_backups(backup_dir, keep_count=5):
    """Delete all but the *keep_count* most recent backup archives.

    Args:
        backup_dir: Directory containing backup_*.zip archives.
        keep_count: Number of newest archives to retain. A value of 0
            (or less) deletes every backup.
    """
    path = Path(backup_dir)
    # Oldest first, by modification time.
    backups = sorted(path.glob('backup_*.zip'), key=lambda p: p.stat().st_mtime)

    # backups[:-0] is an empty slice, so the naive slice would silently
    # keep everything when keep_count == 0; handle that case explicitly.
    doomed = backups if keep_count <= 0 else backups[:-keep_count]

    for backup in doomed:
        backup.unlink()
        print(f"Deleted: {backup.name}")

# Usage
cleanup_old_backups('./backups', keep_count=5)

5. Finding duplicates

Hash-based duplicate detection

import hashlib
from collections import defaultdict

def find_duplicates(directory):
    """Find duplicate files by content hash.

    Hashes are computed in 64 KiB chunks so arbitrarily large files do
    not have to fit in memory (the original read each whole file at
    once). MD5 is used only as a content fingerprint for grouping, not
    for security.

    Args:
        directory: Root directory to scan recursively.

    Returns:
        dict[str, list[Path]]: hash → files, only groups with 2+ files.
    """
    hashes = defaultdict(list)

    for file in Path(directory).rglob('*'):
        if not file.is_file():
            continue
        digest = hashlib.md5()
        with open(file, 'rb') as f:
            # Stream the file to keep memory bounded.
            for chunk in iter(lambda: f.read(65536), b''):
                digest.update(chunk)
        hashes[digest.hexdigest()].append(file)

    duplicates = {h: files for h, files in hashes.items() if len(files) > 1}

    for hash_val, files in duplicates.items():
        print(f"\nDuplicate group ({hash_val[:8]}...):")
        for file in files:
            print(f"  - {file}")

    return duplicates

# Usage
duplicates = find_duplicates('./documents')

6. Real-world example

Log cleanup script

from pathlib import Path
import gzip
from datetime import datetime, timedelta

def cleanup_logs(log_dir, archive_days=7, delete_days=30):
    """Age-based log maintenance.

    - Older than *delete_days*: deleted outright.
    - Older than *archive_days* (but newer than *delete_days*):
      gzip-compressed next to the original, original removed.
    - Newer logs are left untouched.

    Args:
        log_dir: Directory containing *.log files (non-recursive).
        archive_days: Age in days after which logs are compressed.
        delete_days: Age in days after which logs are deleted.
    """
    path = Path(log_dir)
    now = datetime.now()

    for log_file in path.glob('*.log'):
        mtime = datetime.fromtimestamp(log_file.stat().st_mtime)
        age = (now - mtime).days

        if age >= delete_days:
            log_file.unlink()
            print(f"Deleted: {log_file.name} ({age} days)")

        elif age >= archive_days:
            # 'app.log' → 'app.log.gz'
            gz_path = log_file.with_suffix('.log.gz')

            # copyfileobj streams fixed-size chunks — faster and
            # memory-safe for large logs, unlike iterating a binary
            # stream line by line with writelines().
            with open(log_file, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

            log_file.unlink()
            # Arrow separates original and archive names in the log.
            print(f"Compressed: {log_file.name} → {gz_path.name}")

# Usage
cleanup_logs('./logs', archive_days=7, delete_days=30)

Practical tips

File automation checklist

# ✅ Safer file operations
# 1. Back up first
# 2. Dry-run mode (preview before destructive steps)
# 3. Logging

# ✅ Error handling
try:
    shutil.move(src, dst)
except PermissionError:
    print("Permission denied")
except FileNotFoundError:
    print("File not found")

# ✅ Progress feedback
from tqdm import tqdm

for file in tqdm(files, desc="Processing"):
    process(file)

Summary

Key takeaways

  1. Finding files: glob, rglob
  2. Renaming: rename()
  3. Moving files: shutil.move()
  4. Backups: copytree(), make_archive()
  5. Duplicates: compare hashes

Next steps