Implementing Media Files Migration During Site Migration
Media files — images, documents, videos, archives — often add up to tens of gigabytes. Migrating them means not only copying the files but also updating every link to them in the content.
Media Files Inventory
# Summary of file types by count.
# Only the file name is inspected, so dots in directory names are ignored;
# files with no extension are grouped under "(none)".
find /var/www/uploads -type f -printf '%f\n' | \
  awk -F. 'NF > 1 {print tolower($NF); next} {print "(none)"}' | \
  sort | uniq -c | sort -rn

# Total size
du -sh /var/www/uploads

# Files without DB match (possible garbage)
# Get file list from DB.
#   -N drops the column-header row, -B forces plain batch output.
#   Sorting is done by sort(1), not ORDER BY, so both lists use the same
#   byte-wise order that comm(1) requires.
# NOTE(review): assumes media.file_path is stored relative to the uploads root — confirm.
mysql -N -B myapp -e "SELECT file_path FROM media" | LC_ALL=C sort > db_files.txt

# Compare with disk (same uploads root as above; %P prints paths relative to it)
find /var/www/uploads -type f -printf "%P\n" | LC_ALL=C sort > disk_files.txt

# Lines present only in disk_files.txt = files on disk that the DB does not know
LC_ALL=C comm -23 disk_files.txt db_files.txt | head -50
Copying via rsync
# First full pass; safe to repeat, each run only transfers what differs
rsync --archive --verbose --compress \
  --checksum --progress \
  user@old-server:/var/www/uploads/ \
  /var/www/new-site/uploads/

# Same copy, leaving out directories that are not needed on the new site
rsync --archive --verbose --compress \
  --exclude='.git' --exclude='cache/' --exclude='tmp/' \
  user@old-server:/var/www/uploads/ \
  /var/www/new-site/uploads/

# Last catch-up pass right before the switch; also prunes stale files
rsync --archive --verbose --compress \
  --delete \
  user@old-server:/var/www/uploads/ \
  /var/www/new-site/uploads/
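--checksum compares files by content checksum instead of rsync's default quick check (size and modification time); it is slower but catches files whose content differs even though size and timestamp match.
--delete removes files from the destination that no longer exist on the source (use it for the final sync).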
Upload to S3
# Mirror the local uploads tree into the bucket, skipping temp and cache files
# (aws s3 sync transfers several files concurrently)
aws s3 sync \
  /var/www/uploads/ s3://company-media-bucket/uploads/ \
  --exclude "*.tmp" \
  --exclude "cache/*" \
  --acl public-read \
  --storage-class STANDARD

# Count the objects now stored under the uploads/ prefix
aws s3 ls --recursive s3://company-media-bucket/uploads/ | wc -l
Update URLs in Content
After the files are transferred, links in the content still point to the old URLs, so every occurrence has to be replaced:
import re
import mysql.connector
def update_media_urls_in_content(db_conn, old_base, new_base):
    """Replace ``old_base`` with ``new_base`` in every known content column.

    All tables are processed inside one transaction: the changes are
    committed once at the end, and rolled back if any statement fails, so
    the content is never left half-migrated.

    Args:
        db_conn: DB-API connection (``cursor()``, ``commit()``, ``rollback()``)
            whose driver uses ``%s`` placeholders.
        old_base: URL prefix to search for. Must be non-empty.
        new_base: URL prefix to write instead.

    Returns:
        Number of rows that were actually rewritten.

    Raises:
        ValueError: If ``old_base`` is empty. An empty pattern would match
            every row and ``str.replace('', ...)`` would insert ``new_base``
            between all characters, destroying the content.
    """
    if not old_base:
        raise ValueError("old_base must be a non-empty string")

    # Table/column names cannot be bound as query parameters, so they are
    # interpolated; this is safe only because the list is hard-coded here.
    tables_columns = [
        ('posts', 'content'),
        ('posts', 'excerpt'),
        ('pages', 'body'),
        ('users', 'avatar_url'),
    ]

    updated = 0
    cursor = db_conn.cursor()
    try:
        for table, column in tables_columns:
            cursor.execute(
                f"SELECT id, {column} FROM {table} WHERE {column} LIKE %s",
                (f'%{old_base}%',),
            )
            # Materialize the result set before issuing UPDATEs on the
            # same cursor.
            rows = cursor.fetchall()

            for row_id, content in rows:
                if not content:
                    continue
                new_content = content.replace(old_base, new_base)
                # LIKE treats "_" and "%" inside old_base as wildcards, so a
                # row can match without containing the literal prefix; skip
                # rows the replacement did not change.
                if new_content == content:
                    continue
                cursor.execute(
                    f"UPDATE {table} SET {column} = %s WHERE id = %s",
                    (new_content, row_id),
                )
                updated += 1

        db_conn.commit()
    except Exception:
        db_conn.rollback()
        raise
    finally:
        cursor.close()

    return updated
# Example run: move WordPress-style upload URLs to the new site's /uploads path.
update_media_urls_in_content(
    db_conn,
    old_base='https://old-site.com/wp-content/uploads',
    new_base='https://new-site.com/uploads',
)
For WordPress, use the Better Search Replace plugin or plain SQL:
-- Point media links inside post bodies at the new uploads location.
UPDATE wp_posts
   SET post_content = REPLACE(
           post_content,
           'https://old-site.com/wp-content/uploads/',
           'https://cdn.new-site.com/uploads/'
       )
 WHERE post_content LIKE '%old-site.com/wp-content/uploads/%';

-- Rewrite the site origin wherever it appears in post metadata.
-- NOTE(review): meta_value may hold PHP-serialized data; a plain REPLACE is
-- only safe there when both strings have the same length (as they do here) — confirm.
UPDATE wp_postmeta
   SET meta_value = REPLACE(
           meta_value,
           'https://old-site.com',
           'https://new-site.com'
       )
 WHERE meta_value LIKE '%old-site.com%';
Optimization During Migration
Migration is a good time to optimize images:
from PIL import Image
import os
def optimize_images(source_dir, target_dir):
    """Write optimized copies of every JPEG/PNG under ``source_dir``.

    For each image two files are written below ``target_dir``, keeping the
    relative directory layout: a WebP version (same name, ``.webp``
    extension) and a re-saved copy in the original format.

    Args:
        source_dir: Root directory that is walked recursively.
        target_dir: Root directory the results are written to; created on
            demand.

    Returns:
        None.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    target_root = os.path.abspath(target_dir)

    for root, dirs, files in os.walk(source_dir):
        # If the target lives inside the source tree, do not descend into it;
        # otherwise already-optimized output would be processed again.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != target_root
        ]

        for filename in files:
            if not filename.lower().endswith(image_extensions):
                continue

            source_path = os.path.join(root, filename)
            rel_path = os.path.relpath(source_path, source_dir)
            target_path = os.path.join(target_dir, rel_path)
            os.makedirs(os.path.dirname(target_path), exist_ok=True)

            # The context manager closes the underlying file handle; without
            # it a large library can exhaust the process's open-file limit.
            with Image.open(source_path) as img:
                # Convert to WebP
                webp_path = os.path.splitext(target_path)[0] + '.webp'
                img.save(webp_path, 'WEBP', quality=85, method=6)

                # Save optimized original
                img.save(target_path, optimize=True, quality=85)
Verification After Transfer
import requests
import hashlib
def _fetch_md5(url, timeout):
    """Return ``(status_code, md5_hexdigest)`` for ``url``, hashing in chunks.

    The body is streamed so large media files are never held in memory in
    full. The digest is only meaningful when the status code is 200.
    """
    digest = hashlib.md5()
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code == 200:
            for chunk in response.iter_content(chunk_size=65536):
                digest.update(chunk)
        return response.status_code, digest.hexdigest()


def verify_file_integrity(source_server, dest_server, file_list, timeout=30):
    """Compare files served by two servers and report every discrepancy.

    Each path is downloaded from both servers and the MD5 digests of the
    bodies are compared.

    Args:
        source_server: Base URL of the old server (no trailing slash).
        dest_server: Base URL of the new server (no trailing slash).
        file_list: Iterable of paths relative to the base URLs.
        timeout: Seconds to wait for each HTTP request; without a timeout a
            stalled connection would block the whole verification forever.

    Returns:
        List of human-readable error strings; empty when everything matches.
    """
    errors = []

    for path in file_list:
        # Network failures are recorded like any other discrepancy so a
        # single bad request does not abort the whole verification run.
        try:
            src_status, src_hash = _fetch_md5(f"{source_server}/{path}", timeout)
        except requests.RequestException as exc:
            errors.append(f"Source unreachable: {path} ({exc})")
            continue

        if src_status != 200:
            errors.append(f"Source missing: {path}")
            continue

        try:
            dst_status, dst_hash = _fetch_md5(f"{dest_server}/{path}", timeout)
        except requests.RequestException as exc:
            errors.append(f"Destination unreachable: {path} ({exc})")
            continue

        if dst_status != 200:
            errors.append(f"Destination missing: {path}")
            continue

        if src_hash != dst_hash:
            errors.append(f"Checksum mismatch: {path}")

    return errors
301 Redirects for Old Media URLs
If media file URLs have changed, configure redirects in nginx:
# Redirect old WP paths (goes inside the server block)
location ~* ^/wp-content/uploads/(.*)$ {
    return 301 /uploads/$1;
}

# Or via map for individual redirects.
# The map itself must be declared in the http context, outside any server block.
map $uri $media_redirect {
    default "";
    /wp-content/uploads/2022/01/image.jpg /uploads/2022/01/image.webp;
}

# Inside the server block: the map only defines a variable, so it has to be
# acted on explicitly. An empty value (no mapping for this URI) is falsy.
if ($media_redirect) {
    return 301 $media_redirect;
}
Execution Time
Transferring media files and updating URLs in content for a site of up to 10 GB takes 2–3 working days.







