Implementing Content Mapping (Old Structure → New) During Migration
Content mapping establishes the correspondence between the fields, types, and structures of the old CMS and those of the new platform. Without an explicit mapping, data either disappears or ends up in the wrong fields.
What Mapping Includes
- Content types: page → post, article → news, product → catalog_item
- Fields: post_title → title, post_content → body, post_date → published_at
- Taxonomies: categories/tags → a single taxonomy or several separate ones
- Media files: attachment → media_library
- Users: wp_users.user_login → users.username
Mapping Documentation as Schema
# content-mapping.yml
# Declarative description of how source (WordPress) content types, fields and
# taxonomies map onto the target platform. Each field entry names the source
# column or meta key, the target field, and an optional transform.
content_types:
  - source: "post"
    target: "article"
    fields:
      - source: "ID"
        target: "legacy_id"
        transform: "int_to_string"
      - source: "post_title"
        target: "title"
        transform: null
      - source: "post_content"
        target: "body"
        transform: "wp_shortcodes_to_html"
      - source: "post_excerpt"
        target: "summary"
        transform: "strip_tags"
      # The slug is required by the validation step, so it must be mapped.
      - source: "post_name"
        target: "slug"
        transform: null
      - source: "post_date"
        target: "published_at"
        transform: "datetime_utc"
      - source: "post_status"
        target: "status"
        transform: "map_status"  # publish→published, draft→draft
      # Entries with source_type "meta" are read from post meta, not from
      # the post row itself.
      - source: "_yoast_wpseo_title"
        target: "seo_title"
        source_type: "meta"
      - source: "_yoast_wpseo_metadesc"
        target: "seo_description"
        source_type: "meta"
      # The featured image is stored as the "_thumbnail_id" post meta
      # (an attachment ID), not as a column named "featured_image".
      - source: "_thumbnail_id"
        target: "cover_image_id"
        source_type: "meta"
        transform: "resolve_attachment_id"
  - source: "page"
    target: "page"
    fields:
      - source: "post_title"
        target: "name"
      - source: "post_content"
        target: "content"
      - source: "post_name"
        target: "slug"
taxonomies:
  - source: "category"
    target: "category"
    preserve_hierarchy: true
  - source: "post_tag"
    target: "tag"
    preserve_hierarchy: false
Python Mapping Script WordPress → Custom CMS
import mysql.connector
import requests
import json
from datetime import datetime
class WordPressMapper:
    """Map WordPress database rows to payload dicts for the target CMS.

    The ID lookup tables (``attachment_map``, ``user_map``, ``category_map``)
    start empty and are expected to be filled by earlier migration steps
    before ``map_post`` is called.

    NOTE(review): ``map_post`` calls ``self.strip_tags`` and
    ``transform_content`` calls ``self.render_gallery``; neither is defined
    in this class as shown - confirm they are provided elsewhere.
    """

    # Absolute URL prefix of the old site; rewritten to a root-relative path.
    # Override on a subclass or instance to migrate a different site.
    OLD_SITE_PREFIX = 'https://old-site.com/'

    # WordPress post_status → target status. 'trash' maps to None explicitly;
    # statuses not listed here fall back to 'draft' in map_status().
    STATUS_MAP = {
        'publish': 'published',
        'draft': 'draft',
        'private': 'hidden',
        'trash': None,
    }

    def __init__(self, wp_conn, target_api):
        """Store the WordPress DB connection and the target API client."""
        self.wp = wp_conn
        self.api = target_api
        self.attachment_map = {}  # wp_id → new_id
        self.user_map = {}
        self.category_map = {}

    def map_post(self, wp_post):
        """Build the target payload for one WordPress post row.

        Args:
            wp_post: dict-like ``wp_posts`` row. Reads ``ID``, ``post_title``,
                ``post_content``, ``post_excerpt``, ``post_name``,
                ``post_date`` (and ``post_date_gmt`` when present),
                ``post_status`` and ``post_author``.

        Returns:
            dict with the mapped fields, SEO meta, cover image, categories
            and tags. Categories with no entry in ``category_map`` appear
            as ``None``.
        """
        cursor = self.wp.cursor(dictionary=True)
        try:
            # Post metadata (SEO fields and featured-image reference).
            cursor.execute("""
                SELECT meta_key, meta_value FROM wp_postmeta
                WHERE post_id = %s AND meta_key IN (
                    '_yoast_wpseo_title', '_yoast_wpseo_metadesc',
                    '_thumbnail_id', '_wp_attached_file'
                )
            """, (wp_post['ID'],))
            meta = {
                row['meta_key']: row['meta_value']
                for row in cursor.fetchall()
            }

            # Categories and tags attached to the post. term_id is selected
            # so categories can be resolved against a map keyed by term id.
            cursor.execute("""
                SELECT t.term_id, t.name, t.slug, tt.taxonomy
                FROM wp_terms t
                JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
                JOIN wp_term_relationships tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
                WHERE tr.object_id = %s
            """, (wp_post['ID'],))
            terms = cursor.fetchall()
        finally:
            # Always release the cursor, even if a query fails.
            cursor.close()

        # post_date is site-local time; post_date_gmt is the UTC value, so
        # prefer it for a 'Z'-suffixed timestamp. Fall back to post_date when
        # the GMT column is absent or empty.
        published = wp_post.get('post_date_gmt') or wp_post['post_date']

        return {
            'legacy_id': str(wp_post['ID']),
            'title': wp_post['post_title'],
            'body': self.transform_content(wp_post['post_content']),
            'summary': self.strip_tags(wp_post['post_excerpt']),
            'slug': wp_post['post_name'],
            'published_at': published.isoformat() + 'Z',
            'status': self.map_status(wp_post['post_status']),
            'author_id': self.user_map.get(wp_post['post_author']),
            'seo_title': meta.get('_yoast_wpseo_title', ''),
            'seo_description': meta.get('_yoast_wpseo_metadesc', ''),
            'cover_image_id': self._resolve_wp_id(
                self.attachment_map, meta.get('_thumbnail_id')
            ),
            'categories': [
                # Accept a map keyed by term id or by slug.
                self.category_map.get(
                    t['term_id'], self.category_map.get(t['slug'])
                )
                for t in terms if t['taxonomy'] == 'category'
            ],
            'tags': [t['slug'] for t in terms if t['taxonomy'] == 'post_tag'],
        }

    @staticmethod
    def _resolve_wp_id(mapping, wp_id):
        """Look up ``wp_id`` in ``mapping``, tolerating str/int key mismatch.

        Meta values come back from the database as strings, while the ID
        maps may be keyed by integer IDs; try the raw value first, then its
        integer form. Returns ``None`` when nothing matches.
        """
        if wp_id is None:
            return None
        if wp_id in mapping:
            return mapping[wp_id]
        try:
            return mapping.get(int(wp_id))
        except (TypeError, ValueError):
            return None

    def map_status(self, wp_status):
        """Translate a WordPress post status; unknown statuses become 'draft'."""
        return self.STATUS_MAP.get(wp_status, 'draft')

    def transform_content(self, content):
        """Replace gallery shortcodes and rewrite old-site absolute URLs.

        ``[gallery ids="..."]`` shortcodes are replaced with the result of
        ``self.render_gallery(ids)``; links starting with ``OLD_SITE_PREFIX``
        become root-relative.
        """
        import re

        content = re.sub(
            r'\[gallery ids="([^"]+)"\]',
            lambda m: self.render_gallery(m.group(1)),
            content,
        )
        return content.replace(self.OLD_SITE_PREFIX, '/')
Mapping from Drupal to WordPress
Drupal uses a more complex structure (the Field API), so field values are passed in separately from the node row:
def map_drupal_node(node_row, field_data):
    """Map a Drupal node row plus its field values to WordPress post fields.

    Args:
        node_row: mapping with ``title``, ``status``, ``created`` (Unix
            timestamp) and ``alias`` (may be empty).
        field_data: mapping of Field API values; reads ``body_value``,
            ``field_meta_title_value`` and ``field_meta_desc_value``,
            each defaulting to ``''`` when absent.

    Returns:
        dict keyed by WordPress column / meta names.
    """
    return {
        'post_title': node_row['title'],
        'post_content': field_data.get('body_value', ''),
        # WordPress stores the published state as 'publish', not 'published'.
        'post_status': 'publish' if node_row['status'] == 1 else 'draft',
        # NOTE(review): fromtimestamp() converts using the local timezone of
        # the machine running the script - confirm it matches the site's.
        'post_date': datetime.fromtimestamp(node_row['created']).strftime('%Y-%m-%d %H:%M:%S'),
        # Prefer the existing URL alias; otherwise derive a slug from the title.
        'post_name': node_row['alias'] or slugify(node_row['title']),
        '_yoast_wpseo_title': field_data.get('field_meta_title_value', ''),
        '_yoast_wpseo_metadesc': field_data.get('field_meta_desc_value', ''),
    }
Taxonomy Mapping with Hierarchy Preservation
def migrate_categories(wp_cursor, target_api):
    """Create all WordPress categories in the target, preserving hierarchy.

    Categories are created parents-first so that every child can reference
    the new ID of its parent. Sorting by the parent column alone does not
    guarantee that order (a child of term 10 sorts before term 10 itself
    when term 10's own parent is 20), so the rows are walked breadth-first
    from the root categories instead.

    Args:
        wp_cursor: cursor returning dict rows with ``term_id``, ``name``,
            ``slug`` and ``parent``.
        target_api: client whose ``post(path, payload)`` returns a mapping
            containing the new ``id``.

    Returns:
        dict mapping WordPress term_id → new category id.
    """
    from collections import deque

    # Get categories with parents
    wp_cursor.execute("""
        SELECT t.term_id, t.name, t.slug, tt.parent
        FROM wp_terms t
        JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
        WHERE tt.taxonomy = 'category'
        ORDER BY tt.parent ASC
    """)
    rows = wp_cursor.fetchall()

    known_ids = {row['term_id'] for row in rows}
    roots = []
    children_of = {}  # parent term_id → rows waiting for that parent
    for row in rows:
        parent = row['parent']
        if parent and parent in known_ids and parent != row['term_id']:
            children_of.setdefault(parent, []).append(row)
        else:
            # Top-level category, or its parent is not a known category.
            roots.append(row)

    category_map = {}  # wp_term_id → new_id

    def create(cat):
        response = target_api.post('/categories', {
            'name': cat['name'],
            'slug': cat['slug'],
            'parent_id': category_map.get(cat['parent']),
        })
        category_map[cat['term_id']] = response['id']

    queue = deque(roots)
    while queue:
        cat = queue.popleft()
        create(cat)
        # The parent now exists, so its children can be created next.
        queue.extend(children_of.pop(cat['term_id'], ()))

    # Anything left is unreachable from a root (a parent cycle in the source
    # data); still migrate it rather than dropping it silently.
    for stranded in list(children_of.values()):
        for cat in stranded:
            create(cat)

    return category_map
Mapping Validation
# Check: all posts have all required fields
def validate_mapped_post(post):
    """Return True when the mapped post has every required field filled.

    A field counts as missing when it is absent or falsy. When anything is
    missing, a warning naming the post's ``legacy_id`` and the missing
    fields is printed and False is returned.
    """
    required_fields = ('title', 'body', 'slug', 'published_at')
    missing = [name for name in required_fields if not post.get(name)]
    if not missing:
        return True
    print(f"WARNING: Post {post.get('legacy_id')} missing: {missing}")
    return False
Execution Time
Developing the mapping and transformation scripts for a site of up to 5,000 pages takes 3–5 working days.







