Solving N+1 Problem in GraphQL with DataLoader
N+1 is a classic GraphQL problem: when querying a list of N objects with nested relationships, N+1 database queries execute (1 for the list + N for each nested field). DataLoader solves this with batching: it collects all requests for one event loop tick and executes one grouped query.
Problem Demonstration
# This query without DataLoader generates 1 + N DB queries
query {
posts { # SELECT * FROM posts → 100 rows
id
title
author { # SELECT * FROM users WHERE id = ? × 100 times!
name
}
}
}
// WITHOUT DataLoader—N+1
const resolvers = {
Post: {
author: async (post) => {
// Called separately for each of 100 posts
return db.users.findById(post.author_id) // 100 queries!
}
}
}
Basic DataLoader
import DataLoader from 'dataloader'
// Batching function: receives key array, returns value array
async function batchUsers(userIds) {
// One query instead of N
const users = await db.query(
'SELECT * FROM users WHERE id = ANY($1)',
[userIds]
)
// Important: result must be in the same order as input keys!
const userMap = new Map(users.map(u => [u.id, u]))
return userIds.map(id => userMap.get(id) || null)
}
const userLoader = new DataLoader(batchUsers)
// Usage in resolver—looks like single query, works as batch
const resolvers = {
Post: {
author: async (post, args, context) => {
return context.loaders.userById.load(post.author_id)
}
}
}
DataLoader Registry (Per-Request)
DataLoaders are created per-request—otherwise cache is shared between users (vulnerability):
// dataloaders.js
import DataLoader from 'dataloader'
export class DataLoaderRegistry {
constructor(db) {
this.db = db
// Auto-batches within one event loop tick
this.userById = new DataLoader(async (ids) => {
const rows = await db.query(
'SELECT * FROM users WHERE id = ANY($1::int[])', [ids]
)
const map = new Map(rows.map(r => [r.id, r]))
return ids.map(id => map.get(id) ?? null)
})
this.postsByAuthorId = new DataLoader(async (authorIds) => {
const rows = await db.query(
'SELECT * FROM posts WHERE author_id = ANY($1::int[])', [authorIds]
)
// One-to-many: return array for each authorId
const map = new Map()
for (const row of rows) {
if (!map.has(row.author_id)) map.set(row.author_id, [])
map.get(row.author_id).push(row)
}
return authorIds.map(id => map.get(id) ?? [])
})
this.commentsByPostId = new DataLoader(async (postIds) => {
const rows = await db.query(
'SELECT * FROM comments WHERE post_id = ANY($1::int[]) ORDER BY created_at',
[postIds]
)
const map = new Map()
for (const row of rows) {
if (!map.has(row.post_id)) map.set(row.post_id, [])
map.get(row.post_id).push(row)
}
return postIds.map(id => map.get(id) ?? [])
})
// With filtering—key includes parameters
this.tagsByPostId = new DataLoader(async (postIds) => {
const rows = await db.query(`
SELECT pt.post_id, t.* FROM tags t
JOIN post_tags pt ON pt.tag_id = t.id
WHERE pt.post_id = ANY($1::int[])
`, [postIds])
const map = new Map()
for (const row of rows) {
if (!map.has(row.post_id)) map.set(row.post_id, [])
map.get(row.post_id).push(row)
}
return postIds.map(id => map.get(id) ?? [])
})
}
}
// In context factory
context: async ({ req }) => {
const user = await authenticate(req)
// New instance for each HTTP request!
const loaders = new DataLoaderRegistry(db)
return { user, db, loaders }
}
DataLoader with Parameters
When filtering by additional arguments:
// Bad: separate loader for each parameter combination
// Good: composite key
this.productsByCategoryAndStatus = new DataLoader(
async (keys) => {
// keys = [{categoryId: 1, status: 'active'}, ...]
const categoryIds = [...new Set(keys.map(k => k.categoryId))]
const statuses = [...new Set(keys.map(k => k.status))]
const rows = await db.query(`
SELECT * FROM products
WHERE category_id = ANY($1::int[])
AND status = ANY($2::text[])
`, [categoryIds, statuses])
// Group by composite key
const map = new Map()
for (const row of rows) {
const key = `${row.category_id}:${row.status}`
if (!map.has(key)) map.set(key, [])
map.get(key).push(row)
}
return keys.map(k => map.get(`${k.categoryId}:${k.status}`) ?? [])
},
{
// Custom key for objects
cacheKeyFn: (key) => `${key.categoryId}:${key.status}`
}
)
// Usage in resolver
const resolvers = {
Category: {
activeProducts: (category, args, context) => {
return context.loaders.productsByCategoryAndStatus.load({
categoryId: category.id,
status: 'active'
})
}
}
}
Cache Priming
Avoids repeated requests to already-loaded data:
const resolvers = {
Query: {
posts: async (parent, { limit }, context) => {
const posts = await context.db.posts.findAll({ limit })
// Prime userById cache with data already in posts
// If posts contain embedded author—DataLoader won't re-fetch
for (const post of posts) {
if (post.author) {
context.loaders.userById.prime(post.author.id, post.author)
}
}
return posts
}
}
}
Measuring Efficiency
// Middleware for logging SQL query count
function queryCounterPlugin() {
return {
async requestDidStart() {
let queryCount = 0
const originalQuery = db.query.bind(db)
db.query = (...args) => {
queryCount++
return originalQuery(...args)
}
return {
async willSendResponse({ response }) {
console.log(`GraphQL operation executed ${queryCount} SQL queries`)
// In production—metric to Prometheus
response.http.headers.set('X-SQL-Count', queryCount.toString())
}
}
}
}
}
Before DataLoader: 100 posts query → 101 SQL queries. After DataLoader: same GraphQL query → 3–5 SQL queries (posts, users batch, comments batch).
Timelines
Implementing DataLoaders for all relationships in GraphQL schema—1–2 working days.







