Simulating Realistic Traffic in Load Testing
A flat load of 100 VUs is not what happens in reality. Real traffic has morning and evening peaks, different client types (browsers, mobile apps, API integrations), session behavior, random pauses between actions, and a Pareto-like distribution of requests across pages. A realistic simulation reveals issues that purely synthetic tests miss.
Analyzing Real Traffic as Test Basis
# Extract patterns from nginx access log
import re
from collections import Counter, defaultdict
import json
def analyze_access_log(log_file: str):
    """Summarize an nginx access log and derive per-endpoint traffic weights.

    Prints the 20 most-requested endpoints (as a share of all parsed
    requests) and an hour-by-hour request histogram, then returns the
    endpoint weights for use in a k6 scenario.

    Args:
        log_file: Path to an access log in nginx "combined"-style layout
            (``ip ... [time] "METHOD path HTTP/x.y" status bytes ...``).

    Returns:
        dict mapping ``"METHOD /normalized/path"`` to its fraction of all
        parsed requests (rounded to 3 decimals) for the top 20 endpoints.
        Empty when no line matches the expected layout.
    """
    pattern = re.compile(
        r'(?P<ip>\S+) .+ \[(?P<time>[^\]]+)\] '
        # The protocol version contains a dot (HTTP/1.0, HTTP/1.1, HTTP/2.0),
        # so it must be matched with [\d.]+ rather than \d+.
        r'"(?P<method>\w+) (?P<path>[^"]+) HTTP/[\d.]+" '
        r'(?P<status>\d+) (?P<bytes>\d+)'
    )
    endpoint_counts = Counter()
    method_counts = Counter()
    hourly_traffic = defaultdict(int)

    # Access logs routinely contain arbitrary bytes sent by clients; decode
    # leniently so one malformed request line cannot abort the analysis.
    with open(log_file, encoding='utf-8', errors='replace') as f:
        for line in f:
            m = pattern.match(line)
            if not m:
                continue
            # Normalize path: drop the query string and collapse numeric IDs
            path = re.sub(r'/\d+', '/{id}', m.group('path').split('?')[0])
            endpoint_counts[f"{m.group('method')} {path}"] += 1
            method_counts[m.group('method')] += 1
            # Hourly distribution: the time looks like 10/Oct/2023:13:55:36 +0000,
            # so the field after the first ':' is the hour.
            hour = m.group('time').split(':')[1]
            hourly_traffic[hour] += 1

    total = sum(endpoint_counts.values())
    print("=== Top Endpoints (% of traffic) ===")
    for endpoint, count in endpoint_counts.most_common(20):
        pct = count / total * 100
        print(f" {pct:.1f}% {endpoint}")
    print("\n=== Hourly Distribution ===")
    for hour in sorted(hourly_traffic):
        bar = '█' * (hourly_traffic[hour] // 100)
        print(f" {hour}:00 {bar} {hourly_traffic[hour]}")
    # Export for k6 scenario (weights are relative to ALL parsed requests,
    # so they sum to less than 1 when more than 20 endpoints exist)
    weights = {ep: round(cnt/total, 3) for ep, cnt in endpoint_counts.most_common(20)}
    return weights
k6 Scenario with Realistic Behavior
// tests/realistic/user-journey.js
import http from 'k6/http'
import { check, sleep } from 'k6'
import { SharedArray } from 'k6/data'
import { randomItem, randomIntBetween } from 'https://jslib.k6.io/k6-utils/1.4.0/index.js'
// Load test users from CSV (first row is the header: email,token,userId).
// Blank lines (including the one produced by a trailing newline) are dropped
// and CRLF line endings are accepted, so every entry has real field values
// instead of `undefined` tokens.
const users = new SharedArray('users', function() {
  return open('./data/test-users.csv')
    .split(/\r?\n/)
    .slice(1)
    .map(row => row.trim())
    .filter(Boolean)
    .map(row => {
      const [email, token, userId] = row.split(',').map(field => field.trim())
      return { email, token, userId }
    })
})
// Popular search terms, one per line. Lines are trimmed before filtering so
// whitespace-only lines and CRLF remnants never become search queries.
const searchTerms = new SharedArray('searches', function() {
  return open('./data/popular-searches.txt')
    .split(/\r?\n/)
    .map(term => term.trim())
    .filter(Boolean)
})
// Ramp shape shared by the VU-based scenarios:
// 5 minutes up to the peak, 30 minutes at the peak, 5 minutes back to zero.
const rampUpHoldDown = (peakVUs) => [
  { duration: '5m', target: peakVUs },
  { duration: '30m', target: peakVUs },
  { duration: '5m', target: 0 }
]

// k6 options: three concurrent scenarios modelling the traffic mix, plus
// global pass/fail thresholds on latency and error rate.
export const options = {
  scenarios: {
    // Anonymous browsers (40% of traffic)
    anonymous_browse: {
      executor: 'ramping-vus',
      startVUs: 0,
      stages: rampUpHoldDown(40),
      exec: 'anonymousBrowse'
    },
    // Logged-in users (50% of traffic)
    logged_in_users: {
      executor: 'ramping-vus',
      startVUs: 0,
      stages: rampUpHoldDown(50),
      exec: 'loggedInJourney'
    },
    // API clients (10% of traffic): fixed request rate for the full 40 minutes
    api_clients: {
      executor: 'constant-arrival-rate',
      rate: 10,
      timeUnit: '1s',
      duration: '40m',
      preAllocatedVUs: 20,
      exec: 'apiClient'
    }
  },
  thresholds: {
    http_req_duration: ['p(95)<800'],
    http_req_failed: ['rate<0.01'],
  }
}
// Base URL of the system under test: read from __ENV.BASE_URL, falling back
// to the staging host when that variable is unset or empty.
const BASE = __ENV.BASE_URL || 'https://staging.example.com'
/**
 * Scenario: anonymous browser.
 *
 * Flow: landing page → category listing → (70%) one product page →
 * (20%) one search, with a randomized think-time pause after each request.
 */
export function anonymousBrowse() {
  const categories = ['electronics', 'clothing', 'books', 'sports']

  // Landing page
  http.get(`${BASE}/`)
  sleep(randomIntBetween(1, 4))

  // Catalog listing for one randomly chosen category
  const chosenCategory = randomItem(categories)
  const catalogUrl = `${BASE}/api/products?category=${chosenCategory}&limit=20`
  http.get(catalogUrl)
  sleep(randomIntBetween(2, 8))

  // 30% leave right after the listing; the remaining 70% open a product
  const opensProduct = Math.random() > 0.3
  if (opensProduct) {
    const id = randomIntBetween(1, 500)
    http.get(`${BASE}/api/products/${id}`)
    sleep(randomIntBetween(3, 15))
  }

  // 20% run a search using one of the popular terms
  const runsSearch = Math.random() < 0.2
  if (runsSearch) {
    const query = encodeURIComponent(randomItem(searchTerms))
    http.get(`${BASE}/api/search?q=${query}`)
    sleep(randomIntBetween(1, 5))
  }
}
/**
 * Scenario: logged-in user.
 *
 * Flow: profile → 2–8 product pages → (40%) add to cart →
 * (60% of those) view cart and place an order. Every request carries the
 * bearer token of a randomly chosen test user.
 */
export function loggedInJourney() {
  const user = randomItem(users)
  const headers = {
    'Authorization': `Bearer ${user.token}`,
    'Content-Type': 'application/json'
  }
  // Profile
  http.get(`${BASE}/api/me`, { headers })
  sleep(randomIntBetween(1, 3))
  // Browse products.
  // The page-view count is drawn ONCE before the loop: calling
  // randomIntBetween() inside the loop condition re-rolls the bound on every
  // iteration, which skews sessions toward far fewer views than 2–8.
  const productViews = randomIntBetween(2, 8)
  for (let i = 0; i < productViews; i++) {
    const productId = randomIntBetween(1, 500)
    http.get(`${BASE}/api/products/${productId}`, { headers })
    sleep(randomIntBetween(2, 10))
  }
  // 40% add to cart
  if (Math.random() < 0.4) {
    http.post(`${BASE}/api/cart/items`, JSON.stringify({
      productId: randomIntBetween(1, 500),
      quantity: randomIntBetween(1, 3)
    }), { headers })
    sleep(randomIntBetween(1, 3))
    // 60% of those who added—checkout
    if (Math.random() < 0.6) {
      http.get(`${BASE}/api/cart`, { headers })
      sleep(randomIntBetween(2, 5))
      const checkout = http.post(`${BASE}/api/orders`, JSON.stringify({
        paymentMethod: 'saved_card',
        shippingAddressId: 1
      }), { headers })
      check(checkout, { 'order created': (r) => r.status === 201 })
    }
  }
}
/**
 * Scenario: API client (integration).
 *
 * Performs one product-sync request per iteration, asking for products
 * changed within the last hour and authenticating with __ENV.API_KEY.
 */
export function apiClient() {
  const requestParams = {
    headers: {
      'X-API-Key': __ENV.API_KEY,
      'Content-Type': 'application/json'
    }
  }
  // Product sync: `since` is "now minus one hour" in epoch milliseconds
  const sinceMs = Date.now() - 3600000
  const response = http.get(`${BASE}/api/v1/products?since=${sinceMs}`, requestParams)
  check(response, { 'api: 200': (res) => res.status === 200 })
}
Pareto Distribution (80/20)
In real traffic, roughly 20% of pages receive 80% of the requests:
/**
 * Pareto distribution generator for IDs.
 *
 * Draws an integer in [1, maxId] whose popularity follows a power law:
 * low IDs are requested most often and high IDs form a long, thin tail.
 * Uses inverse-transform sampling of a Pareto distribution (scale 1)
 * truncated to [1, maxId + 1), then floors the sample to an ID.
 *
 * @param {number} maxId - Largest ID that may be returned.
 * @param {number} [shape=1.5] - Pareto shape (alpha). Larger values
 *   concentrate traffic on the lowest IDs; smaller values fatten the tail.
 * @returns {number} Integer ID between 1 and maxId.
 */
function paretoId(maxId, shape = 1.5) {
  const upperBound = maxId + 1
  // Probability mass an untruncated Pareto would place at or beyond
  // upperBound; rescaling u by (1 - tailMass) keeps samples below that bound.
  const tailMass = Math.pow(upperBound, -shape)
  const u = Math.random()
  // Inverse CDF: x = (1 - u')^(-1/shape). Note the NEGATIVE exponent —
  // multiplying maxId by (1 - u)^(1/shape) instead skews toward HIGH IDs.
  const sample = Math.pow(1 - u * (1 - tailMass), -1 / shape)
  // Clamp guards against floating-point rounding landing exactly on the bound
  return Math.min(maxId, Math.floor(sample))
}
// Usage
const productId = paretoId(10000) // mostly IDs 1-200, rarely ID 9000+
Recording Real Traffic for Replay
# Record real requests to HAR file via Nginx
# nginx.conf
# Writes one JSON object per request to /var/log/nginx/har.log containing the
# ISO-8601 timestamp, request method, request URI, the Authorization request
# header and the response status.
# WARNING: the Authorization header value is logged verbatim, so this file
# holds live credentials. NOTE(review): restrict access to the log and use
# only throwaway/test tokens — confirm with whoever owns this environment.
log_format har_format escape=json
'{"startedDateTime":"$time_iso8601",'
'"request":{"method":"$request_method","url":"$request_uri",'
'"headers":{"Authorization":"$http_authorization"}},'
'"response":{"status":$status}}';
access_log /var/log/nginx/har.log har_format;
# Convert to k6 scenario
# Installs the har-to-k6 CLI globally, then converts the recorded log into a
# k6 script at tests/recorded-traffic.js.
# NOTE(review): the input here is nginx-har.log, while the nginx config writes
# /var/log/nginx/har.log — presumably the log is copied/renamed first; confirm.
# NOTE(review): har-to-k6 presumably expects a complete HAR document rather
# than one JSON object per line — TODO confirm whether the log must be wrapped
# into HAR structure before conversion.
npm install -g har-to-k6
har-to-k6 nginx-har.log -o tests/recorded-traffic.js
Traffic Heatmap by Hour
# Set load profile from real traffic data
# Relative load for each hour of the day (0-23); 1.0 is the peak hour.
HOURLY_WEIGHTS = {
    0: 0.2, 1: 0.1, 2: 0.1, 3: 0.1, 4: 0.1, 5: 0.15,
    6: 0.3, 7: 0.5, 8: 0.7, 9: 0.9, 10: 1.0, 11: 1.0,
    12: 0.95, 13: 0.9, 14: 0.85, 15: 0.85, 16: 0.9, 17: 0.95,
    18: 1.0, 19: 0.95, 20: 0.9, 21: 0.75, 22: 0.5, 23: 0.35
}
BASE_VUS = 100  # VU at peak hour


def generate_k6_stages(base_vus=BASE_VUS, weights=HOURLY_WEIGHTS, stage_duration="1h"):
    """Render 24 k6 ``stages`` entries, one per hour of the day.

    Args:
        base_vus: Number of VUs at the peak hour (weight 1.0).
        weights: Mapping of hour (0-23) to relative load; must contain all
            24 hours.
        stage_duration: k6 duration string used for every stage. The default
            "1h" replays a full day in real time; a shorter value such as
            "5m" compresses the same daily shape into a shorter run.

    Returns:
        The stage object literals joined by ",\\n ", ready to be placed
        inside a k6 ``stages: [...]`` array.
    """
    stages = []
    for hour in range(24):
        # round(), not int(): float products such as 100 * 0.57 evaluate to
        # 56.999... and int() would silently truncate them to 56.
        vus = round(base_vus * weights[hour])
        stages.append(f'{{ duration: "{stage_duration}", target: {vus} }}')
    return ',\n '.join(stages)


print(f"stages: [\n {generate_k6_stages()}\n]")
Timeline
Developing a realistic load-test scenario based on real traffic analysis takes 2–3 business days.







