Implementing AI-Powered Product Recommendations for E-Commerce
Product recommendations are one of the most measurable investments in e-commerce: according to McKinsey, 35% of Amazon's revenue is generated through recommendations. Technically, they are more complex than content recommendations: the system must account for categories, product attributes, seasonality, price ranges, and stock availability.
Types of E-Commerce Recommendations
- "Similar products" — products with close attributes (product card)
- "Frequently bought together" — complementary items (cart, checkout)
- "Personal recommendations" — homepage, sections (purchase + view history)
- "Alternatives" — if product unavailable, suggest replacement
- "Post-purchase" — upsell, accessories, consumables
Product Data Structure for Embedding
/**
 * Builds the plain-text representation of a product that is sent to the
 * embedding model.
 *
 * Each non-empty field becomes one line, in this order: name, brand,
 * category path, description (first 500 characters), tags, attributes.
 *
 * @param {object} product - Product record. All fields are optional.
 * @param {string} [product.name]
 * @param {string} [product.brand]
 * @param {string} [product.category]
 * @param {string} [product.subcategory]
 * @param {string} [product.description]
 * @param {string[]} [product.tags]
 * @param {Object<string, *>} [product.attributes]
 * @returns {string} Newline-separated text; empty string if nothing is set.
 */
function buildProductText(product) {
  // Join only the parts that exist so a missing category or subcategory
  // never leaks the literal text "undefined" into the embedding input.
  const categoryPath = [product.category, product.subcategory]
    .filter(Boolean)
    .join(' > ');

  const attributes = Object.entries(product.attributes ?? {})
    .map(([key, value]) => `${key}: ${value}`)
    .join(', ');

  return [
    product.name,
    product.brand,
    categoryPath,
    product.description?.slice(0, 500),
    product.tags?.join(', '),
    attributes,
  ]
    .filter(Boolean) // drop missing/empty fields so no blank lines are emitted
    .join('\n');
}
// Indexing
/**
 * Computes an embedding for a product and upserts it into
 * `product_embeddings`.
 *
 * Products that are not active, or whose stock is exactly 0, are skipped
 * without calling the embedding API or the database.
 *
 * @param {object} product - Product record; `id`, `active` and `stock` are read here.
 * @returns {Promise<void>}
 */
async function indexProduct(product) {
  const isIndexable = product.active && product.stock !== 0;
  if (!isIndexable) {
    return; // skip inactive
  }

  const response = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: buildProductText(product),
  });
  const vector = response.data[0].embedding;

  // The embedding is passed as a JSON array string and cast to `vector` in SQL.
  const upsertSql = `
INSERT INTO product_embeddings (product_id, embedding, updated_at)
VALUES ($1, $2::vector, NOW())
ON CONFLICT (product_id) DO UPDATE
SET embedding = $2::vector, updated_at = NOW()
`;
  await db.query(upsertSql, [product.id, JSON.stringify(vector)]);
}
Similar Products
/**
 * Returns the products whose embeddings are nearest to the given product's
 * embedding, ordered from most to least similar.
 *
 * The source product itself and inactive products are always excluded.
 *
 * @param {*} productId - Id of the product to find neighbours for.
 * @param {object} [options]
 * @param {number} [options.limit=8] - Maximum number of rows to return.
 * @param {number} [options.minPrice] - Inclusive lower price bound; omitted = no bound.
 * @param {number} [options.maxPrice] - Inclusive upper price bound; omitted = no bound.
 * @param {boolean} [options.inStockOnly=true] - When true, only rows with `stock > 0`.
 * @returns {Promise<object[]>} Rows with id, name, slug, price, main_image,
 *   rating, reviews_count and `similarity` (1 minus the `<=>` distance).
 */
async function getSimilarProducts(productId, options = {}) {
  const { limit = 8, minPrice, maxPrice, inStockOnly = true } = options;

  // `??` rather than `||`: only a missing bound disables the filter, so an
  // explicit 0 is still sent to the query as a real price bound.
  const params = [productId, inStockOnly, minPrice ?? null, maxPrice ?? null, limit];

  const result = await db.query(`
WITH source AS (
SELECT pe.embedding, p.price, p.category_id
FROM product_embeddings pe
JOIN products p ON p.id = pe.product_id
WHERE pe.product_id = $1
)
SELECT
p.id, p.name, p.slug, p.price,
p.main_image, p.rating, p.reviews_count,
1 - (pe.embedding <=> source.embedding) AS similarity
FROM product_embeddings pe
JOIN products p ON p.id = pe.product_id
CROSS JOIN source
WHERE pe.product_id != $1
AND p.active = true
AND ($2::boolean IS FALSE OR p.stock > 0)
AND ($3::numeric IS NULL OR p.price >= $3)
AND ($4::numeric IS NULL OR p.price <= $4)
ORDER BY pe.embedding <=> source.embedding
LIMIT $5
`, params);
  return result.rows;
}
Association Rules: "Frequently Bought Together"
Market Basket Analysis via Apriori or FP-Growth on order history:
# Python: periodic update (cron daily)
from mlxtend.frequent_patterns import fpgrowth, association_rules
import pandas as pd
def compute_frequently_bought_together():
    """Mine pairwise "frequently bought together" rules and persist them.

    Loads recent orders, builds a boolean order x product basket matrix,
    runs FP-Growth limited to item pairs, derives association rules filtered
    by lift >= 1.5, and saves each rule as (antecedent, consequent,
    confidence, lift) via ``save_association``.

    Itemsets are capped at two items so that every rule is exactly
    one product -> one product. Without the cap, a rule such as
    {A, B} -> {C} would be stored as A -> C while carrying the confidence
    and lift of the larger rule.
    """
    # Load orders
    # NOTE(review): must be DataFrame-like with order_id / product_id columns,
    # since .groupby() is called on it; confirm against the loader.
    orders = fetch_orders_last_90_days()  # [(order_id, product_id)]

    # Create order-product matrix: True where the order contains the product.
    basket = (
        orders.groupby(['order_id', 'product_id'])['product_id']
        .count()
        .unstack()
        .fillna(0)
    )
    basket = basket > 0

    # FP-Growth, restricted to single items and pairs (max_len=2).
    frequent_sets = fpgrowth(
        basket, min_support=0.005, use_colnames=True, max_len=2
    )
    if frequent_sets.empty:
        # Nothing met the support threshold, so there are no rules to save.
        return

    rules = association_rules(frequent_sets, metric='lift', min_threshold=1.5)

    # Save to DB
    for _, rule in rules.iterrows():
        # Each side holds exactly one product because of max_len=2; the
        # one-element unpacking fails loudly if that ever stops being true.
        (antecedent,) = rule['antecedents']
        (consequent,) = rule['consequents']
        save_association(antecedent, consequent, rule['confidence'], rule['lift'])
// Node.js: get associations
/**
 * Looks up the stored association rules whose antecedent is the given
 * product and returns the consequent products, highest lift first.
 *
 * Only consequents that are active and have stock > 0 are returned.
 *
 * @param {*} productId - Antecedent product id.
 * @param {number} [limit=4] - Maximum number of rows.
 * @returns {Promise<object[]>} Rows with id, name, slug, price, main_image,
 *   confidence and lift.
 */
async function getFrequentlyBoughtTogether(productId, limit = 4) {
  const associationsSql = `
SELECT
p.id, p.name, p.slug, p.price, p.main_image,
ar.confidence, ar.lift
FROM association_rules ar
JOIN products p ON p.id = ar.consequent_id
WHERE ar.antecedent_id = $1
AND p.active = true AND p.stock > 0
ORDER BY ar.lift DESC
LIMIT $2
`;
  const { rows } = await db.query(associationsSql, [productId, limit]);
  return rows;
}
Personal Recommendations via Matrix Factorization
# Train on implicit feedback
import implicit
from scipy.sparse import csr_matrix
def train_product_model(events):
    """Train an implicit-feedback ALS model on weighted user/product events.

    Args:
        events: table with ``user_id``, ``product_id`` and ``weight`` columns
            (weight: view=1, add_to_cart=3, purchase=10, review=8).
            NOTE(review): presumably a pandas DataFrame, since ``.unique()``
            and ``.map()`` are used on its columns.

    Returns:
        Tuple ``(model, users_idx, items_idx)`` where the two dicts map raw
        user / product ids to the row / column index used in the matrix.
    """
    users_idx = {u: i for i, u in enumerate(events['user_id'].unique())}
    items_idx = {p: i for i, p in enumerate(events['product_id'].unique())}

    # Rows are users, columns are products. The explicit shape keeps the
    # matrix dimensions tied to the two index maps.
    user_items = csr_matrix(
        (
            events['weight'],
            (events['user_id'].map(users_idx), events['product_id'].map(items_idx)),
        ),
        shape=(len(users_idx), len(items_idx)),
    )

    model = implicit.als.AlternatingLeastSquares(factors=64, iterations=30)
    # Fit on the user-item matrix, NOT its transpose. The caller unpacks
    # `ids, scores` from model.recommend(), which is the implicit >= 0.5 API,
    # and in that API fit() takes user-item input. Passing the item-user
    # transpose would swap the user and item factors.
    model.fit(user_items)
    return model, users_idx, items_idx
# Get recommendations for user
def get_personal_recs(user_id, model_data, n=12):
    """Return up to ``n`` personalised product recommendations for a user.

    Args:
        user_id: raw user id, looked up in the training-time user index.
        model_data: tuple ``(model, users_idx, items_idx)``.
        n: number of recommendations requested from the model.

    Returns:
        List of ``{'product_id': ..., 'score': float}`` dicts, or an empty
        list when the user is not in the index (cold start; the caller is
        expected to fall back, e.g. to trending products).
    """
    model, users_idx, items_idx = model_data

    # Reverse lookup: matrix column index -> raw product id.
    product_by_index = dict(zip(items_idx.values(), items_idx.keys()))

    if user_id not in users_idx:
        return []  # cold start — return trending

    history_row = get_user_item_matrix_row(user_id, users_idx, items_idx)
    item_indices, item_scores = model.recommend(
        users_idx[user_id], history_row, N=n
    )

    recs = []
    for item_index, item_score in zip(item_indices, item_scores):
        recs.append({
            'product_id': product_by_index[item_index],
            'score': float(item_score),
        })
    return recs
Cold Start: New Users and Products
New user — show bestsellers personalized by UTM/entry category:
/**
 * Cold-start recommendations for a user with no history: in-stock, active
 * products ordered by 7-day purchases (then rating), optionally restricted
 * to the category the user entered through.
 *
 * @param {string} [entryCategory] - Category slug; any falsy value disables the filter.
 * @param {number} [limit=8] - Maximum number of rows.
 * @returns {Promise<*>} Whatever `db.query` resolves to (the query result
 *   itself, not just its rows).
 */
async function getNewUserRecs(entryCategory, limit = 8) {
  const categoryFilter = entryCategory ? entryCategory : null;
  const bestsellersSql = `
SELECT p.*, ps.views_7d, ps.purchases_7d
FROM products p
JOIN product_stats ps ON ps.product_id = p.id
WHERE p.active = true AND p.stock > 0
AND ($1::text IS NULL OR p.category_slug = $1)
ORDER BY ps.purchases_7d DESC, ps.rating DESC
LIMIT $2
`;
  return db.query(bestsellersSql, [categoryFilter, limit]);
}
New product — content-based embedding works immediately, collaborative filtering catches up after 50–100 events.
Recommendation Diversity
A block of 8 nearly identical laptops is a poor recommendation set. The results need diversity:
/**
 * Greedily re-ranks recommendations so the result is not dominated by a
 * single category.
 *
 * The first (top-ranked) item is always kept. Each following slot goes to
 * the remaining candidate with the highest penalised score:
 * `score * (1 - diversityFactor * maxSimilarity)`, where `maxSimilarity` is
 * the candidate's highest category similarity to any item already chosen.
 * Ties go to the candidate that appears earlier in the input.
 *
 * The input array is not modified.
 *
 * @param {object[]} recommendations - Ranked candidates; each is expected to
 *   carry a numeric `score` plus `category_id` / `parent_category_id`.
 * @param {number} [diversityFactor=0.3] - Penalty strength; 0 disables it.
 * @param {number} [limit=8] - Maximum number of items to return.
 * @returns {object[]} At most `limit` items; empty for empty or missing input.
 */
function diversify(recommendations, diversityFactor = 0.3, limit = 8) {
  if (!Array.isArray(recommendations) || recommendations.length === 0 || limit <= 0) {
    return [];
  }

  const [top, ...remaining] = recommendations;
  const selected = [top];

  while (selected.length < limit && remaining.length > 0) {
    // Single pass for the best candidate instead of sorting every round.
    let bestIndex = 0;
    let bestScore = -Infinity;

    remaining.forEach((candidate, index) => {
      const maxSimilarity = Math.max(
        ...selected.map((chosen) => categorySimilarity(chosen, candidate)),
      );
      const score = candidate.score * (1 - diversityFactor * maxSimilarity);
      // Strict `>` keeps the earliest candidate on ties.
      if (score > bestScore) {
        bestScore = score;
        bestIndex = index;
      }
    });

    selected.push(...remaining.splice(bestIndex, 1));
  }

  return selected;
}

/**
 * Coarse category similarity between two products.
 *
 * @param {object} a
 * @param {object} b
 * @returns {number} 1 for the same category, 0.5 for the same parent
 *   category, otherwise 0. A missing id never counts as a match.
 */
function categorySimilarity(a, b) {
  // `!= null` guards: two products that both lack an id must not be treated
  // as sharing a category (undefined === undefined would otherwise match).
  if (a.category_id != null && a.category_id === b.category_id) {
    return 1;
  }
  if (a.parent_category_id != null && a.parent_category_id === b.parent_category_id) {
    return 0.5;
  }
  return 0;
}
A/B Testing Algorithms
/**
 * Serves recommendations using the algorithm assigned to the user by the
 * `recs-algorithm` A/B experiment.
 *
 * `collaborative` and `hybrid` have dedicated handlers; `content-based` and
 * any other variant value use similar-products.
 *
 * @param {*} userId - User the experiment variant is resolved for.
 * @param {*} productId - Product currently being viewed.
 * @returns {Promise<*>} Result of the selected recommendation function.
 */
async function getRecommendations(userId, productId) {
  const variant = await getABVariant(userId, 'recs-algorithm');

  if (variant === 'collaborative') {
    return getPersonalRecs(userId);
  }
  if (variant === 'hybrid') {
    return getHybridRecs(userId, productId);
  }
  // 'content-based' and every unrecognised variant.
  return getSimilarProducts(productId);
}
// Track recommendation conversion
/**
 * Records a click on a recommended product as a `click` row in
 * `rec_events`, timestamped by the database.
 *
 * @param {*} userId - User who clicked.
 * @param {*} productId - Recommended product that was clicked.
 * @param {*} position - Position of the product within the recommendation block.
 * @param {*} algorithm - Algorithm that produced the recommendation.
 * @returns {Promise<void>}
 */
async function trackRecommendationClick(userId, productId, position, algorithm) {
  const insertClickSql = `
INSERT INTO rec_events (user_id, product_id, position, algorithm, event_type, created_at)
VALUES ($1, $2, $3, $4, 'click', NOW())
`;
  const values = [userId, productId, position, algorithm];
  await db.query(insertClickSql, values);
}
Timeline
- Content-based similar products via pgvector — 3–4 days
- "Frequently bought together" via association rules — plus 2–3 days
- Personal recommendations (ALS) + Python service — plus 4–5 days
- Full system (all recommendation types + A/B + analytics) — 3–4 weeks







