LLM API Backend Integration
Integration of Language Model APIs (OpenAI, Anthropic, Google) into a website backend. Covers authentication, rate limiting, error handling, and cost optimization.
API Integration Pattern
import OpenAI from 'openai';
// Shared OpenAI client used by every snippet below.
// apiKey is read from the environment — never hard-code credentials.
// timeout: 30 s per request; maxRetries: the SDK itself retries
// transient failures twice before rejecting.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
timeout: 30000,
maxRetries: 2,
});
/**
 * Send a chat-completion request and return a normalized result.
 *
 * Fixes two defects in the original: (1) a 429 response triggered
 * unbounded recursion (no retry cap), and (2) the retry path called an
 * undefined `sleep`, so any rate limit crashed with a ReferenceError.
 *
 * @param {Array<{role: string, content: string}>} messages - chat history
 * @param {object} [options]
 * @param {string} [options.model='gpt-4o-mini']
 * @param {number} [options.temperature=0.7]
 * @param {number} [options.max_tokens=500]
 * @param {number} [options.max429Retries=3] - extra attempts after a 429
 * @param {number} [options.retryDelayMs=60000] - wait between 429 retries
 * @returns {Promise<{success: true, content: string, usage: object}>}
 * @throws the API error once 429 retries are exhausted, or immediately
 *         for any non-429 failure.
 */
async function callLLM(messages, options = {}) {
  const {
    model = 'gpt-4o-mini',
    temperature = 0.7,
    max_tokens = 500,
    max429Retries = 3,
    retryDelayMs = 60000,
  } = options;

  // Bounded retry loop replaces the original unbounded recursion.
  for (let attempt = 0; ; attempt++) {
    try {
      const response = await openai.chat.completions.create({
        model,
        messages,
        temperature,
        max_tokens,
      });
      return {
        success: true,
        content: response.choices[0].message.content,
        usage: response.usage,
      };
    } catch (error) {
      if (error.status === 429 && attempt < max429Retries) {
        // Rate limited — wait, then retry (inline delay; no helper needed).
        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
        continue;
      }
      throw error;
    }
  }
}
Cost Optimization
// Cache expensive calls
// Cache expensive calls: identical prompts within the TTL window are
// served from Redis instead of hitting the API again.
const crypto = require('node:crypto'); // was missing — createHash threw ReferenceError
const redis = require('redis').createClient();
// NOTE(review): the client is never connect()ed here; redis v4+ requires
// an explicit `await redis.connect()` before use, and spells the command
// `setEx` (`setex` is the v3 API) — confirm against the installed version.

/**
 * Return a cached LLM result for `prompt`, calling the API on a miss.
 * @param {string} prompt - user message sent as a single-turn chat
 * @param {number} [ttl=86400] - cache lifetime in seconds (one day)
 * @returns {Promise<object>} the callLLM result object (parsed from JSON on a hit)
 */
async function cachedLLMCall(prompt, ttl = 86400) {
  // SHA-256 of the prompt keeps keys fixed-length and safe for Redis.
  const cacheKey = `llm:${crypto.createHash('sha256').update(prompt).digest('hex')}`;
  const cached = await redis.get(cacheKey);
  if (cached) return JSON.parse(cached);
  const result = await callLLM([{ role: 'user', content: prompt }]);
  await redis.setex(cacheKey, ttl, JSON.stringify(result));
  return result;
}
// Batch requests
/**
 * Run one LLM call per prompt, all in parallel.
 * Rejects if any single call rejects (Promise.all semantics).
 * @param {string[]} prompts
 * @returns {Promise<Array>} results in the same order as `prompts`
 */
async function batchLLMCalls(prompts) {
  const asMessages = (prompt) => [{ role: 'user', content: prompt }];
  const pending = prompts.map((prompt) => callLLM(asMessages(prompt)));
  return await Promise.all(pending);
}
Rate Limiting
import Bottleneck from 'bottleneck';
// Client-side throttle: at most one request every 100 ms and no more
// than five in flight at once.
const limiter = new Bottleneck({
  minTime: 100,
  maxConcurrent: 5,
});

/**
 * Queue a single-turn LLM call through the shared rate limiter.
 * @param {string} prompt
 * @param {object} [options] - forwarded to callLLM unchanged
 * @returns {Promise<object>} the callLLM result
 */
export async function limitedLLMCall(prompt, options) {
  const run = () => callLLM([{ role: 'user', content: prompt }], options);
  return limiter.schedule(run);
}
Error Handling
/**
 * Domain error wrapping a failed LLM API call.
 * `isRetryable` is true for timeout / rate-limit / transient-server
 * HTTP statuses (408, 429, 500, 502, 503); false otherwise, including
 * when no original error (or no status) is available.
 */
class LLMError extends Error {
  constructor(message, originalError) {
    // `cause` preserves the original error and its stack for logging.
    super(message, { cause: originalError });
    this.name = 'LLMError'; // was missing: instances reported as plain "Error"
    this.originalError = originalError;
    this.isRetryable = [408, 429, 500, 502, 503].includes(originalError?.status);
  }
}
/**
 * Call the LLM with exponential-backoff retries for transient failures.
 *
 * Fixes a defect in the original: the retry path called an undefined
 * `sleep`, so any retryable failure crashed with a ReferenceError
 * instead of backing off.
 *
 * @param {string} prompt - user message sent as a single-turn chat
 * @param {number} [maxRetries=3] - total attempts before giving up
 * @param {number} [baseDelayMs=1000] - backoff base; attempt n waits 2^n * base
 * @returns {Promise<object>} the callLLM result
 * @throws {LLMError} when the error is not retryable or attempts are exhausted
 */
async function robustLLMCall(prompt, maxRetries = 3, baseDelayMs = 1000) {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await callLLM([{ role: 'user', content: prompt }]);
    } catch (error) {
      const llmError = new LLMError(`LLM call failed: ${error.message}`, error);
      if (!llmError.isRetryable || attempt === maxRetries - 1) {
        throw llmError;
      }
      // Exponential backoff: 1x, 2x, 4x ... the base delay.
      await sleep(Math.pow(2, attempt) * baseDelayMs);
    }
  }
}
Timeline
- Basic API integration — 1–2 days
- Rate limiting + caching — 1 day
- Error handling + retries — 1 day
- Cost monitoring — 1 day
- Load testing & optimization — 2–3 days







