Streaming AI Responses via Server-Sent Events (SSE)
Stream LLM responses to the browser token-by-token for a real-time, responsive UX, instead of waiting for the complete response to finish generating.
Server Implementation
import OpenAI from 'openai';
const openai = new OpenAI();
/**
 * Route handler: proxies a chat completion as a Server-Sent Events stream.
 * @param {Request} request - JSON body: { message: string }
 * @returns {Response} text/event-stream; each event is `data: {"content":...}`,
 *          terminated by a `data: [DONE]` sentinel.
 */
export async function POST(request) {
  const { message } = await request.json();

  // Chunks arrive from OpenAI as they are generated (stream: true).
  const stream = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{ role: 'user', content: message }],
    stream: true,
  });

  // Use an explicit ReadableStream of encoded bytes: an async generator
  // yielding strings is not a valid BodyInit in every runtime, and it gave
  // us no way to surface mid-stream failures to the client.
  const encoder = new TextEncoder();
  const body = new ReadableStream({
    async start(controller) {
      try {
        for await (const chunk of stream) {
          const content = chunk.choices[0]?.delta?.content ?? '';
          if (content) {
            controller.enqueue(
              encoder.encode(`data: ${JSON.stringify({ content })}\n\n`)
            );
          }
        }
        // Sentinel so the client knows the stream completed normally.
        controller.enqueue(encoder.encode('data: [DONE]\n\n'));
        controller.close();
      } catch (err) {
        // Propagate upstream failures instead of leaving the connection open.
        controller.error(err);
      }
    },
  });

  return new Response(body, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      Connection: 'keep-alive',
    },
  });
}
Client Implementation
/**
 * POST a message to the SSE endpoint and invoke onChunk for each token.
 * @param {string} message - user prompt to send
 * @param {(chunk: string) => void} onChunk - called with each content delta
 * @returns {Promise<string>} the full accumulated response text
 * @throws {Error} on non-2xx responses or malformed event payloads
 */
async function streamResponse(message, onChunk) {
  const response = await fetch('/api/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ message }),
  });
  if (!response.ok) {
    throw new Error(`Stream request failed: ${response.status}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = ''; // holds a partial SSE line that spans network chunks
  let fullResponse = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // stream: true keeps multi-byte characters split across chunks intact.
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop(); // last element may be an incomplete line; keep it
    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const payload = line.slice(6);
      // The terminator is a sentinel, not JSON — parsing it would throw.
      if (payload === '[DONE]') return fullResponse;
      const data = JSON.parse(payload);
      if (data.content) {
        fullResponse += data.content;
        onChunk(data.content);
      }
    }
  }
  return fullResponse;
}
React Component
function StreamingChat() {
const [response, setResponse] = useState('');
const [streaming, setStreaming] = useState(false);
async function handleSubmit(message) {
setResponse('');
setStreaming(true);
await streamResponse(message, (chunk) => {
setResponse(prev => prev + chunk);
});
setStreaming(false);
}
return (
<div>
<textarea
onChange={(e) => handleSubmit(e.target.value)}
placeholder="Ask something..."
/>
<div className="response">
{response}
{streaming && <span className="cursor">|</span>}
</div>
</div>
);
}
Error Handling
/**
 * Wrap streamResponse so failures surface as display-ready messages
 * instead of unhandled rejections.
 * @param {string} message - prompt to send
 * @param {(chunk: string) => void} onChunk - per-token callback
 * @param {(msg: string) => void} onError - receives a human-readable message
 */
async function robustStream(message, onChunk, onError) {
  try {
    await streamResponse(message, onChunk);
  } catch (error) {
    // AbortError means the user (or network) cut the connection.
    const friendly =
      error.name === 'AbortError'
        ? 'Connection interrupted'
        : `Error: ${error.message}`;
    onError(friendly);
  }
}
Timeline
- Basic SSE streaming — 1–2 days
- Error handling + reconnect — 1 day
- UI polish (cursor, animations) — 1–2 days
- Performance optimization — 1 day







