Implementing AI-Powered Semantic Search on Website
Traditional full-text search (PostgreSQL tsvector, Elasticsearch) searches by word matching. A user searches "how to pay" and finds articles with the word "pay", but misses "payment methods" or "account top-up". Semantic search works differently: it compares meaning vectors instead of strings.
How Vector Search Works
Text is transformed into a vector (embedding) — a numeric array of 768–3072 dimensions, where semantically similar texts have similar vectors. The smaller the distance between two vectors (cosine or Euclidean), the greater their semantic proximity.
"payment methods" → [0.12, -0.87, 0.34, ...]
"how to pay" → [0.11, -0.85, 0.31, ...] ← close
"borscht recipe" → [0.91, 0.23, -0.67, ...] ← far
Choosing an Embedding Model
| Model | Vector Size | Context | Speed | Cost |
|---|---|---|---|---|
| OpenAI text-embedding-3-small | 1536 | 8K tokens | Fast | $0.02/1M tokens |
| OpenAI text-embedding-3-large | 3072 | 8K tokens | Slower | $0.13/1M tokens |
| Cohere embed-multilingual-v3 | 1024 | 512 tokens | Fast | $0.10/1M tokens |
| BGE-M3 (self-hosted) | 1024 | 8K tokens | GPU-dependent | Free |
| nomic-embed-text (Ollama) | 768 | 8K tokens | CPU-possible | Free |
For multilingual content, text-embedding-3-large or Cohere multilingual give better results.
Vector Database
pgvector — PostgreSQL extension. Ideal if already using Postgres:
CREATE EXTENSION vector;
CREATE TABLE content_chunks (
id BIGSERIAL PRIMARY KEY,
content_id BIGINT REFERENCES content(id),
chunk_text TEXT NOT NULL,
chunk_index INT,
embedding vector(1536),
metadata JSONB
);
-- Index for ANN search (approximate nearest neighbor)
CREATE INDEX ON content_chunks USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Or HNSW (better for most cases):
CREATE INDEX ON content_chunks USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
Qdrant — specialized vector DB with filtering:
const qdrant = new QdrantClient({ url: 'http://localhost:6333' });
await qdrant.createCollection('content', {
vectors: { size: 1536, distance: 'Cosine' },
optimizers_config: { indexing_threshold: 20000 },
hnsw_config: { m: 16, ef_construct: 100 },
});
Indexing Content
import OpenAI from 'openai';
const openai = new OpenAI();
function chunkText(text, options = { maxTokens: 400, overlap: 50 }) {
// Split by paragraphs, then combine up to maxTokens
const paragraphs = text.split(/\n{2,}/);
const chunks = [];
let current = '';
let currentTokens = 0;
for (const para of paragraphs) {
const paraTokens = estimateTokens(para);
if (currentTokens + paraTokens > options.maxTokens && current) {
chunks.push(current.trim());
// Overlap: take last N words
const words = current.split(' ');
current = words.slice(-options.overlap).join(' ') + ' ' + para;
currentTokens = estimateTokens(current);
} else {
current += (current ? '\n\n' : '') + para;
currentTokens += paraTokens;
}
}
if (current) chunks.push(current.trim());
return chunks;
}
async function indexContent(contentItem) {
const chunks = chunkText(contentItem.body);
// Batch embeddings (up to 2048 inputs at a time)
const batchSize = 100;
for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize);
const { data: embeddings } = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: batch,
encoding_format: 'float',
});
// Save to pgvector
await db.query(`
INSERT INTO content_chunks (content_id, chunk_text, chunk_index, embedding, metadata)
SELECT $1, unnest($2::text[]), generate_series(0, $3), unnest($4::vector[]), $5
`, [
contentItem.id,
batch,
batch.length - 1,
embeddings.map(e => `[${e.embedding.join(',')}]`),
JSON.stringify({ title: contentItem.title, url: contentItem.url, type: contentItem.type }),
]);
}
}
Search
async function semanticSearch(query, options = {}) {
const {
limit = 10,
threshold = 0.7,
filter = {}, // { type: 'article', lang: 'en' }
hybrid = true, // Combine with full-text
} = options;
// Vectorize query
const { data: [{ embedding }] } = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: query,
});
let results;
if (hybrid) {
// Hybrid search: vector + full-text, RRF (Reciprocal Rank Fusion)
results = await db.query(`
WITH semantic AS (
SELECT
content_id,
chunk_text,
1 - (embedding <=> $1::vector) AS score,
ROW_NUMBER() OVER (ORDER BY embedding <=> $1::vector) AS rank
FROM content_chunks
WHERE metadata->>'type' = ANY($3::text[])
ORDER BY embedding <=> $1::vector
LIMIT 20
),
fulltext AS (
SELECT
id AS content_id,
body AS chunk_text,
ts_rank(to_tsvector('english', body), plainto_tsquery('english', $2)) AS score,
ROW_NUMBER() OVER (ORDER BY ts_rank(to_tsvector('english', body), plainto_tsquery('english', $2)) DESC) AS rank
FROM content
WHERE to_tsvector('english', body) @@ plainto_tsquery('english', $2)
LIMIT 20
)
SELECT
COALESCE(s.content_id, f.content_id) AS id,
COALESCE(s.chunk_text, f.chunk_text) AS text,
(
COALESCE(1.0 / (60 + s.rank), 0) +
COALESCE(1.0 / (60 + f.rank), 0)
) AS rrf_score
FROM semantic s
FULL OUTER JOIN fulltext f ON s.content_id = f.content_id
ORDER BY rrf_score DESC
LIMIT $4
`, [`[${embedding.join(',')}]`, query, Object.values(filter), limit]);
} else {
// Pure semantic search
results = await db.query(`
SELECT DISTINCT ON (content_id)
content_id,
chunk_text,
1 - (embedding <=> $1::vector) AS score
FROM content_chunks
WHERE 1 - (embedding <=> $1::vector) > $2
ORDER BY content_id, score DESC
LIMIT $3
`, [`[${embedding.join(',')}]`, threshold, limit]);
}
return results.rows;
}
Search UI Component
function SemanticSearch() {
const [query, setQuery] = useState('');
const [results, setResults] = useState([]);
const [loading, setLoading] = useState(false);
// Debounce requests
useEffect(() => {
if (query.length < 3) { setResults([]); return; }
const timer = setTimeout(async () => {
setLoading(true);
const res = await fetch(`/api/search?q=${encodeURIComponent(query)}`);
const data = await res.json();
setResults(data.results);
setLoading(false);
}, 300);
return () => clearTimeout(timer);
}, [query]);
return (
<div className="search-wrapper">
<input
value={query}
onChange={(e) => setQuery(e.target.value)}
placeholder="Search documentation..."
className="search-input"
/>
{loading && <Spinner />}
<ul className="search-results">
{results.map(r => (
<li key={r.id}>
<a href={r.url}>
<strong>{r.title}</strong>
<p>{highlightMatch(r.snippet, query)}</p>
<span className="score">{(r.score * 100).toFixed(0)}% match</span>
</a>
</li>
))}
</ul>
</div>
);
}
Reranking
After vector search, accuracy can be improved with a cross-encoder model:
import { CohereClient } from 'cohere-ai';
const cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
async function rerank(query, documents) {
const response = await cohere.rerank({
model: 'rerank-multilingual-v3.0',
query,
documents: documents.map(d => d.text),
topN: 5,
});
return response.results.map(r => ({
...documents[r.index],
rerankScore: r.relevanceScore,
}));
}
Image Search
For visual content — multimodal embeddings (CLIP, OpenAI Vision):
// Index image
async function indexImage(imageUrl) {
const response = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: await generateImageCaption(imageUrl), // Vision API → text
});
// Save to same collection
}
Quality Monitoring
-- Queries with no results (expand knowledge base)
SELECT query, COUNT(*) as count
FROM search_logs
WHERE results_count = 0
GROUP BY query ORDER BY count DESC LIMIT 20;
-- Queries with low CTR (results irrelevant)
SELECT query, clicks / impressions AS ctr
FROM search_metrics
WHERE impressions > 100
ORDER BY ctr ASC LIMIT 20;
Timeline
- Semantic search on 10K documents with pgvector — 4–5 days
- Hybrid search (vector + full-text) — plus 1–2 days
- Reranking via Cohere Rerank — plus 1 day
- UI with result highlighting, analytics — plus 2–3 days
- Incremental reindexing on content update — plus 1–2 days







