Full-Text Search Implementation for Web Applications
Full-text search — searching by word meaning, not exact match. LIKE '%query%' doesn't scale and doesn't understand morphology: "buy", "bought", "buying" — different strings. FTS handles all three.
PostgreSQL FTS: Built-in Option
For most projects built-in PostgreSQL FTS covers the task without external services.
Schema preparation:
ALTER TABLE products
ADD COLUMN search_vector TSVECTOR
GENERATED ALWAYS AS (
to_tsvector('russian',
coalesce(title, '') || ' ' ||
coalesce(description, '') || ' ' ||
coalesce(brand, '')
)
) STORED;
CREATE INDEX idx_products_fts ON products USING GIN (search_vector);
GENERATED ALWAYS AS ... STORED — PostgreSQL 12+. Column updates automatically on INSERT/UPDATE, no trigger needed.
For multilingual search and different field weights:
-- Without GENERATED (flexible setup):
UPDATE products SET search_vector =
setweight(to_tsvector('russian', coalesce(title, '')), 'A') ||
setweight(to_tsvector('russian', coalesce(brand, '')), 'B') ||
setweight(to_tsvector('russian', coalesce(description, '')), 'C');
CREATE OR REPLACE FUNCTION products_search_update() RETURNS TRIGGER AS $$
BEGIN
NEW.search_vector :=
setweight(to_tsvector('russian', coalesce(NEW.title, '')), 'A') ||
setweight(to_tsvector('russian', coalesce(NEW.brand, '')), 'B') ||
setweight(to_tsvector('russian', coalesce(NEW.description, '')), 'C');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER products_search_trigger
BEFORE INSERT OR UPDATE ON products
FOR EACH ROW EXECUTE FUNCTION products_search_update();
Search:
-- Simple query
SELECT id, title,
ts_rank(search_vector, query) AS rank,
ts_headline('russian', description, query,
'MaxWords=30, MinWords=15, StartSel=<b>, StopSel=</b>'
) AS excerpt
FROM products,
plainto_tsquery('russian', 'wireless headphones') AS query
WHERE search_vector @@ query
ORDER BY rank DESC
LIMIT 20;
-- websearch_to_tsquery: supports "phrases", -exclusions, OR
SELECT id, title
FROM products
WHERE search_vector @@ websearch_to_tsquery('russian', '"wireless headphones" -wired')
ORDER BY ts_rank(search_vector, websearch_to_tsquery('russian', '"wireless headphones" -wired')) DESC;
ts_headline generates snippet with highlighted matches.
Elasticsearch: When External Engine Needed
PostgreSQL FTS is limited: no fuzzy search, no synonyms out-of-the-box, no facet aggregations. If all three needed — Elasticsearch or OpenSearch.
Index schema:
PUT /products
{
"settings": {
"analysis": {
"analyzer": {
"russian_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "russian_stop", "russian_stemmer"]
}
},
"filter": {
"russian_stop": {
"type": "stop",
"stopwords": "_russian_"
},
"russian_stemmer": {
"type": "stemmer",
"language": "russian"
}
}
}
},
"mappings": {
"properties": {
"title": { "type": "text", "analyzer": "russian_analyzer", "boost": 3 },
"brand": { "type": "text", "analyzer": "russian_analyzer", "boost": 2 },
"description": { "type": "text", "analyzer": "russian_analyzer" },
"category_id": { "type": "keyword" },
"price": { "type": "double" },
"status": { "type": "keyword" },
"created_at": { "type": "date" }
}
}
}
Search with facets:
POST /products/_search
{
"query": {
"bool": {
"must": {
"multi_match": {
"query": "wireless headphones",
"fields": ["title^3", "brand^2", "description"],
"type": "best_fields",
"fuzziness": "AUTO"
}
},
"filter": [
{ "term": { "status": "published" } },
{ "range": { "price": { "gte": 1000, "lte": 15000 } } }
]
}
},
"aggs": {
"by_brand": {
"terms": { "field": "brand.keyword", "size": 20 }
},
"price_stats": {
"stats": { "field": "price" }
}
},
"highlight": {
"fields": {
"title": { "number_of_fragments": 0 },
"description": { "fragment_size": 150, "number_of_fragments": 3 }
}
},
"from": 0,
"size": 20
}
Synchronization with PostgreSQL:
# Option 1: sync in service
async def create_product(data: ProductCreate, db: AsyncSession) -> Product:
product = Product(**data.dict())
db.add(product)
await db.flush() # get id
await es.index(
index='products',
id=str(product.id),
document=product_to_es_doc(product),
)
await db.commit()
return product
# Option 2: via CDC (Change Data Capture)
# Debezium reads PostgreSQL WAL and publishes events to Kafka
# Consumer subscribes and updates Elasticsearch
CDC approach is more reliable: data reaches ES even if service crashed.
Typed Client (Python)
from elasticsearch import AsyncElasticsearch
from pydantic import BaseModel
from typing import Any
class SearchResult(BaseModel):
id: str
score: float
title: str
price: float
highlight: dict[str, list[str]] = {}
async def search_products(
query: str,
category_id: int | None = None,
price_min: float | None = None,
price_max: float | None = None,
page: int = 1,
per_page: int = 20,
) -> tuple[list[SearchResult], int]:
es = AsyncElasticsearch(hosts=['http://localhost:9200'])
filters: list[dict[str, Any]] = [{"term": {"status": "published"}}]
if category_id:
filters.append({"term": {"category_id": category_id}})
if price_min or price_max:
filters.append({"range": {"price": {
**({"gte": price_min} if price_min else {}),
**({"lte": price_max} if price_max else {}),
}}})
body = {
"query": {
"bool": {
"must": {"multi_match": {
"query": query,
"fields": ["title^3", "brand^2", "description"],
"fuzziness": "AUTO",
}},
"filter": filters,
}
},
"highlight": {"fields": {"title": {}, "description": {"fragment_size": 150}}},
"from": (page - 1) * per_page,
"size": per_page,
}
resp = await es.search(index="products", body=body)
total = resp["hits"]["total"]["value"]
hits = [
SearchResult(
id=h["_id"],
score=h["_score"],
highlight=h.get("highlight", {}),
**h["_source"],
)
for h in resp["hits"]["hits"]
]
return hits, total
Solution Selection
| PostgreSQL FTS | Elasticsearch/OpenSearch | Meilisearch | |
|---|---|---|---|
| Setup | Minutes | Hours–days | Minutes |
| Fuzzy search | Via extensions | Built-in | Built-in |
| Facets | Complex | Built-in | Built-in |
| Sync | Not needed | CDC or sync | CDC or sync |
| Infrastructure | Already exists | +JVM server | +Go server |
Timelines
PostgreSQL FTS (trigger, index, queries, highlight): 1 day. Elasticsearch with Russian analyzer, facets and CDC-sync via Debezium: 3–4 days.







