Developing a Product Review Scraper Bot
Reviews from marketplaces and aggregators are valuable content for product cards: they increase trust, add keywords through user-generated content (UGC), and improve SEO through structured data. A scraper collects reviews, normalizes them, and imports them into the store database.
Sources and Access Methods
| Platform | Method | Features |
|---|---|---|
| Wildberries | JSON API | Open API, pagination |
| Ozon | Playwright | SPA, requires auth |
| Google Reviews | Places API | Paid, official |
| iHerb | HTML / JSON API | Structured HTML |
Wildberries: Parsing via JSON API
# scraper/reviews/wildberries.py
import httpx
import asyncio
from dataclasses import dataclass
@dataclass
class Review:
external_id: str
product_nm_id: int
author: str
rating: int
text: str
pros: str | None
cons: str | None
date: str
helpful_count: int
class WildberriesReviewScraper:
    """Page through the Wildberries feedback JSON endpoint for one product.

    The instance must provide two members that are not defined in this
    excerpt: ``self.client`` (an async HTTP client whose awaitable
    ``get(url, params=...)`` returns a response with ``raise_for_status()``
    and ``json()``) and ``self._normalize(nm_id, raw_feedback)``.
    """

    REVIEWS_URL = "https://feedbacks2.wb.ru/feedbacks/v2/{nm_id}"
    # Paging stops once the next offset would reach this value.
    MAX_OFFSET = 1000
    # Pause between consecutive page requests, in seconds.
    PAGE_DELAY = 1.0

    async def get_reviews(self, nm_id: int, take: int = 100) -> "list[Review]":
        """Return the normalized reviews for ``nm_id``, newest first.

        Args:
            nm_id: Product identifier inserted into the URL and the query.
            take: Page size requested per call; must be positive.

        Returns:
            The results of ``self._normalize`` for every feedback entry, in
            the order the endpoint returned them.

        Raises:
            ValueError: If ``take`` is not positive (the offset would never
                advance and the loop could not terminate on its own).
            Exception: Whatever ``raise_for_status()`` raises for an error
                response.
        """
        if take <= 0:
            raise ValueError("take must be a positive integer")

        url = self.REVIEWS_URL.format(nm_id=nm_id)
        all_reviews = []
        skip = 0
        while True:
            # NOTE(review): the "immt" key is kept exactly as in the original;
            # verify the parameter name against the live endpoint.
            params = {"immt": nm_id, "skip": skip, "take": take, "order": "dateDesc"}
            resp = await self.client.get(url, params=params)
            resp.raise_for_status()
            # A missing key and an explicit null both mean "no more pages".
            feedbacks = resp.json().get("feedbacks") or []
            if not feedbacks:
                break
            all_reviews.extend(self._normalize(nm_id, fb) for fb in feedbacks)
            skip += take
            # Check the cap before sleeping so the last page costs no delay.
            if skip >= self.MAX_OFFSET:
                break
            await asyncio.sleep(self.PAGE_DELAY)
        return all_reviews
Laravel Job with Duplicate Handling
// app/Jobs/ImportProductReviews.php
/**
 * Queue job: scrape the reviews of one product and import the new ones.
 *
 * Relies on $this->productId, $this->source and $this->scrape(), none of
 * which are defined in this excerpt.
 */
class ImportProductReviews implements ShouldQueue
{
    /** Maximum number of attempts the queue worker makes for this job. */
    public int $tries = 3;

    /**
     * Import scraped reviews, skipping stored duplicates.
     *
     * Reviews that the import service rejects (it returns null) are counted
     * separately so the "imported" figure in the log reflects created rows only.
     *
     * @param ReviewImportService $service Filters and persists a single review.
     */
    public function handle(ReviewImportService $service): void
    {
        // firstOrFail() aborts the job when the product has no mapping row.
        $mapping = ProductReviewMapping::where('product_id', $this->productId)->firstOrFail();
        $reviews = $this->scrape($mapping->external_id);

        $imported = $skipped = $rejected = 0;

        foreach ($reviews as $reviewData) {
            // Deduplication by external_id + source
            $alreadyStored = ProductReview::where([
                'source' => $this->source,
                'external_id' => $reviewData['external_id'],
            ])->exists();

            if ($alreadyStored) {
                $skipped++;
                continue;
            }

            // A null result means the review was filtered out, not stored.
            if ($service->import($this->productId, $this->source, $reviewData) === null) {
                $rejected++;
                continue;
            }

            $imported++;
        }

        Log::info("Reviews imported", [
            'product_id' => $this->productId,
            'imported' => $imported,
            'skipped' => $skipped,
            'rejected' => $rejected,
        ]);
    }
}
Moderation and Filtering
/**
 * Filters scraped reviews and stores the accepted ones with status "pending".
 *
 * Relies on $this->anonymizeAuthor() and $this->sanitize(), which are not
 * defined in this excerpt.
 */
class ReviewImportService
{
    /** Minimum review length in characters, measured after trimming. */
    private const MIN_TEXT_LENGTH = 20;

    /** Substrings (matched case-insensitively) that mark a review as spam. */
    private array $stopWords = ['buy', 'discount', 'promo', 'vk.com', 't.me'];

    /**
     * Validate one scraped review and persist it.
     *
     * @param int    $productId Local product the review belongs to.
     * @param string $source    Name of the platform the review came from.
     * @param array  $data      Scraped review; reads the keys 'text',
     *                          'external_id', 'author', 'rating' and the
     *                          optional 'verified'.
     * @return ProductReview|null The created record, or null when the review
     *                            is rejected (too short or contains a stop word).
     */
    public function import(int $productId, string $source, array $data): ?ProductReview
    {
        // A missing, null or non-string text is treated as empty instead of
        // being passed to the mb_* functions.
        $text = is_string($data['text'] ?? null) ? trim($data['text']) : '';

        // Filter too short reviews
        if (mb_strlen($text) < self::MIN_TEXT_LENGTH) {
            return null;
        }

        // Filter stop words (spam)
        foreach ($this->stopWords as $word) {
            if (mb_stripos($text, $word) !== false) {
                return null;
            }
        }

        return ProductReview::create([
            'product_id' => $productId,
            'source' => $source,
            'external_id' => $data['external_id'],
            'author' => $this->anonymizeAuthor($data['author']),
            // Clamp the rating into the 1..5 range.
            'rating' => max(1, min(5, (int) $data['rating'])),
            'text' => $this->sanitize($text),
            'verified' => $data['verified'] ?? false,
            'status' => 'pending',
        ]);
    }
}
Development Timeline
One marketplace parser with moderation and structured data: 3-5 business days.







