Development of a Bot Parser for Competitor Promotions and Discounts
Monitoring competitor promotions lets you react quickly: launch a counter-promotion, prepare your own offer, or adjust the marketing calendar. Promotions are harder to parse than prices: they have start and end dates, conditions, and different discount types.
What is Tracked
- Percentage discounts — "30% off entire catalog"
- Promotional prices — specific price for specific product
- Promo codes — public competitor promo codes
- Time-limited promotions — flash sales, weekend sales
- Loyalty programs — cashback, bonus points
- Bundles — "2+1 free", discounted sets
Promotion Page Parser
// app/Services/PromotionScraper/PromotionPageScraper.php
/**
 * Scrapes a competitor's promotions page and turns each promotion block into
 * a plain associative array (title, description, discount, promo code, dates, link).
 *
 * NOTE(review): fetch() and extractFromText() are called below but are not
 * defined in this excerpt — they are assumed to live elsewhere in the class.
 */
class PromotionPageScraper
{
    /** Lower-case English month names mapped to their month number. */
    private const MONTHS = [
        'january' => 1, 'february' => 2, 'march' => 3, 'april' => 4,
        'may' => 5, 'june' => 6, 'july' => 7, 'august' => 8,
        'september' => 9, 'october' => 10, 'november' => 11, 'december' => 12,
    ];

    /**
     * Downloads the page and extracts every promotion found on it.
     *
     * @param string $url Address of the competitor's promotions page.
     *
     * @return array List of promotion arrays as produced by extractPromotion();
     *               when no structured block yields a promotion, whatever
     *               extractFromText() returns for the page text.
     */
    public function scrapePromotionsPage(string $url): array
    {
        $html = $this->fetch($url);
        $crawler = new Crawler($html);
        $promotions = [];

        // Standard promotion blocks
        $crawler->filter('.promotion-card, .sale-block, .promo-item, [data-promo]')
            ->each(function (Crawler $node) use (&$promotions): void {
                $promo = $this->extractPromotion($node);
                if ($promo !== null) {
                    $promotions[] = $promo;
                }
            });

        // If structured blocks not found — parse by text
        if ($promotions === []) {
            $promotions = $this->extractFromText($crawler->text(), $url);
        }

        return $promotions;
    }

    /**
     * Builds one promotion record from a promotion block.
     *
     * @param Crawler $node A single matched promotion block.
     *
     * @return array|null Null when the block has no title; otherwise an array with the keys
     *                    title, description, discount_pct, promo_code, starts_at, ends_at, url.
     */
    private function extractPromotion(Crawler $node): ?array
    {
        $title = trim($node->filter('h2, h3, .promo-title, .sale-title')->first()->text(''));
        if ($title === '') {
            return null;
        }

        $description = trim($node->filter('p, .promo-desc')->first()->text(''));

        // Crawler::attr() throws on an empty node list, so check before reading.
        // If the block contains no link, the block itself may be the <a> element.
        $anchors = $node->filter('a[href]');
        $link = $anchors->count() > 0
            ? (string) $anchors->first()->attr('href')
            : (string) ($node->attr('href') ?? '');

        // All text heuristics run over title + description together.
        $haystack = $title . ' ' . $description;
        $dates = $this->extractDates($haystack);

        return [
            'title' => $title,
            'description' => $description,
            'discount_pct' => $this->extractDiscount($haystack),
            'promo_code' => $this->extractPromoCode($haystack),
            'starts_at' => $dates['start'] ?? null,
            'ends_at' => $dates['end'] ?? null,
            'url' => $link,
        ];
    }

    /**
     * Finds the first plausible percentage discount in the text.
     *
     * Handles "discount 30%", "−30%", "30% OFF", "up to 50% off". Values outside
     * 1..100 (e.g. "0% installment", "150% bonus") are skipped, and digits that are
     * only the tail of a longer number (e.g. "1000%") are never matched.
     *
     * @return int|null Discount rounded to a whole percent, or null when none is found.
     */
    private function extractDiscount(string $text): ?int
    {
        // The look-behind stops the match from starting in the middle of a number.
        if (!preg_match_all('/(?<![\d.,])(\d{1,3}(?:[.,]\d+)?)\s*%/u', $text, $matches)) {
            return null;
        }

        foreach ($matches[1] as $raw) {
            $value = (int) round((float) str_replace(',', '.', $raw));
            if ($value >= 1 && $value <= 100) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Finds a promo code in the text.
     *
     * First looks for a token after "promo", "promocode", "promo code" or "promo-code";
     * the token is only accepted if it looks like a code (see looksLikePromoCode()),
     * so ordinary words such as "promo sale" are not reported. Falls back to an
     * upper-case token in quotes that contains at least one letter.
     *
     * @return string|null Upper-cased promo code, or null when none is found.
     */
    private function extractPromoCode(string $text): ?string
    {
        // The keyword is case-insensitive; the possessive "?+" keeps the regex from
        // backtracking into treating the word "code" itself as the promo code.
        $keywordPattern = '/\b(?i:promo(?:[\s-]?code)?+)[:\s]+[«"\'\x{201C}\x{2018}]?'
            . '([A-Za-z0-9_-]{3,15})(?![A-Za-z0-9_-])/u';

        if (preg_match_all($keywordPattern, $text, $matches)) {
            foreach ($matches[1] as $candidate) {
                if ($this->looksLikePromoCode($candidate)) {
                    return strtoupper($candidate);
                }
            }
        }

        // Words in quotes resembling a promo code (upper-case, 4-12 chars, at least one letter)
        $quotedPattern = '/[«"\'\x{201C}\x{2018}](?=[A-Z0-9_-]*[A-Z])([A-Z0-9_-]{4,12})[»"\'\x{201D}\x{2019}]/u';
        if (preg_match($quotedPattern, $text, $m)) {
            return strtoupper($m[1]);
        }

        return null;
    }

    /**
     * Heuristic: a token is code-like when it contains a digit, or when it has
     * letters and none of them are lower-case (e.g. "SAVE20", "spring25", "WELCOME").
     */
    private function looksLikePromoCode(string $token): bool
    {
        if (preg_match('/\d/', $token) === 1) {
            return true;
        }

        return preg_match('/[A-Z]/', $token) === 1 && preg_match('/[a-z]/', $token) !== 1;
    }

    /**
     * Extracts the promotion period from free text.
     *
     * Supported forms: "from 01.03 to 31.03", "until 31 March", "31 March 2025",
     * "01.03.2025 - 15.03.2025". The first two valid dates are used as start and end.
     * A single date becomes the end date when it directly follows a word such as
     * "until" or "ends"; otherwise it becomes the start date. Dates without a year use
     * the year of the other date if that one has it, else the current year; an end
     * date that would fall before the start is moved to the following year.
     *
     * @return array Array with optional 'start' and 'end' keys, each a 'Y-m-d' string.
     */
    private function extractDates(string $text): array
    {
        $monthNames = implode('|', array_keys(self::MONTHS));

        // Alternative A: "31 March" / "31 March 2025".
        // Alternative B: "01.03" / "01.03.2025"; the look-arounds reject prices and
        // percentages such as "$12.99", "30.05%" or "1.25.3".
        $pattern = '/(?<![\d.,$€£])(?:'
            . '(?<dayA>\d{1,2})\s+(?<monthName>' . $monthNames . ')\b(?:,?\s+(?<yearA>\d{4})\b)?'
            . '|(?<dayB>\d{1,2})\.(?<monthB>\d{2})(?:\.(?<yearB>\d{4}))?(?!\d|[.,]\d|\s*[%$€£])'
            . ')/ui';

        $flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL;
        if (!preg_match_all($pattern, $text, $matches, $flags)) {
            return [];
        }

        $currentYear = (int) date('Y');
        $found = [];

        foreach ($matches as $match) {
            if (($match['monthName'][0] ?? null) !== null) {
                $day = (int) $match['dayA'][0];
                $month = self::MONTHS[mb_strtolower($match['monthName'][0])];
                $yearRaw = $match['yearA'][0] ?? null;
            } else {
                $day = (int) $match['dayB'][0];
                $month = (int) $match['monthB'][0];
                $yearRaw = $match['yearB'][0] ?? null;
            }

            $year = $yearRaw !== null ? (int) $yearRaw : $currentYear;
            if (!checkdate($month, $day, $year)) {
                continue; // e.g. "12.99" or "31 February" — not a real date
            }

            $found[] = [
                'year' => $year,
                'month' => $month,
                'day' => $day,
                'explicitYear' => $yearRaw !== null,
                'offset' => $match[0][1],
            ];

            if (count($found) === 2) {
                break;
            }
        }

        if ($found === []) {
            return [];
        }

        if (count($found) === 1) {
            // Decide start vs. end from the word immediately preceding the date.
            $prefix = substr($text, 0, $found[0]['offset']);
            $endsHere = preg_match(
                '/\b(?:until|till|through|thru|by|before|end(?:s|ing)?|expir(?:es?|ing))(?:\s+on)?\s*$/iu',
                $prefix
            ) === 1;

            $formatted = $this->formatDate($found[0]);

            return $formatted === null ? [] : [$endsHere ? 'end' : 'start' => $formatted];
        }

        [$start, $end] = $found;

        // Let the side without a year inherit it from the side that has one.
        if (!$start['explicitYear'] && $end['explicitYear']) {
            $start['year'] = $end['year'];
        } elseif ($start['explicitYear'] && !$end['explicitYear']) {
            $end['year'] = $start['year'];
        }

        // A period such as "20 December – 10 January" crosses the year boundary.
        $ordinal = static fn (array $p): int => $p['year'] * 10000 + $p['month'] * 100 + $p['day'];
        if ($ordinal($end) < $ordinal($start)) {
            if (!$end['explicitYear']) {
                $end['year']++;
            } elseif (!$start['explicitYear']) {
                $start['year']--;
            }
        }

        $dates = [];
        $startDate = $this->formatDate($start);
        if ($startDate !== null) {
            $dates['start'] = $startDate;
        }
        $endDate = $this->formatDate($end);
        if ($endDate !== null) {
            $dates['end'] = $endDate;
        }

        return $dates;
    }

    /**
     * Formats year/month/day parts as 'Y-m-d', or returns null when the combination
     * is not a real calendar date (possible after a year adjustment, e.g. 29 February).
     */
    private function formatDate(array $parts): ?string
    {
        if (!checkdate($parts['month'], $parts['day'], $parts['year'])) {
            return null;
        }

        return sprintf('%04d-%02d-%02d', $parts['year'], $parts['month'], $parts['day']);
    }
}
Monitoring Discounts on Specific Products
// app/Services/PromotionScraper/ProductSaleDetector.php
/**
 * Detects whether a product page currently shows a discounted price.
 *
 * NOTE(review): parsePrice() and extractDateFromText() are called below but are
 * not defined in this excerpt — they are assumed to live elsewhere in the class.
 */
class ProductSaleDetector
{
    /**
     * Looks for an "old price" / "sale price" pair in the page and derives the discount.
     *
     * @param string $html Raw HTML of the product page.
     *
     * @return SaleInfo|null Null when either price is missing, non-positive, or the
     *                       sale price is not lower than the original price.
     */
    public function detectSale(string $html): ?SaleInfo
    {
        $crawler = new Crawler($html);

        // Both the old and the new price must be present at the same time.
        $originalPriceNode = $crawler->filter(
            '.original-price, .old-price, del, [data-original-price], s'
        )->first();
        $salePriceNode = $crawler->filter(
            '.sale-price, .special-price, .discount-price, [data-sale-price]'
        )->first();

        if ($originalPriceNode->count() === 0 || $salePriceNode->count() === 0) {
            return null;
        }

        $originalPrice = $this->parsePrice($originalPriceNode->text());
        $salePrice = $this->parsePrice($salePriceNode->text());

        if ($originalPrice <= 0 || $salePrice <= 0 || $salePrice >= $originalPrice) {
            return null;
        }

        // round() returns a float; cast so the percentage is a true integer.
        $discountPct = (int) round((1 - $salePrice / $originalPrice) * 100);

        // Promotion validity period
        $endDate = null;
        $countdownNode = $crawler->filter('.countdown, [data-countdown], .sale-ends')->first();
        if ($countdownNode->count() > 0) {
            $endDate = $countdownNode->attr('data-end-date');
            // A missing OR empty attribute falls back to the element's visible text.
            if ($endDate === null || trim($endDate) === '') {
                $endDate = $this->extractDateFromText($countdownNode->text());
            }
        }

        return new SaleInfo(
            originalPrice: $originalPrice,
            salePrice: $salePrice,
            discountPct: $discountPct,
            endsAt: $endDate,
        );
    }
}
Storage and History of Promotions
// Migration: creates the competitor_promotions table, one row per promotion
// observed for a competitor, including when it was first and last seen.
Schema::create('competitor_promotions', function (Blueprint $table) {
$table->id();
// Owning competitor; constrained() adds a foreign key constraint for this column.
$table->foreignId('competitor_id')->constrained();
$table->string('title');
$table->text('description')->nullable();
// Nullable: a promotion does not necessarily state a percentage or a promo code.
$table->integer('discount_pct')->nullable();
$table->string('promo_code')->nullable();
// NOTE(review): string() has a bounded length — confirm it is long enough for real URLs.
$table->string('source_url');
// Validity period; either bound may be unknown.
$table->date('starts_at')->nullable();
$table->date('ends_at')->nullable();
// New rows are active by default.
$table->boolean('is_active')->default(true);
$table->json('affected_categories')->nullable();
// Not nullable and no default: every insert must supply both values explicitly.
$table->timestamp('first_seen_at');
$table->timestamp('last_seen_at');
// created_at / updated_at
$table->timestamps();
// Composite index over competitor, active flag and end date.
$table->index(['competitor_id', 'is_active', 'ends_at']);
});
// app/Jobs/ScrapeCompetitorPromotions.php
/**
 * Queue job: scrapes all promotion pages of one competitor, synchronises the
 * competitor_promotions table with what was found, and sends a Slack notification
 * about newly discovered promotions with a discount of 20% or more.
 *
 * NOTE(review): $this->competitorId is read below but its declaration (constructor /
 * property) is not part of this excerpt.
 */
class ScrapeCompetitorPromotions implements ShouldQueue
{
    /**
     * @param PromotionPageScraper $scraper Scraper used for each promotion URL.
     */
    public function handle(PromotionPageScraper $scraper): void
    {
        $competitor = Competitor::findOrFail($this->competitorId);
        $promotionUrls = $competitor->promotion_urls ?? [];

        $currentPromos = [];
        foreach ($promotionUrls as $url) {
            $scraped = $scraper->scrapePromotionsPage($url);
            $currentPromos = array_merge($currentPromos, $scraped);
            // Randomised pause between requests.
            sleep(rand(2, 4));
        }

        // Deactivate promotions that no longer exist
        CompetitorPromotion::where('competitor_id', $this->competitorId)
            ->where('is_active', true)
            ->whereNotIn('source_url', array_column($currentPromos, 'url'))
            ->update(['is_active' => false]);

        // Update or create promotions
        $createdIds = [];
        foreach ($currentPromos as $promo) {
            $seenAt = now();

            $record = CompetitorPromotion::firstOrNew([
                'competitor_id' => $this->competitorId,
                'source_url' => $promo['url'],
            ]);

            $isNew = !$record->exists;

            $record->fill([
                'title' => $promo['title'],
                'description' => $promo['description'],
                'discount_pct' => $promo['discount_pct'],
                'promo_code' => $promo['promo_code'],
                'starts_at' => $promo['starts_at'],
                'ends_at' => $promo['ends_at'],
                'is_active' => true,
                'last_seen_at' => $seenAt,
            ]);

            // first_seen_at is stamped only on insert. Passing it in the values of
            // updateOrCreate() would overwrite it on every run, because those values
            // are applied to existing rows as well.
            if ($isNew) {
                $record->first_seen_at = $seenAt;
            }

            $record->save();

            if ($isNew) {
                $createdIds[] = $record->getKey();
            }
        }

        if ($createdIds === []) {
            return;
        }

        // Notification about new major promotions: only rows created by this run.
        $newBigSales = CompetitorPromotion::where('competitor_id', $this->competitorId)
            ->whereKey($createdIds)
            ->where('discount_pct', '>=', 20)
            ->get();

        if ($newBigSales->isNotEmpty()) {
            Notification::route('slack', config('monitoring.slack_url'))
                ->notify(new BigCompetitorSaleNotification($competitor, $newBigSales));
        }
    }
}
Schedule
// Baseline cadence: scrape promotions every two hours, never starting a run
// while the previous run of this scheduled event is still in progress.
$regularScrape = $schedule->command('scrape:promotions');
$regularScrape->everyTwoHours();
$regularScrape->withoutOverlapping();
// Ahead of the weekend: one additional run on Fridays at 09:00.
$preWeekendScrape = $schedule->command('scrape:promotions');
$preWeekendScrape->fridays();
$preWeekendScrape->at('09:00');
Development timeframe: a promotions parser for 3–5 competitors with history, Slack notifications, and a dashboard takes 5–8 business days.







