Development of a Bot Parser for Supplier New Arrivals
A new-arrivals parser solves one specific task: it automatically detects products that were not previously in the supplier's catalog and either notifies the team or imports them directly into the store. Unlike a regular parser, it focuses on the delta — what has appeared since the last visit.
Strategies for Detecting New Items
By date added — if the supplier's website shows when each product was added (this approach assumes the listing is sorted newest first):
// app/Services/NewArrivals/DateBasedDetector.php
/**
 * Finds new supplier products using the "date added" shown on a category listing.
 *
 * Relies on scrapePage(), parseDate() and existsInDatabase(), which are not part
 * of this snippet and are expected to be provided elsewhere in the class.
 */
class DateBasedDetector
{
    /**
     * Walks the listing page by page and collects items that are not yet in the database.
     *
     * Scanning stops at the first item dated before $since (the listing is assumed
     * to be sorted newest first), at the first empty page, or after $maxPages pages.
     *
     * @param string             $categoryUrl Category listing URL to scrape.
     * @param \DateTimeInterface $since       Items dated earlier than this end the scan.
     * @param int                $maxPages    Safety cap on the number of pages fetched; protects
     *                                        against sites that keep returning a non-empty page
     *                                        for any page number.
     *
     * @return array List of scraped items (in listing order) whose SKU is not in the database,
     *               at most one item per SKU.
     */
    public function detectNew(string $categoryUrl, \DateTimeInterface $since, int $maxPages = 500): array
    {
        $newProducts = [];
        $seenSkus = [];

        for ($page = 1; $page <= $maxPages; $page++) {
            $items = $this->scrapePage($categoryUrl, $page);
            if (count($items) === 0) {
                break; // Ran past the last page
            }

            foreach ($items as $item) {
                $itemDate = $this->parseDate($item['date_added'] ?? '');
                if ($itemDate && $itemDate < $since) {
                    // Only old products below — nothing further can be new
                    return $newProducts;
                }

                // Items without a SKU cannot be matched against the database
                $sku = $item['sku'] ?? null;
                if ($sku === null || $sku === '') {
                    continue;
                }

                // The same product may show up on several pages; check each SKU once
                if (isset($seenSkus[$sku])) {
                    continue;
                }
                $seenSkus[$sku] = true;

                if (!$this->existsInDatabase($sku)) {
                    $newProducts[] = $item;
                }
            }
        }

        return $newProducts;
    }
}
By "New Items" section — most suppliers have a separate URL:
// config/suppliers.php
// Settings for a single supplier that exposes a dedicated "new items" section.
// NOTE(review): the key is presumably the supplier identifier used for lookup — confirm against the code that reads this config.
'supplier_abc' => [
// URL of the supplier's new-arrivals listing page
'new_arrivals_url' => 'https://supplier.ru/catalog/new/',
// Selector for product entries on that page — presumably a CSS selector matching one product card; verify against the scraper
'new_arrivals_selector' => '.product-card',
'strategy' => 'new_section', // Parse only this section
],
By SKU comparison — a universal method that does not depend on the website's structure:
// app/Services/NewArrivals/SkuDiffDetector.php
/**
 * Detects new and removed supplier SKUs by diffing the current SKU list
 * against the most recently stored snapshot.
 */
class SkuDiffDetector
{
    /**
     * Compares the current SKU list with the latest snapshot and stores a new snapshot.
     *
     * Side effects: creates a SupplierSnapshot row; when SKUs have disappeared,
     * writes an info log entry and dispatches SupplierProductsRemoved.
     *
     * @param int   $supplierId  Supplier whose catalog was scraped.
     * @param array $currentSkus SKUs currently present on the supplier's site.
     *
     * @return array Sequentially indexed list of SKUs absent from the previous snapshot.
     *               Empty on the first run (baseline only) and when $currentSkus is empty.
     */
    public function detect(int $supplierId, array $currentSkus): array
    {
        // Deduplicate and reindex so the snapshot and the diff results are plain lists
        // (array_unique/array_diff preserve keys, which would otherwise leave gaps).
        $currentSkus = array_values(array_unique($currentSkus));

        if ($currentSkus === []) {
            // An empty list most likely means the scrape failed. Storing it would report
            // every known SKU as removed now and as "new" again on the next successful run,
            // so the previous snapshot is left untouched.
            Log::warning("Supplier #{$supplierId}: empty SKU list, snapshot not updated");
            return [];
        }

        // Load previous SKU snapshot
        $previousSnapshot = SupplierSnapshot::where('supplier_id', $supplierId)
            ->latest()
            ->first();

        if (!$previousSnapshot) {
            // First run — save as baseline, no new items
            $this->saveSnapshot($supplierId, $currentSkus);
            return [];
        }

        // NOTE(review): assumes the model casts sku_list to an array — confirm on SupplierSnapshot
        $previousSkus = $previousSnapshot->sku_list ?? [];

        $newSkus = array_values(array_diff($currentSkus, $previousSkus));
        $removedSkus = array_values(array_diff($previousSkus, $currentSkus));

        // Update snapshot
        $this->saveSnapshot($supplierId, $currentSkus);

        // Log discontinued products
        if ($removedSkus !== []) {
            Log::info("Supplier #{$supplierId}: removed SKUs", ['skus' => $removedSkus]);
            SupplierProductsRemoved::dispatch($supplierId, $removedSkus);
        }

        return $newSkus;
    }

    /**
     * Stores the given SKU list as a new snapshot for the supplier.
     *
     * @param int   $supplierId Supplier the snapshot belongs to.
     * @param array $skus       SKU list to store.
     */
    private function saveSnapshot(int $supplierId, array $skus): void
    {
        SupplierSnapshot::create([
            'supplier_id' => $supplierId,
            'sku_list' => $skus,
            'sku_count' => count($skus),
            'captured_at' => now(),
        ]);
    }
}
Full Detection and Processing Cycle
// app/Jobs/CheckSupplierNewArrivals.php
/**
 * Queued job: scrapes one supplier's catalog, detects SKUs that are new since the
 * previous snapshot, notifies the team, and either auto-imports the new products
 * or stores them for manual review.
 */
class CheckSupplierNewArrivals implements ShouldQueue
{
    public int $tries = 3;
    public int $timeout = 600;

    /** ID of the supplier to check. */
    public int $supplierId;

    /**
     * @param int $supplierId ID of the supplier to check.
     */
    public function __construct(int $supplierId)
    {
        $this->supplierId = $supplierId;
    }

    /**
     * Runs the full detection and processing cycle for the supplier.
     *
     * NOTE(review): SkuDiffDetector::detect() stores the new snapshot before returning,
     * so if a later step here throws, a retry will see no new SKUs — confirm this is acceptable.
     *
     * @param SupplierScraper     $scraper  Provides the supplier's current product list.
     * @param SkuDiffDetector     $detector Determines which SKUs are new.
     * @param NewArrivalsNotifier $notifier Sends the new-arrivals notification.
     */
    public function handle(
        SupplierScraper $scraper,
        SkuDiffDetector $detector,
        NewArrivalsNotifier $notifier
    ): void {
        $supplier = Supplier::findOrFail($this->supplierId);

        // Step 1: Get all SKUs from supplier website
        $allProducts = $scraper->scrapeAllProductSkus($supplier);
        $currentSkus = array_column($allProducts, 'sku');

        // Step 2: Determine new SKUs
        $newSkus = $detector->detect($this->supplierId, $currentSkus);
        if (empty($newSkus)) {
            Log::info("No new arrivals for supplier #{$this->supplierId}");
            return;
        }

        // Step 3: Pick the scraped details of the new products.
        // A flipped lookup gives O(1) membership checks; array_values() restores
        // a plain list after array_filter(), which preserves the original keys.
        $newSkuLookup = array_flip($newSkus);
        $newProducts = array_values(array_filter(
            $allProducts,
            static fn($p) => isset($p['sku'], $newSkuLookup[$p['sku']])
        ));

        // Step 4: Notification
        $notifier->notify($supplier, $newProducts);

        // Step 5: Auto-import if configured
        if ($supplier->auto_import_new_arrivals) {
            foreach ($newProducts as $product) {
                ImportNewSupplierProduct::dispatch($this->supplierId, $product)
                    ->onQueue('imports');
            }
        } else {
            // Save as "pending review"
            foreach ($newProducts as $product) {
                PendingImport::create([
                    'supplier_id' => $this->supplierId,
                    'data' => $product,
                    'status' => 'pending_review',
                ]);
            }
        }

        Log::info("Found new arrivals", [
            'supplier_id' => $this->supplierId,
            'count' => count($newProducts),
        ]);
    }
}
New Arrivals Notifications
// app/Notifications/NewSupplierArrivalsNotification.php
/**
 * Mail and Slack notification announcing newly detected products from a supplier.
 */
class NewSupplierArrivalsNotification extends Notification implements ShouldQueue
{
    use Queueable;

    /** Maximum number of products listed individually in the Slack message. */
    private const SLACK_PREVIEW_LIMIT = 10;

    /** @var object Supplier the products belong to; its id and name are read below. */
    public $supplier;

    /** @var array List of new products; each entry is read via its 'sku' and 'name' keys. */
    public array $products;

    /** Number of new products. */
    public int $count;

    /**
     * @param object $supplier Supplier the products belong to (must expose id and name).
     * @param array  $products New products, each an array with 'sku' and 'name'.
     */
    public function __construct($supplier, array $products)
    {
        $this->supplier = $supplier;
        $this->products = array_values($products);
        $this->count = count($this->products);
    }

    /**
     * Delivery channels for this notification.
     */
    public function via($notifiable): array
    {
        return ['mail', 'slack'];
    }

    /**
     * Builds the e-mail: supplier name, product count, detection time and a link
     * to the pending-imports list filtered by this supplier.
     */
    public function toMail($notifiable): MailMessage
    {
        return (new MailMessage)
            ->subject("New arrivals: {$this->supplier->name} ({$this->count} items)")
            ->line("Detected {$this->count} new products from **{$this->supplier->name}**")
            // Day-first order: a dot-separated date is read as day.month.year
            ->line("Detection date: " . now()->format('d.m.Y H:i'))
            ->action('View new items', route('admin.pending-imports.index', [
                'supplier_id' => $this->supplier->id,
            ]))
            ->line('Products are waiting for review before publication.');
    }

    /**
     * Builds the Slack message: a headline plus up to SLACK_PREVIEW_LIMIT products,
     * followed by a count of the products that were left out.
     */
    public function toSlack($notifiable): SlackMessage
    {
        $lines = array_map(
            static function ($p) {
                $name = $p['name'] ?? '';

                return "• {$p['sku']} — {$name}";
            },
            array_slice($this->products, 0, self::SLACK_PREVIEW_LIMIT)
        );

        // Make the truncation visible instead of silently dropping the rest
        $remaining = $this->count - count($lines);
        if ($remaining > 0) {
            $lines[] = "…and {$remaining} more";
        }

        return (new SlackMessage)
            ->content(
                "🆕 *{$this->supplier->name}*: {$this->count} new products\n" .
                implode("\n", $lines)
            );
    }
}
Queue for Manual Review
New products often require manual review: verifying the category, adding an SEO description, checking the photos. The moderator interface:
// app/Http/Controllers/Admin/PendingImportController.php
/**
 * Moderator interface for supplier products waiting for manual review.
 */
class PendingImportController extends Controller
{
    /**
     * Lists imports in "pending_review" status, newest first, optionally filtered by supplier.
     *
     * @param Request $request May carry a supplier_id filter.
     */
    public function index(Request $request): Response
    {
        $pending = PendingImport::query()
            ->with('supplier')
            ->when($request->supplier_id, fn($q, $id) => $q->where('supplier_id', $id))
            ->where('status', 'pending_review')
            ->orderBy('created_at', 'desc')
            ->paginate(50)
            // Keep the supplier_id filter in the pagination links
            ->withQueryString();

        return Inertia::render('Admin/PendingImports/Index', [
            'imports' => $pending,
        ]);
    }

    /**
     * Sends a pending product to the import queue and marks it approved.
     *
     * Imports that are no longer pending are left untouched, so a repeated
     * submit does not dispatch a second import job.
     */
    public function approve(PendingImport $import): RedirectResponse
    {
        if ($import->status !== 'pending_review') {
            return back()->with('error', 'Import has already been processed');
        }

        // Same queue as the auto-import path in CheckSupplierNewArrivals
        ImportNewSupplierProduct::dispatch($import->supplier_id, $import->data)
            ->onQueue('imports');
        $import->update(['status' => 'approved']);

        return back()->with('success', 'Product sent to import');
    }

    /**
     * Marks a pending product as rejected and stores the optional reason.
     *
     * @param Request $request May carry a free-text "reason".
     */
    public function reject(PendingImport $import, Request $request): RedirectResponse
    {
        if ($import->status !== 'pending_review') {
            return back()->with('error', 'Import has already been processed');
        }

        // The reason is moderator-supplied text: accept only a bounded string
        $validated = $request->validate([
            'reason' => ['nullable', 'string', 'max:1000'],
        ]);

        $import->update([
            'status' => 'rejected',
            'reject_reason' => $validated['reason'] ?? null,
        ]);

        return back()->with('success', 'Product rejected');
    }
}
Schedule
// Full sweep of all suppliers once a day, in the morning.
// NOTE(review): the four-hourly priority run presumably also fires at 08:00; withoutOverlapping()
// guards each command separately — confirm priority suppliers are not checked twice at once.
$dailyFullCheck = $schedule->command('check:new-arrivals --all-suppliers');
$dailyFullCheck->dailyAt('08:00')->withoutOverlapping();

// Priority suppliers are checked more often.
$priorityCheck = $schedule->command('check:new-arrivals --supplier=priority');
$priorityCheck->everyFourHours()->withoutOverlapping();
Snapshot Storage
Snapshots accumulate — old ones need cleanup:
// Weekly cleanup: drop snapshots older than 90 days, keeping one per month.
$snapshotCleanup = $schedule->command('snapshots:cleanup --keep-monthly --older-than=90');
$snapshotCleanup->weekly();
Development timeframe: a new-arrivals detector for one supplier, with notifications and a review queue, takes 4–6 business days.







