Implementing Automatic Product Card Creation from Scraped Data
Automatic product card creation is the final stage of the pipeline: data scraped from suppliers or marketplaces is transformed into complete product cards in the store. The task requires normalizing the data, matching it against the existing catalog, building variable (configurable) products, and publishing media.
General Pipeline Diagram
Raw Scraped Data (JSON)
↓
DataValidator – validates required fields
↓
DataNormalizer – normalizes fields, units, formats
↓
CategoryMatcher – determines category from data
↓
DuplicateChecker – checks whether the product already exists in the catalog
↓
VariantBuilder – assembles variable products from sizes/colors
↓
ImageProcessor – downloads and processes photos
↓
ProductCreator – writes to database via platform repository
↓
SEOGenerator – populates meta tags, slug
↓
PublicationDecider – draft / published based on settings
Input Data Validation
// app/Services/ProductImport/DataValidator.php
use Illuminate\Support\Facades\Validator;
/**
 * Validates one raw scraped product record before it enters the import pipeline.
 */
class DataValidator
{
    /**
     * Laravel validation rules for a scraped record.
     *
     * Validator::validated() returns only attributes that have a rule, so every
     * field read by later stages (supplier binding, category matching, duplicate
     * detection, publication) must be listed here or it is silently dropped.
     */
    private array $rules = [
        'sku' => 'required|string|max:100',
        'name' => 'required|string|max:500',
        'price' => 'required|numeric|min:0.01|max:9999999',
        'description' => 'nullable|string',
        'images' => 'nullable|array',
        'images.*' => 'nullable|url',
        'specs' => 'nullable|array',
        'in_stock' => 'nullable|boolean',
        // Pass-through fields consumed by the matcher, duplicate checker and creator.
        'supplier_id' => 'nullable|integer',
        'supplier_sku' => 'nullable|string|max:100',
        'source_url' => 'nullable|url',
        'category' => 'nullable|string|max:255',
        'brand' => 'nullable|string|max:255',
        'gtin' => 'nullable|string|max:50',
        'auto_publish' => 'nullable|boolean',
    ];

    /**
     * Runs the rule set and the extra business checks against a raw record.
     *
     * @param array $data Raw scraped record.
     * @return ValidationResult Failed result carrying the error bag, or a passed
     *                          result carrying only the validated fields.
     */
    public function validate(array $data): ValidationResult
    {
        $validator = Validator::make($data, $this->rules);
        if ($validator->fails()) {
            return ValidationResult::fail($validator->errors()->toArray());
        }

        $validated = $validator->validated();

        // Additional business rule: a price below the configured floor is more
        // likely a scraping error than a real offer.
        if ($validated['price'] < config('import.min_price', 1)) {
            return ValidationResult::fail(['price' => ['Price suspiciously low']]);
        }

        return ValidationResult::pass($validated);
    }
}
Category Matching
// app/Services/ProductImport/CategoryMatcher.php
/**
 * Resolves the local category id for a scraped product.
 */
class CategoryMatcher
{
    /**
     * Tries three strategies in order and returns the first hit:
     * supplier-category mapping, substring match on the category name,
     * then keyword rules applied to the product name.
     *
     * @param array $productData Product record; reads `supplier_id`, `category` and `name`.
     * @return int|null Matched category id, or the configured default (which may be null).
     */
    public function match(array $productData): ?int
    {
        $categoryName = $productData['category'] ?? null;

        // Strategy 1: by supplier → category mapping
        if ($supplierId = $productData['supplier_id'] ?? null) {
            $mapping = SupplierCategoryMapping::where('supplier_id', $supplierId)
                ->where('supplier_category', $categoryName ?? '')
                ->first();
            if ($mapping) {
                return $mapping->local_category_id;
            }
        }

        // Strategy 2: fuzzy search by category name
        if ($categoryName) {
            // Scraped text is untrusted: escape LIKE wildcards so a value such as
            // "100%" or "_" cannot match arbitrary categories.
            $pattern = '%' . addcslashes($categoryName, '\\%_') . '%';
            $category = Category::where('name', 'ilike', $pattern)->first();
            if ($category) {
                return $category->id;
            }
        }

        // Strategy 3: by keywords in product name
        return $this->matchByKeywords((string) ($productData['name'] ?? ''));
    }

    /**
     * Returns the category of the highest-priority rule that has a keyword
     * contained (case-insensitively) in the product name.
     *
     * @param string $name Product name to scan.
     * @return int|null Rule category id, or `import.default_category_id` from config.
     */
    private function matchByKeywords(string $name): ?int
    {
        $rules = CategoryKeywordRule::orderBy('priority', 'desc')->get();
        foreach ($rules as $rule) {
            // NOTE(review): assumes `keywords` is cast to an array on the model — confirm.
            foreach ((array) $rule->keywords as $keyword) {
                $keyword = trim((string) $keyword);
                // An empty needle is found at offset 0 in any string, so a blank
                // keyword would otherwise match every product.
                if ($keyword === '') {
                    continue;
                }
                if (mb_stripos($name, $keyword) !== false) {
                    return $rule->category_id;
                }
            }
        }

        return config('import.default_category_id');
    }
}
Duplicate Detector
// app/Services/ProductImport/DuplicateChecker.php
/**
 * Looks for an existing catalog product that corresponds to a scraped record.
 */
class DuplicateChecker
{
    /** Minimum similar_text() percentage for two names to count as the same product. */
    private const NAME_SIMILARITY_THRESHOLD = 90.0;

    /**
     * Checks, in order: supplier + supplier SKU, GTIN, then name similarity
     * among products of the same brand.
     *
     * @param array $data Product record; reads `supplier_id`, `sku`, `gtin`, `name`, `brand`.
     * @return Product|null The matching product, or null when none is found.
     */
    public function findExisting(array $data): ?Product
    {
        // Exact match by supplier SKU
        $supplierId = $data['supplier_id'] ?? null;
        $sku = $data['sku'] ?? null;
        if ($supplierId && $sku !== null && $sku !== '') {
            $existing = Product::whereHas('supplierMappings', function ($q) use ($supplierId, $sku) {
                $q->where('supplier_id', $supplierId)
                    ->where('supplier_sku', $sku);
            })->first();
            if ($existing) {
                return $existing;
            }
        }

        // Match by EAN/GTIN
        if ($gtin = $data['gtin'] ?? null) {
            $existing = Product::where('gtin', $gtin)->first();
            if ($existing) {
                return $existing;
            }
        }

        // Fuzzy match by name + brand
        $name = $data['name'] ?? null;
        $brand = $data['brand'] ?? null;
        if ($name && $brand) {
            // Lower-case the incoming name once instead of once per candidate.
            $needle = mb_strtolower($name);
            // Only id and name are loaded for the comparison pass.
            $candidates = Product::where('brand', $brand)->get(['id', 'name']);
            foreach ($candidates as $candidate) {
                $pct = 0.0;
                similar_text($needle, mb_strtolower((string) $candidate->name), $pct);
                if ($pct >= self::NAME_SIMILARITY_THRESHOLD) {
                    // Re-fetch so the caller receives a fully loaded model rather
                    // than one carrying only the two comparison columns.
                    return Product::find($candidate->id);
                }
            }
        }

        return null;
    }
}
Variant Builder
// app/Services/ProductImport/VariantBuilder.php
/**
 * Groups a flat list of scraped rows into simple and configurable products.
 */
class VariantBuilder
{
    /**
     * Size tokens. Bounded on both sides so that only whole tokens are removed:
     * "S" is stripped from "Nike S", but not from "Shirt".
     */
    private const SIZE_PATTERN = '/\b(?:xxl|xl|xs|s|m|l|\d+\s*(?:см|мм|дюйм))\b/iu';

    /** Colour tokens, bounded on both sides ("red" is stripped, "Redmi" is not). */
    private const COLOR_PATTERN = '/\b(?:красный|синий|чёрный|белый|зелёный|red|blue|black|white)\b/iu';

    /**
     * Groups flat product list into configurable products with variants
     *
     * Example: 3 rows "T-shirt Nike S/red", "S/blue", "M/red"
     * → 1 configurable product + 3 simple variants
     *
     * @param array $products List of product rows; reads `sku`, `name`, `brand`.
     * @return array List of entries shaped either
     *               ['type' => 'simple', 'data' => row] or
     *               ['type' => 'configurable', 'base' => row, 'variants' => rows].
     */
    public function buildVariants(array $products): array
    {
        // Group by base key (SKU prefix, or brand + name without size/color)
        $groups = [];
        foreach ($products as $product) {
            $groups[$this->extractBaseKey($product)][] = $product;
        }

        $result = [];
        foreach ($groups as $variants) {
            if (count($variants) === 1) {
                $result[] = ['type' => 'simple', 'data' => $variants[0]];
                continue;
            }
            $result[] = [
                'type' => 'configurable',
                'base' => $this->buildBase($variants),
                'variants' => $variants,
            ];
        }

        return $result;
    }

    /**
     * Builds the shared parent data of a configurable product from its variants.
     *
     * The first variant is used as the template and its name is stripped of
     * size/colour tokens.
     * NOTE(review): other parent fields (sku, price) are copied from the first
     * variant as-is — confirm what the platform expects for the parent row.
     *
     * @param array $variants Non-empty list of variant rows.
     * @return array Parent product row.
     */
    private function buildBase(array $variants): array
    {
        $base = $variants[0];
        $originalName = (string) ($base['name'] ?? '');
        $cleanName = $this->stripVariantTokens($originalName);
        // Keep the original name if stripping would leave nothing.
        $base['name'] = $cleanName !== '' ? $cleanName : $originalName;

        return $base;
    }

    /**
     * Computes the grouping key of a row: the upper-cased SKU prefix before the
     * first "-" when the SKU has one, otherwise "brand|name-without-size-and-colour".
     */
    private function extractBaseKey(array $product): string
    {
        // Group by SKU prefix if available
        if (preg_match('/^([A-Z0-9]+)-/i', (string) ($product['sku'] ?? ''), $m) === 1) {
            return strtoupper($m[1]);
        }

        $brand = trim((string) ($product['brand'] ?? ''));

        return $brand . '|' . $this->stripVariantTokens((string) ($product['name'] ?? ''));
    }

    /**
     * Removes size and colour tokens from a name and tidies what is left, so
     * that "Shirt S/red" and "Shirt  M / blue" both reduce to "Shirt".
     */
    private function stripVariantTokens(string $name): string
    {
        // preg_replace() returns null on failure (e.g. invalid UTF-8); keep the
        // previous value in that case instead of losing the name.
        $name = preg_replace(self::SIZE_PATTERN, '', $name) ?? $name;
        $name = preg_replace(self::COLOR_PATTERN, '', $name) ?? $name;
        $name = preg_replace('/\s+/u', ' ', $name) ?? $name;

        // Drop separators left dangling at the edges by the removed tokens.
        return trim($name, " \t\n\r/,;-");
    }
}
Main Product Card Creator Service
// app/Services/ProductImport/ProductCreator.php
/**
 * Persists a validated product record together with its supplier binding,
 * attribute values and (asynchronously) its images.
 */
class ProductCreator
{
    /**
     * Creates the product and its related rows.
     *
     * The product, supplier mapping and attribute values are written in one
     * database transaction, so a failure part-way through does not leave a
     * half-built card. The image job is dispatched only after that succeeds.
     *
     * @param array  $data Validated product record; requires `sku`, `name`, `price`.
     * @param string $type Product type stored on the record.
     * @return Product The newly created product.
     */
    public function create(array $data, string $type = 'simple'): Product
    {
        $product = \DB::transaction(function () use ($data, $type) {
            $product = Product::create([
                'type' => $type,
                'sku' => $data['sku'],
                'name' => $data['name'],
                'slug' => $this->generateUniqueSlug($data['name']),
                'description' => $data['description'] ?? '',
                'price' => $data['price'],
                'brand' => $data['brand'] ?? null,
                'gtin' => $data['gtin'] ?? null,
                'category_id' => $data['category_id'] ?? null,
                // A missing flag means "do not publish automatically".
                'status' => !empty($data['auto_publish']) ? 'active' : 'draft',
                'meta_title' => $this->generateMetaTitle($data),
                'meta_description' => $this->generateMetaDesc($data),
            ]);

            // Bind to supplier
            if ($supplierId = $data['supplier_id'] ?? null) {
                $product->supplierMappings()->create([
                    'supplier_id' => $supplierId,
                    'supplier_sku' => $data['supplier_sku'] ?? $data['sku'],
                    'supplier_url' => $data['source_url'] ?? null,
                ]);
            }

            // Attributes
            foreach ($data['specs'] ?? [] as $key => $value) {
                $key = (string) $key;
                $attribute = ProductAttribute::firstOrCreate(
                    ['code' => $this->slug($key)],
                    ['name' => $key]
                );
                $product->attributeValues()->create([
                    'attribute_id' => $attribute->id,
                    // Nested spec values are stored as JSON text rather than
                    // triggering an array-to-string conversion.
                    'value' => is_array($value) ? json_encode($value, JSON_UNESCAPED_UNICODE) : $value,
                ]);
            }

            return $product;
        });

        // Images (via Job)
        if (!empty($data['images'])) {
            DownloadAndAttachProductImages::dispatch($product->id, $data['images'])
                ->onQueue('image-processing');
        }

        return $product;
    }

    /**
     * Slugifies the name and appends "-1", "-2", … until no product uses the slug.
     */
    private function generateUniqueSlug(string $name): string
    {
        $base = \Str::slug($name);
        // A name made only of symbols slugifies to an empty string.
        if ($base === '') {
            $base = 'product';
        }

        $slug = $base;
        for ($i = 1; Product::where('slug', $slug)->exists(); $i++) {
            $slug = "{$base}-{$i}";
        }

        return $slug;
    }

    /**
     * Builds the meta title: "<brand> <name>" (or just the name), cut to 70 characters.
     */
    private function generateMetaTitle(array $data): string
    {
        $title = $data['name'];
        if ($brand = $data['brand'] ?? null) {
            $title = "{$brand} {$title}";
        }

        return mb_substr($title, 0, 70);
    }

    /**
     * Builds the meta description from the plain-text description, falling back
     * to the product name when there is none; cut to 160 characters.
     */
    private function generateMetaDesc(array $data): string
    {
        $plain = strip_tags((string) ($data['description'] ?? ''));
        $plain = trim(preg_replace('/\s+/u', ' ', $plain) ?? $plain);
        if ($plain === '') {
            $plain = (string) $data['name'];
        }

        return mb_substr($plain, 0, 160);
    }

    /**
     * Turns a spec label into an attribute code (underscore-separated slug).
     * Labels that slugify to nothing get a stable hash-based code instead.
     */
    private function slug(string $value): string
    {
        $code = \Str::slug($value, '_');

        return $code !== '' ? $code : 'attr_' . md5($value);
    }
}
Orchestrating Job
// app/Jobs/CreateProductFromScrapedData.php
/**
 * Queue job that runs one scraped record through validation, duplicate
 * detection, category matching and product creation, logging the outcome.
 */
class CreateProductFromScrapedData implements ShouldQueue
{
    public int $tries = 3;

    /** Raw scraped record to import. */
    public array $rawData;

    /** Identifier of the scraping source, written to the import log. */
    public string $source;

    /**
     * @param array  $rawData Raw scraped record.
     * @param string $source  Identifier of the scraping source.
     */
    public function __construct(array $rawData = [], string $source = 'unknown')
    {
        $this->rawData = $rawData;
        $this->source = $source;
    }

    /**
     * Processes the record; dependencies are passed in as method arguments.
     *
     * Outcomes: `validation_failed` (record rejected), `updated` (existing
     * product refreshed), or `created` (new product card).
     */
    public function handle(
        DataValidator $validator,
        CategoryMatcher $matcher,
        DuplicateChecker $checker,
        ProductCreator $creator
    ): void {
        // Validation
        $result = $validator->validate($this->rawData);
        if (!$result->passes()) {
            ImportLog::create([
                'source' => $this->source,
                'sku' => $this->rawData['sku'] ?? 'unknown',
                'status' => 'validation_failed',
                'errors' => $result->errors(),
            ]);
            return;
        }

        $data = $result->data();

        // Check for duplicates
        if ($existing = $checker->findExisting($data)) {
            // Update existing product, don't create duplicate
            $changes = ['price' => $data['price']];
            // `in_stock` is optional: only touch stock when the scrape reported it,
            // otherwise a missing value would overwrite the stored one.
            if (isset($data['in_stock'])) {
                $changes['in_stock'] = $data['in_stock'];
            }
            $existing->update($changes);

            ImportLog::create([
                'status' => 'updated',
                'product_id' => $existing->id,
                'source' => $this->source,
                'sku' => $data['sku'] ?? 'unknown',
            ]);
            return;
        }

        // Category matching
        $data['category_id'] = $matcher->match($data);

        // Creation
        $product = $creator->create($data);
        ImportLog::create([
            'status' => 'created',
            'product_id' => $product->id,
            'source' => $this->source,
        ]);
    }
}
Development Timeline
| Component | Timeline |
|---|---|
| Validator + normalizer | 1–2 days |
| Category matching | 1–2 days |
| Duplicate detector | 1 day |
| Variant builder | 2–3 days |
| Card creator + SEO | 1–2 days |
| Logging + import dashboard | 1–2 days |
| Total | 7–12 working days |







