AI-Based Background Removal from Images
The task of separating an object from its background falls into two classes: rough cutout of a plain background for e-commerce product photos, and precise alpha matting for hair, fur, and semi-transparent objects. These tasks are technically different and are solved by different tools.
Grounded-SAM2 — Modern Standard
Segment Anything Model 2 (Meta, 2024) combined with Grounding DINO provides state-of-the-art quality for most tasks. SAM2 is promptable: pass a point or bbox, get a mask.
import torch
import numpy as np
from PIL import Image
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from groundingdino.util.inference import load_model, predict
def remove_background_grounded_sam2(
    image_path: str,
    text_prompt: str = 'product',  # what to cut out
    box_threshold: float = 0.3,
    text_threshold: float = 0.25,
    output_path: str | None = None
) -> Image.Image:
    """Cut the object described by ``text_prompt`` out of an image.

    Grounding DINO localizes the object from the text prompt; SAM2 turns
    the resulting bbox into a pixel-precise mask, which becomes the alpha
    channel of the returned RGBA image.

    Args:
        image_path: Path to the input image (any PIL-readable format).
        text_prompt: Natural-language description of the object to keep.
        box_threshold: Grounding DINO box-confidence cutoff.
        text_threshold: Grounding DINO text-match cutoff.
        output_path: If given, the RGBA result is also saved there as PNG.

    Returns:
        RGBA PIL image: object pixels opaque, background transparent.

    Raises:
        ValueError: If no object matching the prompt is detected.
    """
    image = Image.open(image_path).convert('RGB')
    image_np = np.array(image)

    # Grounding DINO → object bbox by text
    # NOTE(review): models are loaded on every call; for batch use,
    # hoist loading out of this function.
    gdino_model = load_model(
        'groundingdino/config/GroundingDINO_SwinT_OGC.py',
        'weights/groundingdino_swint_ogc.pth'
    )
    boxes, logits, _ = predict(
        model=gdino_model,
        image=image_np,
        caption=text_prompt,
        box_threshold=box_threshold,
        text_threshold=text_threshold
    )
    if len(boxes) == 0:
        raise ValueError(f'Object "{text_prompt}" not found')

    # SAM2 → precise mask from bbox
    sam2 = build_sam2(
        'sam2_hiera_large.yaml',
        'weights/sam2_hiera_large.pt',
        device='cuda'
    )
    predictor = SAM2ImagePredictor(sam2)
    predictor.set_image(image_np)

    # Take the most confident bbox by logit (predict() does not
    # guarantee that boxes are sorted by confidence).
    h, w = image_np.shape[:2]
    # Grounding DINO returns normalized (cx, cy, w, h); SAM2 expects
    # pixel-space (x1, y1, x2, y2) — convert explicitly.
    cx, cy, bw, bh = boxes[int(logits.argmax())].numpy() * np.array([w, h, w, h])
    best_box = np.array([
        cx - bw / 2, cy - bh / 2,
        cx + bw / 2, cy + bh / 2
    ])

    masks, scores, _ = predictor.predict(
        box=best_box,
        multimask_output=True
    )
    best_mask = masks[np.argmax(scores)]  # (H, W) bool

    # Apply mask → RGBA: mask becomes the alpha channel
    result_rgba = np.dstack([image_np, best_mask.astype(np.uint8) * 255])
    result = Image.fromarray(result_rgba, 'RGBA')
    if output_path:
        result.save(output_path, 'PNG')
    return result
Alpha Matting for Complex Edges
Hair, fur, thin branches — SAM2 produces only a rough mask from a bbox, so the edges come out hard and pixelated. For these cases, alpha matting is applied on top of the SAM mask:
from pymatting import estimate_alpha_cf, estimate_foreground_ml
import cv2
def refine_mask_with_matting(
    image: np.ndarray,       # (H, W, 3) RGB, uint8
    rough_mask: np.ndarray,  # (H, W) bool from SAM
    erosion_px: int = 10,    # shrink of definite-foreground zone
    dilation_px: int = 10    # growth of definite-background boundary
) -> np.ndarray:
    """
    Mask refinement through closed-form matting.
    Trimap: definite_fg=255, definite_bg=0, uncertain=128.

    The rough mask is eroded to get a conservative foreground and
    dilated to get a conservative background; the band in between is the
    "uncertain" zone that closed-form matting resolves to fractional
    alpha values (soft edges for hair/fur).

    Returns:
        (H, W) uint8 alpha matte in [0, 255].
    """
    mask_u8 = rough_mask.astype(np.uint8) * 255
    # Bug fix: the dilation kernel must be sized by dilation_px — the
    # original reused the erosion kernel, silently ignoring dilation_px.
    erode_kernel = np.ones((erosion_px, erosion_px), np.uint8)
    dilate_kernel = np.ones((dilation_px, dilation_px), np.uint8)
    fg_mask = cv2.erode(mask_u8, erode_kernel)
    bg_mask = cv2.dilate(mask_u8, dilate_kernel)

    trimap = np.full(rough_mask.shape, 128, dtype=np.uint8)
    trimap[fg_mask > 0] = 255   # surely inside the object
    trimap[bg_mask == 0] = 0    # surely outside the object

    # Closed-form matting (Levin et al.) operates on [0, 1] floats
    image_float = image.astype(np.float64) / 255.0
    trimap_float = trimap.astype(np.float64) / 255.0
    alpha = estimate_alpha_cf(image_float, trimap_float)
    # alpha ∈ [0, 1]; edge values are fractional (partial transparency)
    return (alpha * 255).astype(np.uint8)
Batch Processing for E-commerce
Online stores process thousands of product photos. Optimal scheme: REMBG (library based on U2-Net/IS-Net) for fast batch inference, SAM2 for edge cases.
from rembg import remove, new_session
from PIL import Image
from pathlib import Path
import concurrent.futures
def batch_remove_background(
    input_dir: str,
    output_dir: str,
    model_name: str = 'isnet-general-use',  # best for products
    max_workers: int = 4
) -> dict:
    """
    Remove backgrounds from every image in ``input_dir``, writing PNGs
    with the same stem into ``output_dir``.

    REMBG models: u2net, u2netp (faster, lower quality),
    isnet-general-use (quality), isnet-anime (anime).

    Args:
        input_dir: Directory with .jpg/.jpeg/.png/.webp images.
        output_dir: Destination directory (created if missing).
        model_name: REMBG session model to load (shared across workers).
        max_workers: Thread-pool size; work is I/O + ONNX inference.

    Returns:
        dict with 'success' and 'failed' counters and an 'errors' list
        of exception messages for failed files.
    """
    session = new_session(model_name)
    # Bug fix: pathlib's glob has no brace expansion, so the original
    # pattern '*.{jpg,jpeg,png,webp}' matched nothing. Filter by suffix
    # instead (case-insensitive, so .JPG is also picked up).
    extensions = {'.jpg', '.jpeg', '.png', '.webp'}
    input_paths = [
        p for p in Path(input_dir).iterdir()
        if p.is_file() and p.suffix.lower() in extensions
    ]
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)  # ensure destination exists
    results = {'success': 0, 'failed': 0, 'errors': []}

    def process_one(img_path: Path) -> bool:
        # Returns True on success; errors are recorded, never raised
        # (list.append is atomic in CPython, safe across threads).
        try:
            input_data = img_path.read_bytes()
            output_data = remove(input_data, session=session)
            (out_dir / (img_path.stem + '.png')).write_bytes(output_data)
            return True
        except Exception as e:
            results['errors'].append(str(e))
            return False

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as ex:
        futures = {ex.submit(process_one, p): p for p in input_paths}
        for fut in concurrent.futures.as_completed(futures):
            if fut.result():
                results['success'] += 1
            else:
                results['failed'] += 1
    return results
Tool Comparison
| Tool | Speed | Edge Quality | Hair/Fur | Application |
|---|---|---|---|---|
| REMBG (U2-Net) | 0.3–0.8s/img | Average | Poor | Fast batch |
| REMBG (IS-Net) | 0.5–1.2s/img | Good | Satisfactory | Products |
| SAM2 | 0.8–2s/img | Very good | Good | Precise segmentation |
| SAM2 + matting | 2–5s/img | Excellent | Excellent | Portraits, fur |
| BiMatting | 1–3s/img | Excellent | Excellent | Professional |
Timelines
| Task | Timeline |
|---|---|
| Background removal API service (REMBG) | 1–2 weeks |
| System with SAM2 + domain fine-tuning | 3–5 weeks |
| Full pipeline with matting and QA | 5–8 weeks |







