AI Anomaly Detection in Video for Mobile Apps
Anomaly detection is a problem where labels for "bad" events are scarce or undefined: you cannot enumerate all possible anomalies in advance, especially in security or production-monitoring systems. This forces an architectural choice toward unsupervised or semi-supervised approaches, unlike classical object detection, which depends on a fixed set of labeled classes.
Defining Anomaly: Problem Statement
Before coding, establish clear requirements with stakeholders:
- Spatial anomaly: object in a zone where it shouldn't be (person in server room)
- Behavioral anomaly: normal object acting unusually (person running where everyone walks; car driving backwards)
- Temporal anomaly: event happening at wrong time (movement during non-work hours)
- Technical anomaly: equipment malfunctioning (vibration, smoke, sparks)
Each type requires a different detection architecture.
Architecture: Two-Level Approach
// iOS: combined anomaly detector
// iOS: combined anomaly detector
class VideoAnomalyDetector {
    /// Stage 1: deterministic rule checks — fast, cheap, fully explainable.
    private let rulesEngine: AnomalyRulesEngine
    /// Stage 2: ML detector, consulted only when no rule fires.
    private let aiDetector: MLAnomalyDetector

    /// Analyzes one frame. Rule violations short-circuit the ML pass,
    /// since rules are both faster and more precise for known scenarios.
    func analyze(frame: CVPixelBuffer, timestamp: Date) async -> AnomalyResult {
        let violations = rulesEngine.check(frame: frame, timestamp: timestamp)
        guard violations.isEmpty else {
            // Deterministic rules → confidence is exact by construction.
            return AnomalyResult(detected: true,
                                 type: .ruleViolation,
                                 violations: violations,
                                 confidence: 1.0)
        }
        // No rule matched — fall through to the learned detector for unknown patterns.
        return await aiDetector.detect(frame: frame, timestamp: timestamp)
    }
}
Deterministic Rules
class AnomalyRulesEngine {
    struct RestrictedZone {
        let polygon: [CGPoint]      // normalized coordinates, >= 3 vertices
        let schedule: WorkSchedule? // nil = always restricted
        let name: String
    }

    private let restrictedZones: [RestrictedZone]
    private let personDetector: VNCoreMLModel // light YOLOv8n

    /// Checks every detected person against every restricted zone and
    /// returns one violation per (person, zone) pair that breaks a rule.
    func check(frame: CVPixelBuffer, timestamp: Date) -> [RuleViolation] {
        let persons = detectPersons(frame)
        var violations: [RuleViolation] = []
        for person in persons {
            let personCenter = person.boundingBox.center
            for zone in restrictedZones
            where Self.polygon(zone.polygon, contains: personCenter) {
                if let schedule = zone.schedule {
                    // Scheduled zone: presence is only a violation outside active hours.
                    if !schedule.isActive(at: timestamp) {
                        violations.append(RuleViolation(
                            type: .unauthorizedZoneAccess,
                            zone: zone.name,
                            timestamp: timestamp
                        ))
                    }
                } else {
                    // No schedule: the zone is restricted at all times.
                    violations.append(RuleViolation(type: .restrictedZone, zone: zone.name))
                }
            }
        }
        return violations
    }

    /// Even–odd (ray-casting) point-in-polygon test.
    ///
    /// BUG FIX: the original called `zone.polygon.contains(personCenter)`,
    /// which on `[CGPoint]` is `Array.contains(_:)` — an exact *vertex
    /// membership* test, not geometric containment. A person standing
    /// anywhere inside the zone would never be flagged unless their center
    /// coincided exactly with a polygon vertex.
    private static func polygon(_ vertices: [CGPoint], contains point: CGPoint) -> Bool {
        guard vertices.count >= 3 else { return false } // degenerate polygon contains nothing
        var inside = false
        var j = vertices.count - 1
        for i in 0..<vertices.count {
            let a = vertices[i]
            let b = vertices[j]
            // Count edge crossings of a horizontal ray cast from `point`.
            if (a.y > point.y) != (b.y > point.y),
               point.x < (b.x - a.x) * (point.y - a.y) / (b.y - a.y) + a.x {
                inside.toggle()
            }
            j = i
        }
        return inside
    }
}
AI Detection: Autoencoder for Behavioral Anomalies
For behavioral anomalies, use an autoencoder approach: train on normal behavior; anomaly = high reconstruction error.
# Train autoencoder on normal video clips
import torch
import torch.nn as nn
class VideoAnomalyAutoencoder(nn.Module):
    """3D convolutional autoencoder for video anomaly detection.

    Input tensor layout: [batch, channels=3, frames, height, width]
    (channels-FIRST, as ``nn.Conv3d`` requires).

    BUG FIX: the original docstring claimed channels-last
    ``[batch, frames, height, width, channels]`` — with that layout
    ``Conv3d(3, 32, ...)`` would reject any clip whose frame count
    differs from 3, and the decoder's stride/output_padding arithmetic
    (which restores frames x H x W exactly) would not hold.

    Constraints implied by the pooling stack: ``frames`` must be even
    (MaxPool3d((2,2,2))) and ``height``/``width`` divisible by 4
    (two spatial halvings).

    Train only on NORMAL scenes; a clip is anomalous when its
    reconstruction error exceeds a threshold calibrated on a
    validation set of normal scenes.
    """

    def __init__(self, input_shape=(16, 64, 64, 3)):
        # `input_shape` is kept for interface compatibility; the conv
        # stack itself is shape-agnostic within the divisibility limits above.
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv3d(3, 32, kernel_size=(3, 3, 3), padding=1),   # [B,32,F,H,W]
            nn.ReLU(),
            nn.MaxPool3d((1, 2, 2)),                              # [B,32,F,H/2,W/2]
            nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=1),  # [B,64,F,H/2,W/2]
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),                              # [B,64,F/2,H/4,W/4]
        )
        self.decoder = nn.Sequential(
            # Undo MaxPool3d((2,2,2)): doubles frames and both spatial dims.
            nn.ConvTranspose3d(64, 32, kernel_size=(3, 3, 3),
                               stride=(2, 2, 2), padding=1, output_padding=1),
            nn.ReLU(),
            # Undo MaxPool3d((1,2,2)): doubles spatial dims only.
            nn.ConvTranspose3d(32, 3, kernel_size=(3, 3, 3),
                               stride=(1, 2, 2), padding=1, output_padding=(0, 1, 1)),
            nn.Sigmoid(),  # pixel values in [0, 1]
        )

    def forward(self, x):
        """Reconstruct the input clip; output shape equals input shape."""
        z = self.encoder(x)
        return self.decoder(z)

    def anomaly_score(self, x):
        """Per-clip MSE over channels, frames, and space → tensor of shape [batch]."""
        reconstructed = self(x)
        # Mean over dims [C, F, H, W]; dim 0 (batch) is preserved.
        return ((x - reconstructed) ** 2).mean(dim=[1, 2, 3, 4])
The anomaly threshold is determined on a validation set of normal scenes — for example, the 99th percentile of reconstruction error marks the boundary of normal behavior.
On mobile, this autoencoder converts to CoreML / TFLite. Size is significantly smaller than YOLOv8: 5–15 MB.
Mobile Inference: Sliding Window Processing
// iOS: analyze video stream with sliding 16-frame window
// iOS: analyze video stream with a sliding 16-frame window
class SlidingWindowAnalyzer {
    /// Frames per analysis window. FIX: the original hard-coded 16 in two
    /// unrelated places (buffer capacity and the fullness check); a single
    /// named constant keeps them from drifting apart. Must match the
    /// exported model's temporal input depth.
    private static let windowSize = 16
    /// New window every 8 frames → 50% overlap at windowSize 16.
    private let stepSize = 8

    private var frameBuffer: CircularBuffer<CVPixelBuffer> =
        CircularBuffer(capacity: SlidingWindowAnalyzer.windowSize)
    private var frameCounter = 0

    /// Appends one frame; returns a score only on window boundaries
    /// once the buffer has filled, otherwise nil.
    func addFrame(_ frame: CVPixelBuffer) async -> AnomalyScore? {
        frameBuffer.append(frame)
        frameCounter += 1
        // Evaluate every stepSize frames, and only with a full window.
        guard frameCounter % stepSize == 0,
              frameBuffer.count == Self.windowSize else { return nil }
        // try? deliberately maps inference failures to "no score for this window".
        return try? await computeAnomalyScore(frames: Array(frameBuffer))
    }

    private func computeAnomalyScore(frames: [CVPixelBuffer]) async throws -> AnomalyScore {
        // NOTE(review): original comment said [1, 16, 64, 64, 3] (channels-last);
        // confirm against the exported model — PyTorch-trained models are
        // typically channels-first [1, 3, 16, 64, 64].
        let tensor = prepareTensor(frames)
        let output = try autoencoderModel.prediction(input: tensor)
        let score = output.anomalyScore.floatValue
        return AnomalyScore(
            value: score,
            isAnomaly: score > anomalyThreshold,
            frameWindow: frames
        )
    }
}
Alerts and Response
// Android: multi-level alert system
// Alert severity hierarchy: Warning carries a raw anomaly score,
// Critical carries the concrete rule violations that triggered it.
sealed class AnomalyAlert {
    data class Warning(val message: String, val score: Float) : AnomalyAlert()
    data class Critical(val message: String, val violations: List<RuleViolation>) : AnomalyAlert()
}
class AlertManager(private val notificationManager: NotificationManager) {
    // Last emission time (epoch millis) per alert key, for rate limiting.
    private val cooldownMap = mutableMapOf<String, Long>()
    // Anti-spam: at most one alert per key every 30 seconds.
    private val alertCooldownMs = 30_000L

    /// Emits an alert unless the same key fired within the cooldown window.
    fun emit(alert: AnomalyAlert, alertKey: String) {
        val now = System.currentTimeMillis()
        val lastEmitted = cooldownMap[alertKey] ?: 0L
        if (now - lastEmitted < alertCooldownMs) return
        cooldownMap[alertKey] = now

        when (alert) {
            is AnomalyAlert.Warning ->
                showLocalNotification(alert.message, priority = LOW)
            is AnomalyAlert.Critical -> {
                showLocalNotification(alert.message, priority = HIGH)
                // Forward criticals to the external security system.
                sendWebhook(alert)
            }
        }
    }
}
Timeline Estimates
Zone violation detection with deterministic rules (no AI autoencoder) takes 1–2 weeks. Full system with autoencoder for behavioral anomalies, sliding window, multi-level alerts, integration with security/monitoring system, and iOS + Android support requires 2–4 weeks plus time to collect normal data and train the model.







