Automatic generation of sports statistics using AI
Match statistics — number of shots, ball possession, distance covered by each player, passing networks — have traditionally required manual annotation or expensive optical tracking systems such as Hawk-Eye. AI-based analysis of video from standard cameras delivers 80–90% of the accuracy of professional systems at a fraction of the cost.
Key metrics and calculation methods
import numpy as np
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class PlayerStats:
    """Running totals of one tracked player's match statistics.

    Instances are plain mutable records: counters start at zero and are
    incremented in place by whoever processes the tracking data.

    The ``heatmap`` array is excluded from equality comparison. Including a
    NumPy array in the generated ``__eq__`` makes ``a == b`` raise
    ``ValueError`` (the truth value of an element-wise comparison is
    ambiguous), so two records are considered equal when their identity and
    scalar counters match.
    """

    player_id: int
    team: str
    distance_m: float = 0.0
    sprint_count: int = 0  # sprints faster than 25 km/h
    max_speed_kmh: float = 0.0
    touches: int = 0
    shots: int = 0
    passes_attempted: int = 0
    passes_completed: int = 0
    # Occupancy grid with 68 rows and 105 columns; presumably one cell per
    # square metre of a 105 x 68 m pitch -- confirm against the code that
    # fills it. A factory is required so every player gets a separate array.
    heatmap: np.ndarray = field(
        default_factory=lambda: np.zeros((68, 105)),
        compare=False,
    )
class SportsStatisticsEngine:
    """Accumulate per-player and possession statistics from tracking data.

    Call :meth:`update` once per processed frame, then
    :meth:`generate_match_report` to obtain a summary dictionary.

    Player positions (``field_pos``) and the ball position are treated as
    ``(x, y)`` pitch coordinates; distances are reported as metres, so the
    inputs are assumed to be in metres -- confirm against the tracker.
    """

    SPRINT_SPEED_KMH = 25.0     # above this speed a player is sprinting
    JOGGING_SPEED_KMH = 11.0
    POSSESSION_RADIUS_M = 2.0   # nearest player must be closer than this

    def __init__(self, fps: float, sport: str = 'football'):
        """Create an empty engine.

        Args:
            fps: Rate of the frames passed to :meth:`update`; used to turn
                per-frame displacement into speed.
            sport: Sport label; stored but not otherwise used in this class.
        """
        self.fps = fps
        self.sport = sport
        self.player_stats: dict[int, PlayerStats] = {}
        self.possession_log = []  # (frame_idx, team_with_ball or None)
        self.event_log = []
        # Speed thresholds converted from km/h to metres travelled per frame.
        self.sprint_threshold = self.SPRINT_SPEED_KMH / 3.6 / fps
        self.jogging_threshold = self.JOGGING_SPEED_KMH / 3.6 / fps
        # Per-track motion state lives on the engine rather than being
        # attached ad hoc to the stats records.
        self._last_pos: dict = {}    # track_id -> last position, or None
        self._sprinting: dict = {}   # track_id -> was sprinting last frame

    @staticmethod
    def _is_valid_pos(pos) -> bool:
        """Return True if *pos* is a usable ``(x, y)`` position.

        Works for tuples, lists and NumPy arrays (whose truth value cannot
        be tested directly).
        """
        return pos is not None and len(pos) >= 2

    def update(self, frame_idx: int, tracked_players: list,
               ball_pos: Optional[tuple]):
        """Fold one frame of tracking results into the running statistics.

        Args:
            frame_idx: Index of the frame; recorded in the possession log.
            tracked_players: Dicts with a required ``'track_id'`` and
                optional ``'team'`` and ``'field_pos'`` keys.
            ball_pos: Ball position, or ``None`` when the ball was not
                detected; no possession entry is logged in that case.
        """
        for player in tracked_players:
            pid = player['track_id']
            pos = player.get('field_pos')
            if not self._is_valid_pos(pos):
                pos = None

            stats = self.player_stats.get(pid)
            if stats is None:
                stats = PlayerStats(
                    player_id=pid, team=player.get('team', 'unknown')
                )
                self.player_stats[pid] = stats

            prev = self._last_pos.get(pid)
            if prev is not None and pos is not None:
                self._accumulate_motion(pid, stats, prev, pos)
            else:
                # A gap in the track ends any sprint in progress.
                self._sprinting[pid] = False
            self._last_pos[pid] = pos

            if pos is not None:
                self._mark_heatmap(stats, pos)

        # Ball possession
        if self._is_valid_pos(ball_pos):
            possessing_team = self._determine_possession(
                ball_pos, tracked_players
            )
            self.possession_log.append((frame_idx, possessing_team))

    def _accumulate_motion(self, pid, stats, prev, pos) -> None:
        """Add one frame of displacement to distance, speed and sprints."""
        step_m = float(np.hypot(pos[0] - prev[0], pos[1] - prev[1]))
        stats.distance_m += step_m

        speed_kmh = step_m * self.fps * 3.6
        if speed_kmh > stats.max_speed_kmh:
            stats.max_speed_kmh = speed_kmh

        # Count a sprint once, when the player crosses the threshold, not on
        # every frame spent above it.
        is_sprinting = step_m > self.sprint_threshold
        if is_sprinting and not self._sprinting.get(pid, False):
            stats.sprint_count += 1
        self._sprinting[pid] = is_sprinting

    @staticmethod
    def _mark_heatmap(stats, pos) -> None:
        """Increment the heatmap cell under *pos*, clamped to the grid."""
        rows, cols = stats.heatmap.shape
        col = int(np.clip(pos[0], 0, cols - 1))
        row = int(np.clip(pos[1], 0, rows - 1))
        stats.heatmap[row, col] += 1

    def _determine_possession(self, ball_pos: tuple,
                              players: list) -> Optional[str]:
        """Return the team of the player nearest to the ball.

        Returns ``None`` when there are no players, no ball position, or
        the nearest player is not within ``POSSESSION_RADIUS_M``.
        """
        if not players or not self._is_valid_pos(ball_pos):
            return None

        min_dist = float('inf')
        possessing_team = None
        for player in players:
            pos = player.get('field_pos')
            if not self._is_valid_pos(pos):
                continue
            dist = float(np.hypot(ball_pos[0] - pos[0], ball_pos[1] - pos[1]))
            if dist < min_dist:
                min_dist = dist
                possessing_team = player.get('team')

        if min_dist < self.POSSESSION_RADIUS_M:
            return possessing_team
        return None

    def compute_possession_stats(self) -> dict:
        """Return each team's share of logged frames, in percent.

        Shares are relative to *all* logged frames, including those where
        no team had the ball, so they may sum to less than 100. With an
        empty log a fixed ``{'team_a': 50.0, 'team_b': 50.0}`` is returned.
        """
        total = len(self.possession_log)
        if total == 0:
            return {'team_a': 50.0, 'team_b': 50.0}

        counts = defaultdict(int)
        for _, team in self.possession_log:
            if team:
                counts[team] += 1

        return {
            team: round(count / total * 100, 1)
            for team, count in counts.items()
        }

    def generate_match_report(self) -> dict:
        """Build the summary: possession shares plus per-player figures.

        Returns:
            ``{'possession': {...}, 'players': {track_id: {...}}}``.
            ``pass_accuracy`` is a percentage and is ``0`` for players with
            no attempted passes.
        """
        report = {
            'possession': self.compute_possession_stats(),
            'players': {}
        }
        for pid, stats in self.player_stats.items():
            report['players'][pid] = {
                'team': stats.team,
                'distance_km': round(stats.distance_m / 1000, 2),
                'sprint_count': stats.sprint_count,
                'max_speed_kmh': round(stats.max_speed_kmh, 1),
                'shots': stats.shots,
                'passes_attempted': stats.passes_attempted,
                # max(..., 1) avoids division by zero
                'pass_accuracy': (stats.passes_completed /
                                  max(stats.passes_attempted, 1) * 100)
            }
        return report
Automatic generation of text reports
from transformers import pipeline
class MatchReportGenerator:
    """Turn a statistics dictionary into a short narrative via an LLM."""

    def __init__(self, model: str = 'mistralai/Mistral-7B-Instruct-v0.2',
                 device=0):
        """Load the text-generation pipeline.

        Args:
            model: Model identifier passed to ``pipeline``; a hosted or
                local LLM suitable for text generation.
            device: Device argument passed to ``pipeline``. The default
                ``0`` keeps the original behaviour.
        """
        self.generator = pipeline('text-generation',
                                  model=model,
                                  device=device)

    def _top_runners(self, players: dict, n: int = 3) -> str:
        """Format the *n* players with the largest ``distance_km``.

        Args:
            players: Mapping of player id to that player's stats dict.
            n: Maximum number of players to list.

        Returns:
            A comma-separated summary, farthest runner first, or
            ``'нет данных'`` when *players* is empty.
        """
        ranked = sorted(
            players.items(),
            key=lambda item: item[1].get('distance_km', 0.0),
            reverse=True,
        )[:n]
        if not ranked:
            return 'нет данных'
        return ', '.join(
            f"#{pid} ({info.get('team', '?')}): "
            f"{info.get('distance_km', 0.0)} км"
            for pid, info in ranked
        )

    def generate_narrative(self, stats: dict, match_info: dict) -> str:
        """Generate a 3-4 sentence analytical summary of the match.

        Args:
            stats: Dict with ``'possession'`` and ``'players'`` entries;
                each player entry must contain ``'shots'``.
            match_info: Dict with ``'team_a'``, ``'team_b'`` and ``'score'``.

        Returns:
            The generated text with the prompt removed and whitespace
            stripped.
        """
        total_shots = sum(p['shots'] for p in stats['players'].values())
        # Joined line by line so source indentation never leaks into the
        # prompt text.
        prompt = "\n".join([
            "Сгенерируй краткий аналитический отчёт матча.",
            f"Матч: {match_info['team_a']} vs {match_info['team_b']}",
            f"Счёт: {match_info['score']}",
            f"Владение: {stats['possession']}",
            f"Топ по пробегу: {self._top_runners(stats['players'])}",
            f"Удары по воротам: {total_shots}",
            "Напиши профессиональный аналитический текст на 3-4 предложения.",
        ])
        result = self.generator(prompt, max_new_tokens=200,
                                temperature=0.7, do_sample=True,
                                return_full_text=False)
        text = result[0]['generated_text']
        # Fallback for generators that still echo the prompt.
        if text.startswith(prompt):
            text = text[len(prompt):]
        return text.strip()
Accuracy of automated statistics vs. professional systems
| Metric | AI from video | Hawk-Eye/Opta |
|---|---|---|
| Distance covered by a player (error) | ±5–8% | ±1–2% |
| Ball possession | ±3–5% | ±1% |
| Number of shots | ±10–15% | ±2% |
| Pass detection | ±15–20% | ±3% |
| Maximum speed | ±8–12% | ±2% |
For amateur and semi-professional competitions, this level of accuracy is sufficient. For professional leagues, additional cameras and more careful calibration are needed to bring accuracy to an acceptable level.
| Project type | Timeline |
|---|---|
| Basic statistics (distance covered + possession) | 4–7 weeks |
| Complete statistics platform | 8–14 weeks |







