AI Automated Sports Statistics Generation

We design and deploy artificial intelligence systems, from prototype to production-ready solution. Our team combines expertise in machine learning, data engineering and MLOps so that AI works in real business settings, not just in the lab.
Showing 1 of 1 services · All 1566 services
AI Automated Sports Statistics Generation
Medium
~2-4 weeks
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    823
  • image_logo-aider_0.jpg
    AIDER company logo development
    762
  • image_crm_chasseurs_493_0.webp
    CRM development for Chasseurs
    848

Automatic generation of sports statistics using AI

Match statistics—number of shots, ball possession, player distances, passing networks—traditionally require manual annotation or expensive optical systems like Hawk-Eye. AI based on video from standard cameras delivers 80–90% of the accuracy of professional systems at a fraction of the cost.

Key metrics and calculation methods

import numpy as np
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class PlayerStats:
    """Running per-player totals collected over a match.

    Instances are created with only ``player_id`` and ``team``; every other
    field starts at zero and is accumulated by the statistics engine.
    """

    player_id: int
    team: str
    distance_m: float = 0.0
    sprint_count: int = 0      # sprints faster than 25 km/h
    max_speed_kmh: float = 0.0
    touches: int = 0
    shots: int = 0
    passes_attempted: int = 0
    passes_completed: int = 0
    # Occupancy grid indexed as heatmap[y, x]; 68 rows by 105 columns.
    # Excluded from the generated __eq__: comparing two ndarrays yields an
    # array, so leaving it in makes `stats_a == stats_b` raise ValueError
    # whenever all the scalar fields happen to be equal.
    heatmap: np.ndarray = field(
        default_factory=lambda: np.zeros((68, 105)),
        compare=False,
    )

class SportsStatisticsEngine:
    """Accumulate player and possession statistics from per-frame tracking.

    Call :meth:`update` once per processed video frame, then
    :meth:`generate_match_report` to obtain the aggregated numbers.
    Positions are treated as pitch coordinates in metres (distances are
    summed straight into ``distance_m``).
    """

    # Speed (km/h) above which a player counts as sprinting.
    SPRINT_SPEED_KMH = 25.0
    # Speed (km/h) above which a player counts as jogging.
    JOGGING_SPEED_KMH = 11.0
    # The nearest player must be closer to the ball than this (metres)
    # for their team to be credited with possession.
    POSSESSION_RADIUS_M = 2.0

    def __init__(self, fps: float, sport: str = 'football'):
        """Create an empty engine.

        Args:
            fps: Frame rate of the analysed video; must be positive.
            sport: Free-form sport label, stored as-is.

        Raises:
            ValueError: If ``fps`` is not positive.
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps!r}")

        self.fps = fps
        self.sport = sport
        self.player_stats: dict[int, PlayerStats] = {}
        self.possession_log = []  # (frame_idx, team_with_ball)
        self.event_log = []

        # Speed thresholds converted from km/h to metres per frame.
        self.sprint_threshold = self.SPRINT_SPEED_KMH / 3.6 / fps
        self.jogging_threshold = self.JOGGING_SPEED_KMH / 3.6 / fps

        # track_id -> (frame_idx, field_pos) of the last frame in which the
        # player had a usable position.
        self._last_seen = {}
        # track_ids that were above the sprint speed at their last measurement.
        self._sprinting = set()

    @staticmethod
    def _has_position(pos) -> bool:
        """Return True when ``pos`` holds at least an x and a y coordinate."""
        # Explicit None/length test instead of truthiness so array-like
        # positions do not raise on ambiguous truth values.
        return pos is not None and len(pos) >= 2

    def update(self, frame_idx: int, tracked_players: list,
               ball_pos: Optional[tuple]):
        """Fold one frame of tracking output into the running statistics.

        Args:
            frame_idx: Index of the frame being processed.
            tracked_players: Dicts with a ``'track_id'`` key and optional
                ``'team'`` and ``'field_pos'`` keys.
            ball_pos: Ball position, or ``None`` when the ball was not found.
        """
        for player in tracked_players:
            pid = player['track_id']
            pos = player.get('field_pos')

            if pid not in self.player_stats:
                self.player_stats[pid] = PlayerStats(
                    player_id=pid, team=player.get('team', 'unknown')
                )
            stats = self.player_stats[pid]

            if not self._has_position(pos):
                # Position lost: forget the previous point so the next known
                # position starts a fresh distance measurement.
                self._last_seen.pop(pid, None)
                continue

            previous = self._last_seen.get(pid)
            if previous is not None:
                self._accumulate_motion(pid, stats, previous, frame_idx, pos)
            self._last_seen[pid] = (frame_idx, pos)

            # Heatmap: clamp to the grid actually allocated for this player.
            rows, cols = stats.heatmap.shape
            col = int(np.clip(pos[0], 0, cols - 1))
            row = int(np.clip(pos[1], 0, rows - 1))
            stats.heatmap[row, col] += 1

        # Ball possession
        if self._has_position(ball_pos):
            possessing_team = self._determine_possession(
                ball_pos, tracked_players
            )
            self.possession_log.append((frame_idx, possessing_team))

    def _accumulate_motion(self, pid, stats, previous, frame_idx, pos):
        """Add the step from ``previous`` to ``pos`` to distance/speed stats."""
        prev_frame, prev_pos = previous
        step_m = float(np.hypot(pos[0] - prev_pos[0], pos[1] - prev_pos[1]))
        stats.distance_m += step_m

        # A player can be absent from the tracker output for several frames;
        # divide by the frames actually elapsed so the gap does not inflate
        # the speed. Non-increasing indices fall back to a single frame.
        elapsed_frames = max(frame_idx - prev_frame, 1)
        speed_kmh = step_m * self.fps / elapsed_frames * 3.6

        if speed_kmh > stats.max_speed_kmh:
            stats.max_speed_kmh = speed_kmh

        # Count a sprint once, when the player crosses the threshold, rather
        # than once per frame spent above it.
        if speed_kmh > self.SPRINT_SPEED_KMH:
            if pid not in self._sprinting:
                self._sprinting.add(pid)
                stats.sprint_count += 1
        else:
            self._sprinting.discard(pid)

    def _determine_possession(self, ball_pos: tuple,
                              players: list) -> Optional[str]:
        """Return the team of the player nearest the ball, if close enough.

        Players without a position are ignored. ``None`` is returned when
        there are no players, no ball position, or the nearest player is
        not within ``POSSESSION_RADIUS_M`` of the ball.
        """
        if not players or not self._has_position(ball_pos):
            return None

        nearest_dist = float('inf')
        nearest_team = None

        for player in players:
            pos = player.get('field_pos')
            if not self._has_position(pos):
                continue
            dist = float(np.hypot(ball_pos[0] - pos[0], ball_pos[1] - pos[1]))
            if dist < nearest_dist:
                nearest_dist = dist
                nearest_team = player.get('team')

        return nearest_team if nearest_dist < self.POSSESSION_RADIUS_M else None

    def compute_possession_stats(self) -> dict:
        """Return each team's share of logged frames, in percent.

        Shares are relative to every logged frame, including frames where
        no team was in possession, so they can add up to less than 100.
        With an empty log a fixed 50/50 placeholder keyed ``'team_a'`` /
        ``'team_b'`` is returned.
        """
        total = len(self.possession_log)
        if total == 0:
            return {'team_a': 50.0, 'team_b': 50.0}

        counts = defaultdict(int)
        for _, team in self.possession_log:
            if team:
                counts[team] += 1

        return {
            team: round(count / total * 100, 1)
            for team, count in counts.items()
        }

    def generate_match_report(self) -> dict:
        """Build the summary dict: possession shares plus per-player totals."""
        report = {
            'possession': self.compute_possession_stats(),
            'players': {}
        }

        for pid, stats in self.player_stats.items():
            report['players'][pid] = {
                'team': stats.team,
                'distance_km': round(stats.distance_m / 1000, 2),
                'sprint_count': stats.sprint_count,
                'max_speed_kmh': round(stats.max_speed_kmh, 1),
                'shots': stats.shots,
                'passes_attempted': stats.passes_attempted,
                # max(..., 1) avoids dividing by zero for players who have
                # not attempted a pass.
                'pass_accuracy': (stats.passes_completed /
                                  max(stats.passes_attempted, 1) * 100)
            }

        return report

Automatic generation of text reports

from transformers import pipeline

class MatchReportGenerator:
    """Turn a match statistics report into a short narrative with an LLM."""

    def __init__(self, model: str = 'mistralai/Mistral-7B-Instruct-v0.2',
                 device=0):
        """Load the text-generation pipeline.

        Args:
            model: Model identifier passed to ``pipeline``.
            device: Device argument passed to ``pipeline``; the default ``0``
                matches the previously hard-coded value.
        """
        # GPT or a local LLM used to generate the text report
        self.generator = pipeline('text-generation',
                                  model=model,
                                  device=device)

    @staticmethod
    def _top_runners(players: dict, limit: int = 3) -> str:
        """Describe the ``limit`` players with the largest ``distance_km``.

        Args:
            players: Mapping of player id to a per-player dict holding
                ``'team'`` and ``'distance_km'`` entries.
            limit: Maximum number of players to list.

        Returns:
            A comma-separated summary, or ``'n/a'`` when there are no players.
        """
        ranked = sorted(
            players.items(),
            key=lambda item: item[1].get('distance_km', 0.0),
            reverse=True,
        )
        if not ranked:
            return 'n/a'
        return ', '.join(
            f"#{pid} ({info.get('team', 'unknown')}): "
            f"{info.get('distance_km', 0.0)} km"
            for pid, info in ranked[:limit]
        )

    def generate_narrative(self, stats: dict, match_info: dict) -> str:
        """Generate a short analytical text for one match.

        Args:
            stats: Report dict with ``'possession'`` and ``'players'`` keys.
            match_info: Dict with ``'team_a'``, ``'team_b'`` and ``'score'``.

        Returns:
            The generated text without the prompt, stripped of surrounding
            whitespace.
        """
        top_runners = self._top_runners(stats['players'])
        total_shots = sum(p['shots'] for p in stats['players'].values())

        prompt = f"""Сгенерируй краткий аналитический отчёт матча.
        Матч: {match_info['team_a']} vs {match_info['team_b']}
        Счёт: {match_info['score']}
        Владение: {stats['possession']}
        Топ по пробегу: {top_runners}
        Удары по воротам: {total_shots}

        Напиши профессиональный аналитический текст на 3-4 предложения."""

        # Ask the pipeline for the continuation only; the prefix strip below
        # is a fallback in case the prompt is echoed back anyway.
        result = self.generator(prompt, max_new_tokens=200,
                                temperature=0.7, do_sample=True,
                                return_full_text=False)
        return result[0]['generated_text'].removeprefix(prompt).strip()

Accuracy of automated statistics vs. professional systems

Metric | AI from video | Hawk-Eye/Opta
Distance covered (error) | ±5–8% | ±1–2%
Ball possession | ±3–5% | ±1%
Number of shots | ±10–15% | ±2%
Pass detection | ±15–20% | ±3%
Maximum speed | ±8–12% | ±2%

For amateur and semi-professional competitions, this is quite sufficient. For professional leagues, additional cameras and calibration improve accuracy to an acceptable level.

Project type | Timeline
Basic statistics (distance covered + possession) | 4–7 weeks
Complete statistical platform | 8–14 weeks