-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark.py
More file actions
81 lines (66 loc) · 2.81 KB
/
benchmark.py
File metadata and controls
81 lines (66 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
"""Latency benchmark for HARRIER predictors.
Runs N warmup + M timed iterations against a single image or a
directory of EO/IR samples and reports p50/p95/mean latency plus
throughput. Intended for both CUDA (server) and MLX (Mac) hosts.
"""
from __future__ import annotations
import argparse
import statistics
import sys
import time
from pathlib import Path
# Make the repo's ``src`` layout importable when this script is run
# directly from a checkout (without installing the package); the local
# imports below must therefore come after this path tweak (hence noqa).
_REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(_REPO_ROOT / "src"))
from anima_harrier.data.indraeye import IndraEyeModality # noqa: E402
from anima_harrier.inference import HarrierPredictor, PredictRequest # noqa: E402
def _iter_sources(source: Path) -> list[Path]:
if source.is_file():
return [source]
return sorted(
path
for path in source.rglob("*")
if path.is_file() and path.suffix.lower() in {".jpg", ".jpeg", ".png", ".bmp"}
)
def main(argv: list[str] | None = None) -> int:
    """Run the HARRIER latency benchmark CLI.

    Parses arguments, loads the predictor, performs ``--warmup`` untimed
    plus ``--iters`` timed predictions (cycling over the discovered
    sources), then prints p50/p95/mean latency and throughput.

    Args:
        argv: Argument list; ``None`` uses ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on success, 2 on invalid input (no sources
        found, or a non-positive ``--iters``).
    """
    parser = argparse.ArgumentParser(prog="anima-harrier-benchmark")
    parser.add_argument("--weights", type=Path, required=True)
    parser.add_argument("--source", type=Path, required=True)
    parser.add_argument("--modality", choices=[m.value for m in IndraEyeModality], required=True)
    parser.add_argument("--warmup", type=int, default=5)
    parser.add_argument("--iters", type=int, default=50)
    parser.add_argument("--backend", default="auto", choices=["auto", "cuda", "mlx", "cpu"])
    args = parser.parse_args(argv)
    # Guard before running anything: with zero timed iterations,
    # statistics.mean([]) would raise StatisticsError below.
    if args.iters <= 0:
        print("[benchmark] --iters must be a positive integer", file=sys.stderr)
        return 2
    sources = _iter_sources(args.source)
    if not sources:
        print(f"[benchmark] no sources found under {args.source}", file=sys.stderr)
        return 2
    predictor = HarrierPredictor(weights=args.weights, backend=args.backend)
    predictor.load()
    modality = IndraEyeModality(args.modality)
    latencies_ms: list[float] = []
    # Clamp negative --warmup to 0 so it cannot silently reduce the
    # number of timed samples.
    warmup = max(args.warmup, 0)
    total = warmup + args.iters
    for idx in range(total):
        src = sources[idx % len(sources)]
        request = PredictRequest(modality=modality, source=src)
        # perf_counter is the monotonic, high-resolution clock intended
        # for interval timing; time.time() can step backwards (NTP) and
        # has coarse resolution on some hosts, corrupting latency stats.
        start = time.perf_counter()
        predictor.predict(request)
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        if idx >= warmup:
            latencies_ms.append(elapsed_ms)
    mean = statistics.mean(latencies_ms)
    p50 = statistics.median(latencies_ms)
    # quantiles(n=20)[18] is the 95th percentile; fall back to max when
    # there are too few samples for a meaningful quantile estimate.
    p95 = statistics.quantiles(latencies_ms, n=20)[18] if len(latencies_ms) >= 20 else max(latencies_ms)
    throughput = 1000.0 / mean if mean else 0.0
    print("HARRIER latency benchmark")
    print(f" backend : {predictor.backend_info.resolved}")
    print(f" samples : {len(latencies_ms)} timed (+{args.warmup} warmup)")
    print(f" mean_ms : {mean:.2f}")
    print(f" p50_ms : {p50:.2f}")
    print(f" p95_ms : {p95:.2f}")
    print(f" throughput_fps: {throughput:.2f}")
    return 0
if __name__ == "__main__":
    # Propagate main()'s integer return value as the process exit code.
    raise SystemExit(main())