-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.py
More file actions
97 lines (84 loc) · 3.01 KB
/
train.py
File metadata and controls
97 lines (84 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
"""HARRIER YOLO26 training entry point.
Respects the ANIMA training rules:
- Dual backend: ``--backend auto|cuda|mlx|cpu``
- Artifacts disk only: everything goes under ``/mnt/artifacts-datai``
- ``--dry-run`` validates the plan without importing ultralytics
- ``--resume PATH`` for checkpoint resume
Usage:
    python scripts/train.py --experiment eo_daynight_to_eo_day --dry-run
    python scripts/train.py --experiment ir_daynight_to_ir_night \\
        --backend cuda --batch 32
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
_REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(_REPO_ROOT / "src"))
from anima_harrier.experiments import YOLO26_EXPERIMENTS # noqa: E402
from anima_harrier.training import ( # noqa: E402
HarrierTrainer,
build_training_plan,
render_plan_report,
)
def _parse_args(argv: list[str] | None) -> argparse.Namespace:
    """Build the ``anima-harrier-train`` parser and parse *argv* with it.

    Args:
        argv: Argument strings to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Namespace carrying ``experiment``, ``configs_dir``, ``artifact_root``,
        ``run_name``, ``backend``, ``model``, ``batch``, ``resume`` and
        ``dry_run``.

    Raises:
        SystemExit: Raised by argparse itself on ``--help`` or when the
            arguments are invalid (e.g. a missing or unknown ``--experiment``).
    """
    # Each entry is (flag, keyword arguments for ``add_argument``). The order
    # here is the order in which options are listed in ``--help``.
    option_specs = (
        (
            "--experiment",
            {
                "required": True,
                "choices": sorted(YOLO26_EXPERIMENTS),
                "help": "HARRIER YOLO26 experiment name.",
            },
        ),
        (
            "--configs-dir",
            {"type": Path, "default": _REPO_ROOT / "configs"},
        ),
        (
            "--artifact-root",
            {
                "type": Path,
                "default": None,
                "help": "Override the artifacts disk root (defaults to /mnt/artifacts-datai).",
            },
        ),
        (
            "--run-name",
            {
                "type": str,
                "default": None,
                "help": "Optional run name; otherwise auto-derived from experiment + model.",
            },
        ),
        (
            "--backend",
            {"default": "auto", "choices": ["auto", "cuda", "mlx", "cpu"]},
        ),
        (
            "--model",
            {"default": None, "help": "Override the YOLO26 model."},
        ),
        ("--batch", {"type": int, "default": 16}),
        ("--resume", {"type": Path, "default": None}),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "Render the plan and persist it to the artifacts disk without training.",
            },
        ),
    )

    cli = argparse.ArgumentParser(prog="anima-harrier-train")
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args(argv)
def main(argv: list[str] | None = None) -> int:
    """Build the training plan from the CLI arguments, print it, then run it.

    The plan report is always printed first. With ``--dry-run`` only
    ``HarrierTrainer.dry_run()`` is invoked; otherwise ``HarrierTrainer.run()``
    is invoked and the best checkpoint is printed when the outcome has one.

    Args:
        argv: Argument strings forwarded to ``_parse_args``; ``None`` means
            the process command line.

    Returns:
        ``0`` when the outcome reports success, ``1`` when a dry run reports
        failure, ``2`` when a training run reports failure.
    """
    opts = _parse_args(argv)

    training_plan = build_training_plan(
        experiment_name=opts.experiment,
        configs_dir=opts.configs_dir,
        artifact_root=opts.artifact_root,
        run_name=opts.run_name,
        backend=opts.backend,
        model=opts.model,
        batch_size=opts.batch,
        resume_from=opts.resume,
    )
    print(render_plan_report(training_plan))

    runner = HarrierTrainer(training_plan)

    if not opts.dry_run:
        # Real training run: a failure maps to exit status 2.
        result = runner.run()
        print(f"\n[train] {result.message} (success={result.success})")
        if result.best_checkpoint:
            print(f"[train] best_checkpoint={result.best_checkpoint}")
        if result.success:
            return 0
        return 2

    # Dry run: a failure maps to exit status 1.
    result = runner.dry_run()
    print(f"\n[dry-run] {result.message}")
    if result.success:
        return 0
    return 1
if __name__ == "__main__":
    # Script entry: main()'s integer result becomes the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)