Skip to content

Commit f9b5a61

Browse files
feat: did a couple things idk
1 parent 65e01d0 commit f9b5a61

File tree

314 files changed

+7128
-983
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

314 files changed

+7128
-983
lines changed

PROJECTS/advanced/ai-threat-detection/.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ©AngelaMos | 2026
22
# .gitignore
33

4-
# Planning docs
4+
# dev docs
55
.angelusvigil/
66

77
# Environment

PROJECTS/advanced/ai-threat-detection/backend/app/api/models_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ async def _get_active_models(
5252
Query all active model metadata records
5353
"""
5454
query = select(ModelMetadata).where(
55-
ModelMetadata.is_active == True # noqa: E712
55+
ModelMetadata.is_active == True # type: ignore[arg-type] # noqa: E712
5656
)
5757
rows = (await session.execute(query)).scalars().all()
5858
return [{

PROJECTS/advanced/ai-threat-detection/backend/app/core/detection/inference.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@
66
import json
77
import logging
88
from pathlib import Path
9+
from typing import Any
910

1011
import numpy as np
1112

1213
try:
1314
import onnxruntime as ort
1415
except ImportError:
15-
ort = None # type: ignore[assignment]
16+
ort = None
1617

1718
logger = logging.getLogger(__name__)
1819

@@ -33,9 +34,9 @@ class InferenceEngine:
3334
"""
3435

3536
def __init__(self, model_dir: str) -> None:
36-
self._ae_session: ort.InferenceSession | None = None # type: ignore[union-attr]
37-
self._rf_session: ort.InferenceSession | None = None # type: ignore[union-attr]
38-
self._if_session: ort.InferenceSession | None = None # type: ignore[union-attr]
37+
self._ae_session: ort.InferenceSession | None = None
38+
self._rf_session: ort.InferenceSession | None = None
39+
self._if_session: ort.InferenceSession | None = None
3940
self._scaler_center: np.ndarray | None = None
4041
self._scaler_scale: np.ndarray | None = None
4142
self._threshold: float = 0.0
@@ -127,12 +128,12 @@ def _scale_for_ae(self, batch: np.ndarray) -> np.ndarray:
127128
"""
128129
if self._scaler_center is None or self._scaler_scale is None:
129130
return batch
130-
return (batch - self._scaler_center) / self._scaler_scale
131+
return (batch - self._scaler_center) / self._scaler_scale # type: ignore[no-any-return]
131132

132133
@staticmethod
133134
def _extract_rf_proba(
134-
ort_output: list | np.ndarray
135-
) -> np.ndarray: # type: ignore[type-arg]
135+
ort_output: list[Any] | np.ndarray
136+
) -> np.ndarray:
136137
"""
137138
Extract attack probability from skl2onnx RF output format.
138139

PROJECTS/advanced/ai-threat-detection/backend/app/core/features/mappings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@
9393
"file_extension": EXTENSION_MAP,
9494
}
9595

96+
WINDOWED_FEATURE_NAMES: list[str] = FEATURE_ORDER[23:]
97+
9698
BOOLEAN_FEATURES: frozenset[str] = frozenset({
9799
"has_encoded_chars",
98100
"has_double_encoding",

PROJECTS/advanced/ai-threat-detection/backend/app/factory.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from collections.abc import AsyncIterator
1010
from contextlib import asynccontextmanager
1111
from pathlib import Path
12+
from typing import TYPE_CHECKING
1213

1314
from fastapi import FastAPI
1415
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
@@ -24,6 +25,9 @@
2425
from app.models import model_metadata as _model_metadata_reg # noqa: F401
2526
from app.models import threat_event as _threat_event_reg # noqa: F401
2627

28+
if TYPE_CHECKING:
29+
from app.core.detection.inference import InferenceEngine
30+
2731
logger = logging.getLogger(__name__)
2832

2933

@@ -110,7 +114,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
110114
logger.info("AngelusVigil shut down cleanly")
111115

112116

113-
def _load_inference_engine() -> object | None:
117+
def _load_inference_engine() -> InferenceEngine | None:
114118
"""
115119
Attempt to load the ONNX inference engine from the
116120
configured model directory, returning None if ML

PROJECTS/advanced/ai-threat-detection/backend/app/models/model_metadata.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ class ModelMetadata(TimestampedModel, table=True):
1818
__table_args__ = (Index(
1919
"idx_model_metadata_active",
2020
"model_type",
21-
unique=True,
2221
postgresql_where=text("is_active = TRUE"),
2322
), )
2423

PROJECTS/advanced/ai-threat-detection/backend/cli/main.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
main.py
44
"""
55

6+
import asyncio
7+
import dataclasses
68
from pathlib import Path
79

810
import typer
@@ -22,6 +24,44 @@
2224
DEFAULT_SERVER_URL = "http://localhost:8000"
2325

2426

27+
async def _write_metadata(
28+
model_dir: Path,
29+
training_samples: int,
30+
metrics: dict[str, object],
31+
mlflow_run_id: str | None,
32+
threshold: float | None,
33+
) -> None:
34+
"""
35+
Persist training metadata to the database
36+
"""
37+
from app.config import settings
38+
from ml.metadata import save_model_metadata
39+
from sqlalchemy.ext.asyncio import (
40+
AsyncSession,
41+
async_sessionmaker,
42+
create_async_engine,
43+
)
44+
45+
engine = create_async_engine(settings.database_url)
46+
try:
47+
factory = async_sessionmaker(
48+
engine,
49+
class_=AsyncSession,
50+
expire_on_commit=False,
51+
)
52+
async with factory() as session:
53+
await save_model_metadata(
54+
session,
55+
model_dir=model_dir,
56+
training_samples=training_samples,
57+
metrics=metrics,
58+
mlflow_run_id=mlflow_run_id,
59+
threshold=threshold,
60+
)
61+
finally:
62+
await engine.dispose()
63+
64+
2565
@app.command()
2666
def serve(
2767
host: str = typer.Option("0.0.0.0", help="Bind address"),
@@ -92,9 +132,10 @@ def train(
92132
)
93133
raise typer.Exit(code=1)
94134

95-
from ml.data_loader import load_csic_dataset
135+
from ml.data_loader import load_csic_dataset, load_csic_normal
96136

97137
normal_path = csic_dir / "normalTrafficTraining.txt"
138+
normal_test_path = csic_dir / "normalTrafficTest.txt"
98139
attack_path = csic_dir / "anomalousTrafficTest.txt"
99140
typer.echo(f"Loading CSIC data from {csic_dir}")
100141
X_csic, y_csic = load_csic_dataset(
@@ -106,6 +147,14 @@ def train(
106147
f" CSIC: {len(X_csic)} samples"
107148
)
108149

150+
if normal_test_path.exists():
151+
X_extra, y_extra = load_csic_normal(normal_test_path)
152+
X_parts.append(X_extra)
153+
y_parts.append(y_extra)
154+
typer.echo(
155+
f" CSIC normal test: {len(X_extra)} samples"
156+
)
157+
109158
if synthetic_normal > 0 or synthetic_attack > 0:
110159
from ml.synthetic import generate_mixed_dataset
111160

@@ -143,6 +192,25 @@ def train(
143192
)
144193
result = orch.run(X, y)
145194

195+
try:
196+
metrics: dict[str, object] = (
197+
dataclasses.asdict(result.ensemble_metrics)
198+
if result.ensemble_metrics else {}
199+
)
200+
asyncio.run(_write_metadata(
201+
Path(output_dir),
202+
int(len(X)),
203+
metrics,
204+
result.mlflow_run_id,
205+
result.ae_metrics.get("ae_threshold"),
206+
))
207+
typer.echo(" Model metadata saved to database")
208+
except Exception as exc:
209+
typer.echo(
210+
f" Warning: could not save metadata to DB: {exc}",
211+
err=True,
212+
)
213+
146214
typer.echo(f"Models exported to {output_dir}")
147215
if result.ensemble_metrics is not None:
148216
typer.echo(

PROJECTS/advanced/ai-threat-detection/backend/ml/autoencoder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ def encode(self, x: Tensor) -> Tensor:
5151
"""
5252
Compress input through the encoder to the 6-dim bottleneck.
5353
"""
54-
return self.encoder(x)
54+
return self.encoder(x) # type: ignore[no-any-return]
5555

5656
def decode(self, z: Tensor) -> Tensor:
5757
"""
5858
Reconstruct input from the bottleneck representation.
5959
"""
60-
return self.decoder(z)
60+
return self.decoder(z) # type: ignore[no-any-return]
6161

6262
def forward(self, x: Tensor) -> Tensor:
6363
"""

PROJECTS/advanced/ai-threat-detection/backend/ml/data_loader.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from app.core.features.encoder import encode_for_inference
1515
from app.core.features.extractor import extract_request_features
16+
from app.core.features.mappings import WINDOWED_FEATURE_NAMES
1617
from app.core.ingestion.parsers import ParsedLogEntry
1718

1819
logger = logging.getLogger(__name__)
@@ -26,21 +27,6 @@
2627
_DEFAULT_UA = ("Mozilla/5.0 (compatible; Konqueror/3.5; Linux)"
2728
" KHTML/3.5.8 (like Gecko)")
2829

29-
_WINDOWED_FEATURE_NAMES: list[str] = [
30-
"req_count_1m",
31-
"req_count_5m",
32-
"req_count_10m",
33-
"error_rate_5m",
34-
"unique_paths_5m",
35-
"unique_uas_10m",
36-
"method_entropy_5m",
37-
"avg_response_size_5m",
38-
"status_diversity_5m",
39-
"path_depth_variance_5m",
40-
"inter_request_time_mean",
41-
"inter_request_time_std",
42-
]
43-
4430

4531
@dataclass
4632
class CSICRequest:
@@ -192,7 +178,7 @@ def load_csic_dataset(
192178
entry = csic_to_parsed_entry(req)
193179
features = extract_request_features(entry)
194180

195-
for name in _WINDOWED_FEATURE_NAMES:
181+
for name in WINDOWED_FEATURE_NAMES:
196182
features[name] = 0.0
197183

198184
vector = encode_for_inference(features)
@@ -211,3 +197,32 @@ def load_csic_dataset(
211197
)
212198

213199
return X, y
200+
201+
202+
def load_csic_normal(
    path: Path,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Read a CSIC 2010 normal-traffic file and return ``(X, y)``.

    ``X`` is a float32 array built from one encoded feature vector per
    parsed request; ``y`` is an int32 array of zeros with one entry per
    request (every sample is labelled 0).
    """
    encoded_rows: list[list[float]] = []
    for request in parse_csic_file(path, label=0):
        feats = extract_request_features(csic_to_parsed_entry(request))
        # Every windowed feature is forced to 0.0 before encoding.
        # NOTE(review): presumably because standalone dataset requests
        # carry no sliding-window history; confirm.
        for windowed_name in WINDOWED_FEATURE_NAMES:
            feats[windowed_name] = 0.0
        encoded_rows.append(encode_for_inference(feats))

    n_samples = len(encoded_rows)
    X = np.array(encoded_rows, dtype=np.float32)
    y = np.zeros(n_samples, dtype=np.int32)

    logger.info(
        "Loaded %d normal samples from %s",
        n_samples,
        path.name,
    )

    return X, y

PROJECTS/advanced/ai-threat-detection/backend/ml/download_csic.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import logging
88
import sys
99
from pathlib import Path
10-
from urllib.request import urlretrieve
10+
11+
import httpx
1112

1213
logger = logging.getLogger(__name__)
1314

@@ -26,24 +27,6 @@
2627
MIN_FILE_BYTES = 1_000_000
2728

2829

29-
def _progress_hook(
30-
block_num: int,
31-
block_size: int,
32-
total_size: int,
33-
) -> None:
34-
"""
35-
Print download progress to stdout
36-
"""
37-
downloaded = block_num * block_size
38-
if total_size > 0:
39-
pct = min(downloaded * 100 / total_size, 100)
40-
sys.stdout.write(f"\r {pct:.0f}%")
41-
else:
42-
mb = downloaded / 1_048_576
43-
sys.stdout.write(f"\r {mb:.1f} MB")
44-
sys.stdout.flush()
45-
46-
4730
def _compute_sha256(path: Path) -> str:
4831
"""
4932
Compute SHA-256 hash of a file
@@ -73,7 +56,31 @@ def download_csic(output_dir: Path = DATASET_DIR, ) -> None:
7356
print(f"Downloading {filename}...")
7457

7558
try:
76-
urlretrieve(url, dest, reporthook=_progress_hook)
59+
with httpx.stream(
60+
"GET",
61+
url,
62+
follow_redirects=True,
63+
) as response:
64+
response.raise_for_status()
65+
total = int(
66+
response.headers.get("content-length", 0)
67+
)
68+
downloaded = 0
69+
with open(dest, "wb") as f:
70+
for chunk in response.iter_bytes(
71+
chunk_size=65536
72+
):
73+
f.write(chunk)
74+
downloaded += len(chunk)
75+
if total > 0:
76+
pct = min(
77+
downloaded * 100 / total, 100
78+
)
79+
sys.stdout.write(f"\r {pct:.0f}%")
80+
else:
81+
mb = downloaded / 1_048_576
82+
sys.stdout.write(f"\r {mb:.1f} MB")
83+
sys.stdout.flush()
7784
print()
7885
except Exception as exc:
7986
logger.error(

0 commit comments

Comments
 (0)