fix report_to

Samoed · Samoed · commit 64d58646844a · 2026-02-09T01:13:56.000+03:00
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
@@ -7,4 +7,4 @@ jobs:
       - uses: actions/checkout@v4
       - uses: astral-sh/ruff-action@v2
         with:
-          version: "0.8.4"
+          version: "0.15.0"
diff --git a/src/autointent/_callbacks/__init__.py b/src/autointent/_callbacks/__init__.py
@@ -8,7 +8,7 @@
 
 REPORTERS = {cb.name: cb for cb in [WandbCallback, TensorBoardCallback, EmissionsTrackerCallback]}
 
-REPORTERS_NAMES = Literal[tuple(REPORTERS.keys())]  # type: ignore[valid-type]
+REPORTERS_NAMES = Literal[tuple(REPORTERS.keys()) + ("none",)]  # type: ignore[valid-type]
 
 
 def get_callbacks(reporters: list[str] | None) -> CallbackHandler:
@@ -25,6 +25,8 @@ def get_callbacks(reporters: list[str] | None) -> CallbackHandler:
 
     reporters_cb = []
     for reporter in reporters:
+        if reporter == "none":
+            continue
         if reporter not in REPORTERS:
             msg = f"Reporter {reporter} not supported. Supported reporters {','.join(REPORTERS)}"
             raise ValueError(msg)
diff --git a/src/autointent/_callbacks/tensorboard.py b/src/autointent/_callbacks/tensorboard.py
@@ -53,11 +53,11 @@ def start_module(self, module_name: str, num: int, module_kwargs: dict[str, Any]
         """
         module_run_name = f"{self.run_name}_{module_name}_{num}"
         log_dir = Path(self.dirpath) / module_run_name
-        self.module_writer = self.writer(log_dir=log_dir)  # type: ignore[no-untyped-call]
+        self.module_writer = self.writer(log_dir=log_dir)
 
-        self.module_writer.add_text("module_info", f"Starting module {module_name}_{num}")  # type: ignore[no-untyped-call]
+        self.module_writer.add_text("module_info", f"Starting module {module_name}_{num}")
         for key, value in module_kwargs.items():
-            self.module_writer.add_text(f"module_params/{key}", str(value))  # type: ignore[no-untyped-call]
+            self.module_writer.add_text(f"module_params/{key}", str(value))
 
     def log_value(self, **kwargs: dict[str, int | float | Any]) -> None:
         """Logs scalar or text values.
@@ -69,7 +69,7 @@ def log_value(self, **kwargs: dict[str, int | float | Any]) -> None:
             if isinstance(value, int | float):
                 self.module_writer.add_scalar(key, value)
             else:
-                self.module_writer.add_text(key, str(value))  # type: ignore[no-untyped-call]
+                self.module_writer.add_text(key, str(value))
 
     def log_metrics(self, metrics: dict[str, Any]) -> None:
         """Logs training metrics.
@@ -79,9 +79,9 @@ def log_metrics(self, metrics: dict[str, Any]) -> None:
         """
         for key, value in metrics.items():
             if isinstance(value, int | float):
-                self.module_writer.add_scalar(key, value)  # type: ignore[no-untyped-call]
+                self.module_writer.add_scalar(key, value)
             else:
-                self.module_writer.add_text(key, str(value))  # type: ignore[no-untyped-call]
+                self.module_writer.add_text(key, str(value))
 
     def log_final_metrics(self, metrics: dict[str, Any]) -> None:
         """Logs final metrics at the end of training.
@@ -97,13 +97,13 @@ def log_final_metrics(self, metrics: dict[str, Any]) -> None:
             raise RuntimeError(msg)
 
         log_dir = Path(self.dirpath) / "final_metrics"
-        self.module_writer = self.writer(log_dir=log_dir)  # type: ignore[no-untyped-call]
+        self.module_writer = self.writer(log_dir=log_dir)
 
         for key, value in metrics.items():
             if isinstance(value, int | float):
-                self.module_writer.add_scalar(key, value)  # type: ignore[no-untyped-call]
+                self.module_writer.add_scalar(key, value)
             else:
-                self.module_writer.add_text(key, str(value))  # type: ignore[no-untyped-call]
+                self.module_writer.add_text(key, str(value))
 
     def end_module(self) -> None:
         """Ends the current module and closes the TensorBoard writer.
@@ -115,8 +115,8 @@ def end_module(self) -> None:
             msg = "start_run must be called before end_module."
             raise RuntimeError(msg)
 
-        self.module_writer.add_text("module_info", "Ending module")  # type: ignore[no-untyped-call]
-        self.module_writer.close()  # type: ignore[no-untyped-call]
+        self.module_writer.add_text("module_info", "Ending module")
+        self.module_writer.close()
 
     def end_run(self) -> None:
         """Ends the current run. This method is currently a placeholder."""
diff --git a/src/autointent/configs/_optimization.py b/src/autointent/configs/_optimization.py
@@ -1,8 +1,9 @@
 """Configuration for the optimization process."""
 
 from pathlib import Path
+from typing import Literal
 
-from pydantic import BaseModel, ConfigDict, Field, PositiveInt
+from pydantic import BaseModel, ConfigDict, Field, PositiveInt, field_validator
 
 from autointent._callbacks import REPORTERS_NAMES
 from autointent.custom_types import FloatFromZeroToOne, SamplerType, ValidationScheme
@@ -57,7 +58,7 @@ class LoggingConfig(BaseModel):
     clear_ram: bool = Field(False, description="Whether to clear the RAM after dumping the modules")
     """Whether to clear the RAM after dumping the modules"""
     report_to: list[REPORTERS_NAMES] | None = Field(  # type: ignore[valid-type]
-        None, description="List of callbacks to report to. If None, no callbacks will be used"
+        ['none'], description="List of callbacks to report to. If None, no callbacks will be used"
     )
     log_interval_time: float = Field(
         0.1, description="Sampling interval for the system monitor in seconds for Wandb logger."
@@ -88,6 +89,13 @@ def get_run_name(self) -> str:
             self.run_name = get_run_name()
         return self.run_name
 
+    @field_validator("report_to")
+    def validate_report_to(cls, value: list[REPORTERS_NAMES] | None) -> list[REPORTERS_NAMES]:
+        """Validate the `report_to` field to ensure it is either 'none' or a list of valid reporter names."""
+        if value is None:
+            return ['none']  # since transformers v5 doesn't allow None for report_to
+        return value
+
 
 class HPOConfig(BaseModel):
     """Configuration for hyperparameter optimization using Optuna.
diff --git a/src/autointent/modules/scoring/_catboost/catboost_scorer.py b/src/autointent/modules/scoring/_catboost/catboost_scorer.py
@@ -218,7 +218,9 @@ def fit(
             **self.get_extra_params(),
         )
         self._model.fit(
-            dataset, labels, early_stopping_rounds=self.early_stopping_rounds if self.val_fraction is not None else None
+            dataset,
+            list(labels),  # datasets >4 would pass `Column` instead of list, which causes error in CatBoostClassifier
+            early_stopping_rounds=self.early_stopping_rounds if self.val_fraction is not None else None,
         )
 
     def predict(self, utterances: list[str]) -> npt.NDArray[np.float64]:
diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py
@@ -98,7 +98,7 @@ def test_cv(dataset, task_type):
     context = pipeline_optimizer.fit(dataset, refit_after=True)
     context.dump()
 
-    assert len(pipeline_optimizer.logging_config.dump_dir.iterdir()) > 0
+    assert len(list(pipeline_optimizer.logging_config.dump_dir.iterdir())) > 0
 
 
 @pytest.mark.parametrize(
@@ -161,7 +161,7 @@ def test_dump_modules(dataset, task_type):
     context = pipeline_optimizer.fit(dataset)
     context.dump()
 
-    assert pipeline_optimizer.logging_config.dump_dir.iterdir() > 0
+    assert len(list(pipeline_optimizer.logging_config.dump_dir.iterdir())) > 0
 
 
 @pytest.mark.parametrize(

Original file line number	Diff line number	Diff line change
`@@ -218,7 +218,9 @@ def fit(`
`218`	`218`	`**self.get_extra_params(),`
`219`	`219`	`)`
`220`	`220`	`self._model.fit(`
`221`		`- dataset, labels, early_stopping_rounds=self.early_stopping_rounds if self.val_fraction is not None else None`
	`221`	`+ dataset,`
	`222`	``+ list(labels), # datasets >4 would pass `Column` instead of list, which causes error in CatBoostClassifier``
	`223`	`+ early_stopping_rounds=self.early_stopping_rounds if self.val_fraction is not None else None,`
`222`	`224`	`)`
`223`	`225`
`224`	`226`	`def predict(self, utterances: list[str]) -> npt.NDArray[np.float64]:`