Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions skpro/regression/compose/_ttr.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,14 @@ def _update(self, X, y, C=None):
-------
self : reference to self
"""
yt = self.transformer_(y)
if self.transformer_ is not None:
yt = self.transformer_.transform(X=y)
if not isinstance(yt, pd.DataFrame):
yt = pd.DataFrame(yt, index=y.index, columns=y.columns)
else:
yt.columns = y.columns
else:
yt = y
self.regressor_.update(X=X, y=yt, C=C)
return self

Expand All @@ -187,7 +194,10 @@ def _predict(self, X):
labels predicted for `X`
"""
y_pred = self.regressor_.predict(X=X)
y_pred_it = self.transformer_.inverse_transform(y_pred)
if self.transformer_ is None:
return y_pred

y_pred_it = self.transformer_.inverse_transform(X=y_pred)
if not isinstance(y_pred_it, pd.DataFrame):
y_cols = self._y_metadata["feature_names"]
y_pred_it = pd.DataFrame(y_pred_it, index=X.index, columns=y_cols)
Expand Down Expand Up @@ -218,6 +228,9 @@ def _predict_quantiles(self, X, alpha):
at quantile probability in second col index, for the row index.
"""
y_pred = self.regressor_.predict_quantiles(X=X, alpha=alpha)
if self.transformer_ is None:
return y_pred

y_pred_it = self._get_inverse_transform_pred_int(
transformer=self.transformer_, y=y_pred
)
Expand Down Expand Up @@ -253,6 +266,9 @@ def _predict_interval(self, X, coverage):
quantile predictions at alpha = 0.5 - c/2, 0.5 + c/2 for c in coverage.
"""
y_pred = self.regressor_.predict_interval(X=X, coverage=coverage)
if self.transformer_ is None:
return y_pred

y_pred_it = self._get_inverse_transform_pred_int(
transformer=self.transformer_, y=y_pred
)
Expand Down Expand Up @@ -302,6 +318,9 @@ def _predict_proba(self, X):
labels predicted for `X`
"""
y_pred = self.regressor_.predict_proba(X=X)
if self.transformer_ is None:
return y_pred

y_pred_it = TransformedDistribution(
distribution=y_pred,
transform=self.transformer_.inverse_transform,
Expand Down Expand Up @@ -416,6 +435,7 @@ def get_test_params(cls, parameter_set="default"):
from sklearn.preprocessing import StandardScaler

from skpro.regression.linear import DummyProbaRegressor
from skpro.regression.online import OnlineRefit
from skpro.survival.compose import ConditionUncensored

params1 = {
Expand All @@ -426,4 +446,8 @@ def get_test_params(cls, parameter_set="default"):
"regressor": ConditionUncensored.create_test_instance(),
"transformer": StandardScaler(),
}
return [params1, params2]
params3 = {
"regressor": OnlineRefit(DummyProbaRegressor()),
"transformer": None,
}
return [params1, params2, params3]
96 changes: 96 additions & 0 deletions skpro/regression/tests/test_ttr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Tests for transformed target regressor edge cases."""

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

from skpro.distributions.normal import Normal
from skpro.regression.base import BaseProbaRegressor
from skpro.regression.compose import TransformedTargetRegressor
from skpro.regression.dummy import DummyProbaRegressor


class _ScaleTransformer(BaseEstimator, TransformerMixin):
    """Minimal target-scaling transformer that keeps DataFrames intact.

    Multiplies values by ``factor`` on ``transform`` and divides them back
    on ``inverse_transform``; fitting is a no-op, so the transformer is
    stateless apart from its constructor argument.
    """

    def __init__(self, factor=10.0):
        self.factor = factor

    def fit(self, X, y=None):
        """No-op fit; returns self for chaining."""
        return self

    def transform(self, X):
        """Multiply ``X`` by the configured factor."""
        return X * self.factor

    def fit_transform(self, X, y=None):
        """Equivalent to ``fit`` followed by ``transform``."""
        self.fit(X=X, y=y)
        return self.transform(X=X)

    def inverse_transform(self, X):
        """Divide ``X`` by the configured factor, undoing ``transform``."""
        return X / self.factor


class _UpdateRecordingRegressor(BaseProbaRegressor):
    """Probabilistic regressor stub that memorizes the targets it sees.

    ``_fit`` stores a copy of ``y`` in ``fit_y_`` and resets ``update_y_``;
    ``_update`` stores a copy of the new targets in ``update_y_``.  Point
    predictions are the constant mean of the fitted targets, and
    ``_predict_proba`` returns a unit-scale Normal centered at that mean,
    so tests can inspect exactly which data was delegated to the regressor.
    """

    _tags = {"capability:update": True}

    def _fit(self, X, y, C=None):
        # remember the training targets; clear any previous update record
        self.fit_y_ = y.copy()
        self.update_y_ = None
        return self

    def _update(self, X, y, C=None):
        # record the targets passed to update for later inspection
        self.update_y_ = y.copy()
        return self

    def _predict(self, X):
        # constant point prediction: mean of the first fitted target column
        center = float(self.fit_y_.iloc[:, 0].mean())
        values = np.repeat(center, len(X))
        return pd.DataFrame({"target": values}, index=X.index)

    def _predict_proba(self, X):
        # Normal distribution centered at the fitted mean with unit sigma
        center = float(self.fit_y_.iloc[:, 0].mean())
        loc = np.repeat(center, len(X)).reshape(-1, 1)
        scale = np.ones((len(X), 1))
        return Normal(mu=loc, sigma=scale, index=X.index, columns=["target"])


def test_ttr_without_transformer_predicts_and_returns_distribution():
    """Constructor default ``transformer=None`` should work across predict APIs."""
    X = pd.DataFrame({"x": [0.0, 1.0, 2.0]})
    y = pd.DataFrame({"target": [1.0, 3.0, 5.0]})

    ttr = TransformedTargetRegressor(
        regressor=DummyProbaRegressor(strategy="normal"),
        transformer=None,
    )
    ttr.fit(X, y)

    point_pred = ttr.predict(X)
    dist_pred = ttr.predict_proba(X)

    # point predictions keep the target column and the row index of X
    assert list(point_pred.columns) == ["target"]
    assert point_pred.index.equals(X.index)
    # the distribution prediction carries matching metadata
    assert dist_pred.columns.equals(pd.Index(["target"]))
    assert dist_pred.index.equals(X.index)


def test_ttr_update_applies_transformer_before_delegating():
    """Update should transform ``y`` via ``transform``, not call the transformer."""
    X = pd.DataFrame({"x": [0.0, 1.0, 2.0]})
    y = pd.DataFrame({"target": [1.0, 2.0, 3.0]})
    X_new = pd.DataFrame({"x": [10.0, 11.0]})
    y_new = pd.DataFrame({"target": [4.0, 5.0]})

    scale = 10.0
    ttr = TransformedTargetRegressor(
        regressor=_UpdateRecordingRegressor(),
        transformer=_ScaleTransformer(factor=scale),
    )
    ttr.fit(X, y)
    ttr.update(X_new, y_new)

    # the inner regressor must have received the scaled targets on update
    pd.testing.assert_frame_equal(ttr.regressor_.update_y_, y_new * scale)
Loading