Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions skpro/regression/cyclic_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,10 @@ class CyclicBoosting(BaseProbaRegressor):
be on a bounded interval, with support between ``lower`` and ``upper``.
maximal_iterations : int, default=10
maximum number of iterations for the cyclic boosting algorithm
dist_type: str, one of ``'normal'`` (default), ``'logistic'``
inner base distribution to use for the Johnson QPD, i.e., before
arcosh and similar transformations.
Available options are ``'normal'`` (default), ``'logistic'``,
or ``'sinhlogistic'``.
dist: str, default='normal'
Inner base distribution to use for the Johnson QPD, i.e., before
arcosh and similar transformations. Available options are
``'normal'`` (default), ``'logistic'``, or ``'sinhlogistic'``.

Attributes
----------
Expand Down Expand Up @@ -133,6 +132,11 @@ class CyclicBoosting(BaseProbaRegressor):
"tests:vm": True, # requires its own test VM to run
}

# TODO (release 2.14.0)
# remove the 'dist_type' argument from '__init__' signature
# remove the following 'if' check and deprecation warning
# de-indent the body of the following 'else' branch

def __init__(
self,
feature_groups: Union[List[str], List[Tuple[str, ...]], None] = None,
Expand All @@ -142,7 +146,8 @@ def __init__(
lower: Union[float, None] = None,
upper: Union[float, None] = None,
maximal_iterations=10,
dist_type: Union[str, None] = "normal",
dist_type: Union[str, None] = "deprecated",
dist: Union[str, None] = "normal",
dist_shape: Union[float, None] = 0.0,
):
self.feature_groups = feature_groups
Expand All @@ -153,10 +158,28 @@ def __init__(
self.upper = upper
self.maximal_iterations = maximal_iterations
self.dist_type = dist_type
self.dist = dist
self.dist_shape = dist_shape

super().__init__()

# handle deprecation of dist_type -> dist
if dist_type != "deprecated":
from warnings import warn

warn(
"in `CyclicBoosting`, parameter 'dist_type' "
"will be renamed to 'dist' in version 2.14.0. "
"To keep current behaviour and to silence this warning, "
"use 'dist' instead of 'dist_type', "
"set dist explicitly via kwarg, and do not set dist_type.",
category=DeprecationWarning,
stacklevel=2,
)
self._dist = dist_type
else:
self._dist = dist

self.quantiles = [self.alpha, 0.5, 1 - self.alpha]
self.quantile_values = list()
self.quantile_est = list()
Expand Down Expand Up @@ -315,7 +338,7 @@ def _predict_proba(self, X):
"qv_high": self.quantile_values[2].reshape(-1, 1),
"lower": self.lower,
"upper": self.upper,
"base_dist": self.dist_type,
"base_dist": self._dist,
"index": index,
"columns": y_cols,
}
Expand Down
36 changes: 30 additions & 6 deletions skpro/regression/gam/_gam.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class GAMRegressor(BaseProbaRegressor):
By default a univariate spline term will be allocated for each feature.
Can be a ``pygam`` terms expression for custom model specification.

distribution : str or pygam.Distribution, optional (default='Normal')
dist : str or pygam.Distribution, optional (default='Normal')
Distribution family to use in the model.
Supported strings (case-insensitive):

Expand Down Expand Up @@ -81,17 +81,17 @@ class GAMRegressor(BaseProbaRegressor):
>>> y_positive = y.abs() + 1 # ensure positive targets for Poisson/Gamma
>>>
>>> # Normal distribution (default)
>>> gam_normal = GAMRegressor(distribution='Normal')
>>> gam_normal = GAMRegressor(dist='Normal')
>>> gam_normal.fit(X, y)
GAMRegressor(...)
>>>
>>> # Poisson distribution for count data
>>> gam_poisson = GAMRegressor(distribution='Poisson', link='log')
>>> gam_poisson = GAMRegressor(dist='Poisson', link='log')
>>> gam_poisson.fit(X, y_positive)
GAMRegressor(...)
>>>
>>> # Gamma distribution for positive continuous data
>>> gam_gamma = GAMRegressor(distribution='Gamma', link='log')
>>> gam_gamma = GAMRegressor(dist='Gamma', link='log')
>>> gam_gamma.fit(X, y_positive)
GAMRegressor(...)
"""
Expand All @@ -109,10 +109,16 @@ class GAMRegressor(BaseProbaRegressor):
"tests:vm": True,
}

# TODO (release 2.14.0)
# remove the 'distribution' argument from '__init__' signature
# remove the following 'if' check and deprecation warning
# de-indent the body of the following 'else' branch

def __init__(
self,
terms="auto",
distribution="normal",
distribution="deprecated",
dist="normal",
link="identity",
max_iter=100,
tol=1e-4,
Expand All @@ -122,6 +128,7 @@ def __init__(
):
self.terms = terms
self.distribution = distribution
self.dist = dist
self.link = link
self.max_iter = max_iter
self.tol = tol
Expand All @@ -131,6 +138,23 @@ def __init__(

super().__init__()

# handle deprecation of distribution -> dist
if distribution != "deprecated":
from warnings import warn

warn(
"in `GAMRegressor`, parameter 'distribution' "
"will be renamed to 'dist' in version 2.14.0. "
"To keep current behaviour and to silence this warning, "
"use 'dist' instead of 'distribution', "
"set dist explicitly via kwarg, and do not set distribution.",
category=DeprecationWarning,
stacklevel=2,
)
self._dist = distribution
else:
self._dist = dist

def _fit(self, X, y):
"""Fit regressor to training data.

Expand Down Expand Up @@ -158,7 +182,7 @@ def _fit(self, X, y):
if callbacks is None:
callbacks = ["deviance", "diffs"]

dist_name = self._get_distribution_name(self.distribution)
dist_name = self._get_distribution_name(self._dist)

# Map common names to skpro distribution names
dist_map = {
Expand Down
57 changes: 40 additions & 17 deletions skpro/regression/linear/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ class GLMRegressor(BaseProbaRegressor):

Parameters
----------
family : string, default : "Normal"
The family parameter denotes the type of distribution
dist : string, default : "Normal"
The dist parameter denotes the type of distribution
that will be used.
Available family/distributions are
Available distributions are
1."Normal"
2."Poisson"
3."Gamma"
Expand Down Expand Up @@ -205,7 +205,7 @@ class GLMRegressor(BaseProbaRegressor):
"y_inner_mtype": "pd_DataFrame_Table",
}

def _str_to_sm_family(self, family, link):
def _str_to_sm_family(self, dist, link):
"""Convert the string to a statsmodel object.

If the link function is also explicitly mentioned then include then
Expand All @@ -232,16 +232,22 @@ def _str_to_sm_family(self, family, link):
if link in links:
link_function = links[link]()
try:
return sm_fmly[family](link_function)
return sm_fmly[dist](link_function)
except Exception:
msg = "Invalid link for family, default link will be used"
msg = "Invalid link for distribution, default link will be used"
warn(msg)

return sm_fmly[family]()
return sm_fmly[dist]()

# TODO (release 2.14.0)
# remove the 'family' argument from '__init__' signature
# remove the following 'if' check and deprecation warning
# de-indent the body of the following 'else' branch

def __init__(
self,
family="Normal",
family="deprecated",
dist="Normal",
link=None,
offset_var=None,
exposure_var=None,
Expand All @@ -262,6 +268,7 @@ def __init__(
super().__init__()

self.family = family
self.dist = dist
self.link = link
self.offset_var = offset_var
self.exposure_var = exposure_var
Expand All @@ -279,7 +286,23 @@ def __init__(
self.max_start_irls = max_start_irls
self.add_constant = add_constant

self._family = self.family
# handle deprecation of family -> dist
if family != "deprecated":
from warnings import warn

warn(
"in `GLMRegressor`, parameter 'family' "
"will be renamed to 'dist' in version 2.14.0. "
"To keep current behaviour and to silence this warning, "
"use 'dist' instead of 'family', "
"set dist explicitly via kwarg, and do not set family.",
category=DeprecationWarning,
stacklevel=2,
)
self._dist = family
else:
self._dist = dist

self._link = self.link
self._offset_var = self.offset_var
self._exposure_var = self.exposure_var
Expand Down Expand Up @@ -325,12 +348,12 @@ def _fit(self, X, y):

# remove the offset and exposure columns that
# were inserted to maintain the shape
family = self._family
dist = self._dist
link = self._link

# ensure numerical stability for Gamma by injecting an intercept
self._auto_added_constant = False
if family == "Gamma" and not self._add_constant:
if dist == "Gamma" and not self._add_constant:
self._add_constant = True
self._auto_added_constant = True

Expand All @@ -341,7 +364,7 @@ def _fit(self, X, y):

y_col = y.columns

sm_family = self._str_to_sm_family(family=family, link=link)
sm_family = self._str_to_sm_family(dist=dist, link=link)

glm_estimator = GLM(
endog=y,
Expand Down Expand Up @@ -434,7 +457,7 @@ def _predict(self, X):

return y_pred

def _params_sm_to_skpro(self, y_predictions_df, index, columns, family):
def _params_sm_to_skpro(self, y_predictions_df, index, columns, dist):
"""Convert the statsmodels output to equivalent skpro distribution."""
from skpro.distributions.gamma import Gamma
from skpro.distributions.normal import Normal
Expand All @@ -449,8 +472,8 @@ def _params_sm_to_skpro(self, y_predictions_df, index, columns, family):
params = {}
skp_dist = Normal

if family in skpro_distr:
skp_dist = skpro_distr[family]
if dist in skpro_distr:
skp_dist = skpro_distr[dist]

if skp_dist == Normal:
y_mu = y_predictions_df["mean"].rename("mu").to_frame()
Expand Down Expand Up @@ -512,11 +535,11 @@ def _predict_proba(self, X):
y_predictions_df = self.glm_fit_.get_prediction(X_).summary_frame()

# convert the returned values to skpro equivalent distribution
family = self._family
dist = self._dist
index = X_.index
columns = y_column

y_pred = self._params_sm_to_skpro(y_predictions_df, index, columns, family)
y_pred = self._params_sm_to_skpro(y_predictions_df, index, columns, dist)
return y_pred

def _prep_x(self, X, offset_var, exposure_var, rtn_off_exp_arr):
Expand Down
Loading
Loading