Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions skpro/distributions/base/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

__all__ = ["BaseDistribution"]

import textwrap
from warnings import warn

import numpy as np
Expand All @@ -13,10 +14,140 @@

from skpro.base import BaseObject

# mapping of public methods to formula doc hooks
_DOC_METHODS = {
"pdf": "_pdf_formula_doc",
"cdf": "_cdf_formula_doc",
"log_pdf": "_log_pdf_formula_doc",
"pmf": "_pmf_formula_doc",
"log_pmf": "_log_pmf_formula_doc",
"ppf": "_ppf_formula_doc",
"surv": "_surv_formula_doc",
"haz": "_haz_formula_doc",
"mean": "_mean_formula_doc",
"var": "_var_formula_doc",
"energy": "_energy_formula_doc",
"pdfnorm": "_pdfnorm_formula_doc",
}


def _inject_formula_doc(base_doc, formula_doc):
"""Inject formula_doc into base_doc at {formula_doc} placeholder."""
if not base_doc or "{formula_doc}" not in base_doc:
return base_doc

if formula_doc is None:
# Cleanly remove the placeholder if no formula is provided
return base_doc.replace(" {formula_doc}\n\n", "").replace(
"{formula_doc}", ""
)

# 1. Find exactly how many spaces are before {formula_doc} in the base docstring
lines = base_doc.split("\n")
indent_spaces = ""
for line in lines:
if "{formula_doc}" in line:
indent_spaces = line[: line.find("{formula_doc}")]
break

# 2. Clean the user's formula (preserves relative indent inside the math block)
clean_formula = textwrap.dedent(formula_doc).strip()

# 3. Add the base indentation to every new line in the formula
indented_formula = clean_formula.replace("\n", "\n" + indent_spaces)

return base_doc.replace("{formula_doc}", indented_formula)


class BaseDistribution(BaseObject):
"""Base probability distribution."""

# Hooks for distribution-specific formula documentation.
# Each attribute name is a value of the module-level ``_DOC_METHODS`` mapping.
# ``__init_subclass__`` reads the attribute from the subclass and injects its
# text at the ``{formula_doc}`` placeholder of the matching public method's
# docstring. ``None`` (the default) means "no formula": the placeholder is
# removed from the docstring instead.
_pdf_formula_doc = None
_cdf_formula_doc = None
_log_pdf_formula_doc = None
_pmf_formula_doc = None
_log_pmf_formula_doc = None
_ppf_formula_doc = None
_surv_formula_doc = None
_haz_formula_doc = None
_mean_formula_doc = None
_var_formula_doc = None
_energy_formula_doc = None
_pdfnorm_formula_doc = None

def __init_subclass__(cls, **kwargs):
    """Inject distribution-specific math formulae into method docstrings.

    For every public method named in ``_DOC_METHODS`` whose docstring on
    ``BaseDistribution`` contains the ``{formula_doc}`` placeholder, the
    subclass gets a thin wrapper around its own method. The wrapper's
    docstring is the ``BaseDistribution`` template with the subclass's hook
    text (or nothing, if the hook is ``None``) filled in.

    Parameters
    ----------
    **kwargs
        Passed on unchanged to the parent ``__init_subclass__``.
    """
    import functools

    super().__init_subclass__(**kwargs)

    if cls is BaseDistribution:
        return

    # Skip adapters that might behave weirdly
    if cls.__name__.startswith("_BaseTF"):
        return

    # Defined once, outside the loop; taking the method and docstring as
    # arguments also avoids the late-binding closure pitfall.
    def _make_wrapper(original_method, new_docstring):
        # Peel off only wrappers created by this hook (inherited from a
        # parent class), so multi-level inheritance does not build deep
        # chains. Decorators applied by the subclass author also expose
        # ``__wrapped__`` but must stay in place, hence the explicit marker.
        while getattr(original_method, "_is_formula_doc_wrapper", False):
            original_method = original_method.__wrapped__

        @functools.wraps(original_method)
        def wrapper(self, *args, **kwargs_inner):
            return original_method(self, *args, **kwargs_inner)

        wrapper.__doc__ = new_docstring
        wrapper._is_formula_doc_wrapper = True
        return wrapper

    for method_name, hook_name in _DOC_METHODS.items():
        # ALWAYS use the pristine docstring from BaseDistribution as the template
        base_method = getattr(BaseDistribution, method_name, None)
        template = getattr(base_method, "__doc__", None)
        if not template or "{formula_doc}" not in template:
            continue

        formula_doc = getattr(cls, hook_name, None)
        new_doc = _inject_formula_doc(template, formula_doc)

        # Wrap whatever the subclass resolves the method to (own or inherited)
        method = getattr(cls, method_name)
        setattr(cls, method_name, _make_wrapper(method, new_doc))

@classmethod
def _has_implementation_of(cls, method):
    """Check for a concrete implementation of ``method``, ignoring doc wrappers.

    Defers to the parent class's check first. If that check is positive and
    the attribute found on ``cls`` carries a ``__wrapped__`` chain, the chain
    is followed to the innermost callable. If that callable is the same
    object as the (equally unwrapped) attribute of ``BaseDistribution``, the
    class only carries a docstring wrapper around the default and ``False``
    is returned. In every other case the parent's answer is returned as is.

    Parameters
    ----------
    method : str
        Name of the method to look up.
    """

    def _peel(func):
        # follow the ``__wrapped__`` chain down to the innermost callable
        while hasattr(func, "__wrapped__"):
            func = func.__wrapped__
        return func

    reported = super()._has_implementation_of(method)
    if not reported:
        return reported

    candidate = getattr(cls, method, None)
    if not hasattr(candidate, "__wrapped__"):
        # nothing wrapped here, so the parent's verdict stands
        return reported

    default_impl = getattr(BaseDistribution, method, None)
    if default_impl is not None:
        default_impl = _peel(default_impl)

    # same innermost function as the base default => docstring wrapper only
    if _peel(candidate) is default_impl:
        return False

    return reported

# default tag values - these typically make the "safest" assumption
_tags = {
"object_type": "distribution", # type of object, e.g., 'distribution'
Expand Down Expand Up @@ -712,6 +843,10 @@ def _boilerplate(self, method, columns=None, **kwargs):
def pdf(self, x):
r"""Probability density function.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``,
taking values in ``(N, n)`` ``DataFrame``-s
Let :math:`x\in \mathbb{R}^{N\times n}`.
Expand Down Expand Up @@ -779,6 +914,10 @@ def _pdf(self, x):
def log_pdf(self, x):
r"""Logarithmic probability density function.

{formula_doc}

Numerically more stable than calling pdf and then taking logarithms.

Let :math:`X` be a random variable with the distribution of ``self``,
Expand Down Expand Up @@ -871,6 +1010,10 @@ def _approx_derivative(x, fun, h=1e-7):
def pmf(self, x):
r"""Probability mass function.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``,
taking values in ``(N, n)`` ``DataFrame``-s
Let :math:`x\in \mathbb{R}^{N\times n}`.
Expand Down Expand Up @@ -927,6 +1070,10 @@ def _pmf(self, x):
def log_pmf(self, x):
r"""Logarithmic probability mass function.

{formula_doc}

Numerically more stable than calling pmf and then taking logarithms.

Let :math:`X` be a random variable with the distribution of ``self``,
Expand Down Expand Up @@ -983,6 +1130,10 @@ def _log_pmf(self, x):
def cdf(self, x):
r"""Cumulative distribution function.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``,
taking values in ``(N, n)`` ``DataFrame``-s
Let :math:`x\in \mathbb{R}^{N\times n}`.
Expand Down Expand Up @@ -1026,6 +1177,10 @@ def _cdf(self, x):
def surv(self, x):
r"""Survival function.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``,
taking values in ``(N, n)`` ``DataFrame``-s
Let :math:`x\in \mathbb{R}^{N\times n}`.
Expand Down Expand Up @@ -1060,6 +1215,9 @@ def _surv(self, x):
def haz(self, x):
r"""Hazard function.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``,
taking values in ``(N, n)`` ``DataFrame``-s
Let :math:`x\in \mathbb{R}^{N\times n}`.
Expand Down Expand Up @@ -1096,6 +1254,10 @@ def _haz(self, x):
def ppf(self, p):
r"""Quantile function = percent point function = inverse cdf.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``,
taking values in ``(N, n)`` ``DataFrame``-s
Let :math:`x\in \mathbb{R}^{N\times n}`.
Expand Down Expand Up @@ -1189,6 +1351,10 @@ def opt_fun(x):
def energy(self, x=None):
r"""Energy of self, w.r.t. self or a constant frame x.

{formula_doc}

Let :math:`X, Y` be i.i.d. random variables with the distribution of ``self``.

If ``x`` is ``None``, returns :math:`\mathbb{E}[|X-Y|]` (per row),
Expand Down Expand Up @@ -1358,6 +1524,10 @@ def _sample_mean(self, spl):
def mean(self):
r"""Return expected value of the distribution.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``.
Returns the expectation :math:`\mathbb{E}[X]`

Expand Down Expand Up @@ -1400,6 +1570,10 @@ def _mean(self):
def var(self):
r"""Return element/entry-wise variance of the distribution.

{formula_doc}

Let :math:`X` be a random variable with the distribution of ``self``.
Returns :math:`\mathbb{V}[X] = \mathbb{E}\left(X - \mathbb{E}[X]\right)^2`,
where the square is element-wise.
Expand Down Expand Up @@ -1451,6 +1625,10 @@ def _var(self):
def pdfnorm(self, a=2):
r"""a-norm of pdf, defaults to 2-norm.

{formula_doc}

computes a-norm of the entry marginal pdf, i.e.,
:math:`\mathbb{E}[p_X(X)^{a-1}] = \int p(x)^a dx`,
where :math:`X` is a random variable distributed according to the entry marginal
Expand Down
49 changes: 49 additions & 0 deletions skpro/distributions/exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,55 @@ class Exponential(_ScipyAdapter):
"broadcast_init": "on",
}

# Formula text for the ``_<method>_formula_doc`` documentation hooks.
# Each raw string holds a short sentence followed by a reST ``.. math::`` block.
# NOTE(review): presumably consumed by ``BaseDistribution.__init_subclass__`` in
# ``skpro/distributions/base/_base.py`` - confirm that ``_ScipyAdapter`` derives
# from ``BaseDistribution``.
# NOTE(review): ``\lambda`` presumably denotes the ``rate`` parameter - confirm.
_pdf_formula_doc = r"""
The probability density function is given by:

.. math::
f(x) = \lambda \exp(-\lambda x), \quad x \ge 0
"""

_log_pdf_formula_doc = r"""
The log-density is given by:

.. math::
\log f(x) = \log(\lambda) - \lambda x, \quad x \ge 0
"""

_cdf_formula_doc = r"""
The cumulative distribution function is given by:

.. math::
F(x) = 1 - \exp(-\lambda x), \quad x \ge 0
"""
_ppf_formula_doc = r"""
The quantile function (inverse cdf) is:

.. math::
F^{-1}(p; \lambda) = -\frac{\ln(1 - p)}{\lambda}
"""

_mean_formula_doc = r"""
The expected value is:

.. math::
\mathbb{E}[X] = \lambda^{-1}
"""

_var_formula_doc = r"""
The variance is:

.. math::
\text{Var}(X) = \lambda^{-2}
"""

_energy_formula_doc = r"""
The analytical self-energy is:

.. math::
\mathbb{E}[|X - Y|] = \lambda^{-1}
"""

def __init__(self, rate, index=None, columns=None):
self.rate = rate

Expand Down
54 changes: 54 additions & 0 deletions skpro/distributions/laplace.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,60 @@ class Laplace(BaseDistribution):
"broadcast_init": "on",
}

# Formula text for the ``_<method>_formula_doc`` documentation hooks.
# Each raw string holds a short sentence followed by a reST ``.. math::`` block.
# NOTE(review): presumably consumed by ``BaseDistribution.__init_subclass__`` in
# ``skpro/distributions/base/_base.py``, which fills the ``{formula_doc}``
# placeholder of the base docstrings - confirm.
# NOTE(review): ``\mu`` and ``b`` presumably denote the ``mu`` and ``scale``
# parameters - confirm.
_pdf_formula_doc = r"""
The probability density function is given by:

.. math::
f(x) = \frac{1}{2b} \exp \left( - \frac{|x - \mu|}{b} \right)
"""

_log_pdf_formula_doc = r"""
The log-density is given by:

.. math::
\log f(x) = - \log(2b) - \frac{|x - \mu|}{b}
"""

_cdf_formula_doc = r"""
The cumulative distribution function is given by:

.. math::
F(x) =
\begin{cases}
\frac{1}{2} \exp \left( \frac{x - \mu}{b} \right), & x < \mu \\
1 - \frac{1}{2} \exp \left( - \frac{x - \mu}{b} \right), & x \geq \mu
\end{cases}
"""

_ppf_formula_doc = r"""
The quantile function (inverse cdf) is:

.. math::
F^{-1}(p; \mu, b) = \mu - b \operatorname{sgn}(p - 0.5) \ln(1 - 2|p - 0.5|)
"""

_mean_formula_doc = r"""
The expected value is:

.. math::
\mathbb{E}[X] = \mu
"""

_var_formula_doc = r"""
The variance is:

.. math::
\text{Var}(X) = 2b^2
"""

_energy_formula_doc = r"""
The analytical self-energy is:

.. math::
\mathbb{E}[|X - Y|] = \frac{3}{2}b
"""

def __init__(self, mu, scale, index=None, columns=None):
self.mu = mu
self.scale = scale
Expand Down
Loading
Loading