Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Pareto distribution #396

Merged
merged 5 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/api_reference/distributions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Continuous support
Logistic
LogLaplace
Normal
Pareto
TDistribution
Weibull

Expand Down
2 changes: 2 additions & 0 deletions skpro/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"LogNormal",
"Mixture",
"Normal",
"Pareto",
"Poisson",
"QPD_Empirical",
"QPD_S",
Expand Down Expand Up @@ -53,6 +54,7 @@
from skpro.distributions.lognormal import LogNormal
from skpro.distributions.mixture import Mixture
from skpro.distributions.normal import Normal
from skpro.distributions.pareto import Pareto
from skpro.distributions.poisson import Poisson
from skpro.distributions.qpd import QPD_B, QPD_S, QPD_U, QPD_Johnson
from skpro.distributions.qpd_empirical import QPD_Empirical
Expand Down
165 changes: 165 additions & 0 deletions skpro/distributions/pareto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Pareto probability distribution."""

__author__ = ["sukjingitsit"]

import numpy as np
import pandas as pd

from skpro.distributions.base import BaseDistribution


class Pareto(BaseDistribution):
    r"""Pareto distribution (skpro native).

    The scale (lower bound of the support) is represented by the parameter
    ``scale``, and the Pareto index (or shape parameter) :math:`\alpha`
    by the parameter ``alpha``.

    The CDF can be represented as,
    :math:`F(x) = 1-\left(\frac{\text{scale}}{x}\right)^\alpha
    \text{ if } x \geq \text{scale}, 0 \text{ if } x < \text{scale}`

    The mean is :math:`\frac{\alpha \cdot \text{scale}}{\alpha - 1}`
    for :math:`\alpha > 1` and infinite otherwise; the variance is
    :math:`\frac{\text{scale}^2 \alpha}{(\alpha - 1)^2 (\alpha - 2)}`
    for :math:`\alpha > 2` and infinite otherwise.

    Parameters
    ----------
    scale : float or array of float (1D or 2D), must be positive
        scale of the Pareto distribution
    alpha : float or array of float (1D or 2D), must be positive
        shape of the Pareto distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.pareto import Pareto

    >>> d = Pareto(scale=[[1, 1.5], [2, 2.5], [3, 4]], alpha=3)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm", "energy"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, scale, alpha, index=None, columns=None):
        self.scale = scale
        self.alpha = alpha

        super().__init__(index=index, columns=columns)

    def _mean(self):
        """Return expected value of the distribution.

        The mean is ``alpha * scale / (alpha - 1)`` where ``alpha > 1``,
        and infinite where ``alpha <= 1``.

        Returns
        -------
        2D np.ndarray, same shape as ``self``
            expected value of distribution (entry-wise)
        """
        # cast to float so integer parameters neither overflow nor raise
        # ZeroDivisionError at the alpha == 1 boundary
        alpha = np.asarray(self._bc_params["alpha"], dtype=float)
        scale = np.asarray(self._bc_params["scale"], dtype=float)
        # the finite-mean formula is evaluated everywhere and masked below,
        # so silence warnings from entries that are discarded
        with np.errstate(divide="ignore", invalid="ignore"):
            finite_mean = alpha * scale / (alpha - 1)
        return np.where(alpha <= 1, np.inf, finite_mean)

    def _var(self):
        r"""Return element/entry-wise variance of the distribution.

        The variance is ``scale**2 * alpha / ((alpha - 2) * (alpha - 1)**2)``
        where ``alpha > 2``, and infinite where ``alpha <= 2``.

        Returns
        -------
        2D np.ndarray, same shape as ``self``
            variance of the distribution (entry-wise)
        """
        alpha = np.asarray(self._bc_params["alpha"], dtype=float)
        scale = np.asarray(self._bc_params["scale"], dtype=float)
        # entries with alpha in {1, 2} divide by zero but are masked below
        with np.errstate(divide="ignore", invalid="ignore"):
            finite_var = scale**2 * alpha / ((alpha - 2) * (alpha - 1) ** 2)
        return np.where(alpha <= 2, np.inf, finite_var)

    def _pdf(self, x):
        """Probability density function.

        Evaluates to ``alpha * scale**alpha / x**(alpha + 1)`` on the
        support ``x >= scale`` and to 0 below it.

        Parameters
        ----------
        x : 2D np.ndarray, same shape as ``self``
            values to evaluate the pdf at

        Returns
        -------
        2D np.ndarray, same shape as ``self``
            pdf values at the given points
        """
        alpha = np.asarray(self._bc_params["alpha"], dtype=float)
        scale = np.asarray(self._bc_params["scale"], dtype=float)
        x = np.asarray(x, dtype=float)
        # x <= 0 may yield inf/nan here; those entries lie outside the
        # support and are replaced by 0 below
        with np.errstate(divide="ignore", invalid="ignore"):
            density = alpha * np.power(scale, alpha) / np.power(x, alpha + 1)
        return np.where(x < scale, 0.0, density)

    def _log_pdf(self, x):
        """Logarithmic probability density function.

        Evaluates to ``log(alpha) + alpha * log(scale) - (alpha + 1) * log(x)``
        on the support ``x >= scale`` and to ``-inf`` below it.

        Parameters
        ----------
        x : 2D np.ndarray, same shape as ``self``
            values to evaluate the pdf at

        Returns
        -------
        2D np.ndarray, same shape as ``self``
            log pdf values at the given points
        """
        alpha = np.asarray(self._bc_params["alpha"], dtype=float)
        scale = np.asarray(self._bc_params["scale"], dtype=float)
        x = np.asarray(x, dtype=float)
        # log of non-positive x is nan/-inf; those entries are masked below
        with np.errstate(divide="ignore", invalid="ignore"):
            log_density = (
                np.log(alpha) + alpha * np.log(scale) - (alpha + 1) * np.log(x)
            )
        return np.where(x < scale, -np.inf, log_density)

    def _cdf(self, x):
        """Cumulative distribution function.

        Evaluates to ``1 - (scale / x)**alpha`` on the support
        ``x >= scale`` and to 0 below it.

        Parameters
        ----------
        x : 2D np.ndarray, same shape as ``self``
            values to evaluate the cdf at

        Returns
        -------
        2D np.ndarray, same shape as ``self``
            cdf values at the given points
        """
        alpha = np.asarray(self._bc_params["alpha"], dtype=float)
        scale = np.asarray(self._bc_params["scale"], dtype=float)
        x = np.asarray(x, dtype=float)
        # scale / x is undefined at x == 0 and may be nan for x < 0;
        # those entries are masked below
        with np.errstate(divide="ignore", invalid="ignore"):
            upper_tail = np.power(scale / x, alpha)
        return np.where(x < scale, 0.0, 1 - upper_tail)

    def _ppf(self, p):
        """Quantile function = percent point function = inverse cdf.

        Evaluates to ``scale / (1 - p)**(1 / alpha)``, which is ``scale``
        at ``p = 0`` and infinite at ``p = 1``.

        Parameters
        ----------
        p : 2D np.ndarray, same shape as ``self``
            values to evaluate the ppf at

        Returns
        -------
        2D np.ndarray, same shape as ``self``
            ppf values at the given points
        """
        alpha = np.asarray(self._bc_params["alpha"], dtype=float)
        scale = np.asarray(self._bc_params["scale"], dtype=float)
        p = np.asarray(p, dtype=float)
        # p == 1 divides by zero and correctly yields inf; no warning needed
        with np.errstate(divide="ignore"):
            return scale / np.power(1 - p, 1 / alpha)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array case examples
        params1 = {"scale": [[1, 1.5], [2, 3], [4, 5]], "alpha": 3}
        params2 = {
            "scale": 1,
            "alpha": 3,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # scalar case examples
        params3 = {"scale": 1, "alpha": 2}
        return [params1, params2, params3]
Loading