Source code for chaospy.distributions.kernel.gaussian

"""Gaussian kernel density estimation."""
import numpy
from scipy import special

from .baseclass import KernelDensityBaseclass
from ..collection.mv_normal import MvNormal


[docs]class GaussianKDE(KernelDensityBaseclass):
    """
    Gaussian kernel density estimator.

    Density estimator that handles both univariate and multivariate data. It
    provides automatic bandwidth selection method using Scott's and Silverman's
    method.

    Attributes:
        samples:
            The raw data as provided by the user reshaped to have `ndim == 2`.
        h_mat:
            The covariance matrix to each sample. Assuming uncorrelated
            dimensions, the bandwidth is the square root of the diagonals. It
            will have either dimensions `(1, n_dim, n_dim)` if all samples
            shares covariance, or `(n_samples, n_dim, n_dim)` if not.
        weights:
            How much each sample is weighted. Either a scalar when the samples
            are equally weighted, or with length `n_samples` otherwise.

    Examples:
        >>> samples = [[-1, 0, 1], [0, 1, 2]]
        >>> distribution = chaospy.GaussianKDE(samples, estimator_rule="silverman")
        >>> distribution.h_mat  # H-matrix or bandwidth**2
        array([[[0.38614462, 0.        ],
                [0.        , 0.38614462]]])
        >>> uloc = [[0, 0, 1, 1], [0, 1, 0, 1]]
        >>> distribution.pdf(uloc).round(4)
        array([0.0469, 0.0982, 0.0074, 0.0469])
        >>> distribution.fwd(uloc).round(4)
        array([[0.5   , 0.5   , 0.8152, 0.8152],
               [0.1233, 0.5   , 0.0142, 0.1532]])
        >>> distribution.inv(uloc).round(4)
        array([[-6.577 , -6.577 ,  5.3948,  5.3948],
               [-4.3871,  4.6411, -5.9611,  7.7779]])
        >>> distribution.mom([(0, 1, 1), (1, 0, 1)]).round(4)
        array([1.    , 0.    , 0.6667])

    """

    @property
    def samples(self):
        return self._samples

    @property
    def h_mat(self):
        return self._covariance

    @staticmethod
    def _kernel(z_loc):
        """The underlying density kernel."""
        return numpy.prod(
            numpy.e ** (-(z_loc**2) / 2.0) / numpy.sqrt(2 * numpy.pi), axis=-1
        )

    @staticmethod
    def _ikernel(z_loc):
        """The integrand of the underlying density kernel."""
        return special.ndtr(z_loc)

    def _mom(self, k_loc, cache):
        """Raw statistical moments."""
        length = self.samples.shape[-1]
        h_mat = numpy.broadcast_to(self.h_mat, (length,) + self.h_mat.shape[1:])
        out = numpy.array(
            [
                MvNormal._mom(
                    self,
                    k_loc,
                    mean=self.samples[:, idx],
                    sigma=h_mat[idx],
                    cache={},
                )
                for idx in range(length)
            ]
        )
        return numpy.sum(out * self.weights)

    def _lower(self, idx, dim, cache):
        """Lower bounds."""
        del dim
        del cache
        return (self.samples[idx] - 10 * numpy.sqrt(self.h_mat[:, idx, idx]).T).min(-1)

    def _upper(self, idx, dim, cache):
        """Upper bounds."""
        del dim
        del cache
        return (self.samples[idx] + 10 * numpy.sqrt(self.h_mat[:, idx, idx]).T).max(-1)