# Source code for qp_flexzboost.flexzboost_pdf

"""This implements a PDF sub-class specifically for FlexZBoost"""
from enum import Enum
from typing import List

import numpy as np
from flexcode.basis_functions import BasisCoefs
from qp.factory import add_class
from qp.pdf_gen import Pdf_rows_gen
from qp.plotting import get_axes_and_xlims, plot_pdf_on_axes
from qp.utils import (CASE_FACTOR, CASE_PRODUCT, get_eval_case,
                      interpolate_multi_x_y)
from scipy.stats import rv_continuous


# pylint: disable=invalid-name
class BasisSystem(Enum):
    """Enumerate the basis systems that FlexCode supports.

    The member *names* are the strings passed to FlexCode as ``basis_system``
    (see ``FlexzboostGen._build_basis_coef_object``), so they must keep their
    exact spelling and capitalisation. The integer *values* are what the
    generator stores as ``basis_system_enum_value``.
    """

    cosine = 1
    Fourier = 2
    db4 = 3


# pylint: disable=too-many-arguments,too-many-instance-attributes
class FlexzboostGen(Pdf_rows_gen):
    """Distribution based on weighted basis functions output from FlexZBoost.

    Notes
    -----
    This class is meant primarily to be a compact storage mechanism for output
    from FlexCode: it keeps the basis-function weights plus the handful of
    parameters needed to turn them back into PDFs on demand.
    """
    # pylint: disable=protected-access

    # Name and version of this parameterization.
    # NOTE(review): presumably read by the `qp` factory when the class is
    # registered via `add_class` -- confirm against qp.factory.
    name = 'flexzboost'
    version = 0

    # Reuse the support-mask implementation defined on scipy's rv_continuous.
    _support_mask = rv_continuous._support_mask

    def __init__(self, weights:List[List[float]], basis_system_enum_value:int,
                 z_min:float, z_max:float, bump_threshold:float,
                sharpen_alpha:float, *args, **kwargs):
        """Build a `qp` generator from FlexCode weights and evaluation parameters.

        Parameters
        ----------
        weights : List[List[float]]
            A list of lists where each element is a floating point value. The
            weights represent the contribution of each basis function to the
            final PDF. The shape of `weights` should be N x b, where
            N = number of PDFs and b = number of basis functions.
        basis_system_enum_value : int
            The enum id to define the FlexCode basis system used to produce the results
        z_min : float
            The minimum z value considered when producing results with FlexCode
        z_max : float
            The maximum z value considered when producing results with FlexCode
        bump_threshold : float
            A parameter used by FlexCode to remove small bumps from the results
        sharpen_alpha : float
            A parameter used by FlexCode to sharpen peaks in the results

        Notes
        -----
        The argument list of this constructor is admittedly rather long. This
        approach makes it easier to interface with the greater `qp`
        infrastructure. To ease the burden on the user, the classmethod
        `FlexzboostGen.create_from_basis_coef_object` accepts a
        `Flexcode:BasisCoefs` object and unpacks it into these arguments.
        """
        weight_array = np.asarray(weights)

        # kwargs['shape'] is used by the parent class to define the total
        # number of PDFs stored in this generator object: every axis of the
        # weights except the last (basis function) one.
        kwargs['shape'] = weight_array.shape[:-1]
        super().__init__(*args, **kwargs)

        self._weights = weight_array
        self._basis_system_enum_value = basis_system_enum_value
        self._z_min = z_min
        self._z_max = z_max

        # Both attributes must exist before either setter runs, because each
        # setter rebuilds the BasisCoefs object from the pair of them.
        self._bump_threshold = None
        self._sharpen_alpha = None

        # These two assignments allow the use of the property.setter functions,
        # which normalise None-like input and also update the parent class
        # metadata as needed.
        self.bump_threshold = bump_threshold
        self.sharpen_alpha = sharpen_alpha

        self._basis_coefficients = self._build_basis_coef_object()

        # Lazily populated evaluation caches.
        self._xvals = self._yvals = self._ycumul = None

        scalar_metadata = (
            ('basis_system_enum_value', self._basis_system_enum_value),
            ('z_min', self._z_min),
            ('z_max', self._z_max),
            ('bump_threshold', self._bump_threshold),
            ('sharpen_alpha', self._sharpen_alpha),
        )
        for key, value in scalar_metadata:
            self._addmetadata(key, value)
        self._addobjdata('weights', self._weights)

    @property
    def basis_system_enum(self)->BasisSystem:
        """Return the BasisSystem enum for this object.

        Returns
        -------
        BasisSystem
            The enum member whose value equals the stored
            `basis_system_enum_value`.

        Raises
        ------
        ValueError
            If the stored value does not correspond to any `BasisSystem` member.
        """
        return BasisSystem(self._basis_system_enum_value)

    @property
[docs]    def z_min(self)->float:
        """Return the minimum z value used for the results stored in this object.

        Returns
        -------
        float
            Minimum z value used to predict these results
        """
        return self._z_min

    @property
[docs]    def z_max(self)->float:
        """Return the maximum z value used for the results stored in this object.

        Returns
        -------
        float
            Maximum z value used to predict these results
        """
        return self._z_max

    @property
[docs]    def bump_threshold(self)->float:
        """Return the bump threshold used for the results stored in this object.

        Returns
        -------
        float
            Bump threshold value used to predict these results
        """
        return self._bump_threshold

    @bump_threshold.setter
    def bump_threshold(self, new_bump_threshold):
        """Set the bump threshold without rerunning the model.

        Scipy may hand values to `__init__` as 0 dimensional numpy arrays when
        a slice of an ensemble containing this generator is taken. For floats
        that is harmless, but the comparison logic in Flexcode breaks when it
        receives a 0 dimensional array wrapping `None`. Any None-like input is
        therefore stored as a plain `None`.

        Parameters
        ----------
        new_bump_threshold : float
            The new bump threshold to use in the BasisCoefs object.
        """

        # `==` (not `is`) on purpose: Numpy broadcasts the comparison over the
        # contents of new_bump_threshold, so a wrapped None is detected too.
        # pylint: disable-next=singleton-comparison
        none_like = new_bump_threshold == None
        self._bump_threshold = None if none_like else new_bump_threshold

        # _addmetadata updates the parent class, so that slices into an ensemble
        # will create new instances of this class with the correct values.
        self._addmetadata('bump_threshold', self._bump_threshold)

        # Rebuild the BasisCoefs object so it carries the new value.
        self._update_basis_coef_object()

    @property
[docs]    def sharpen_alpha(self)->float:
        """Return the sharpen alpha used for the results stored in this object.

        Returns
        -------
        float
            Sharpen alpha value used to predict these results
        """
        return self._sharpen_alpha

    @sharpen_alpha.setter
    def sharpen_alpha(self, new_sharpen_alpha):
        """Set the sharpen alpha without rerunning the model.

        Scipy may hand values to `__init__` as 0 dimensional numpy arrays when
        a slice of an ensemble containing this generator is taken. For floats
        that is harmless, but the comparison logic in Flexcode breaks when it
        receives a 0 dimensional array wrapping `None`. Any None-like input is
        therefore stored as a plain `None`.

        Parameters
        ----------
        new_sharpen_alpha : float
            The new sharpen parameter to use in the BasisCoefs object.
        """

        # `==` (not `is`) on purpose: Numpy broadcasts the comparison over the
        # contents of new_sharpen_alpha, so a wrapped None is detected too.
        # pylint: disable-next=singleton-comparison
        none_like = new_sharpen_alpha == None
        self._sharpen_alpha = None if none_like else new_sharpen_alpha

        # _addmetadata updates the parent class, so that slices into an ensemble
        # will create new instances of this class with the correct values.
        self._addmetadata('sharpen_alpha', self._sharpen_alpha)

        # Rebuild the BasisCoefs object so it carries the new value.
        self._update_basis_coef_object()

    @property
[docs]    def basis_coefficients(self)->BasisCoefs:
        """Return the BasisCoef object that was used to instantiate this object.

        Returns
        -------
        BasisCoefs
            Object used to initialize the class instance
        """
        return self._basis_coefficients

[docs]    def _build_basis_coef_object(self):
        """Private method that builds and returns a `FlexCode:BasisCoefs` object
        from the constructor parameters.

        Returns
        -------
        BasisCoefs
            Object used to initialize the class instance
        """
        return BasisCoefs(coefs=None,
                          basis_system=BasisSystem(self._basis_system_enum_value).name,
                          z_min=self._z_min,
                          z_max=self._z_max,
                          bump_threshold=self._bump_threshold,
                          sharpen_alpha=self._sharpen_alpha)

[docs]    def _update_basis_coef_object(self):
        """Simple method to update the `BasisCoefs` object 'in place'."""
        self._basis_coefficients = self._build_basis_coef_object()

[docs]    def _calculate_yvals_if_needed(self, xvals:List[float]) -> None:
        """If self._yvals is None or the xvals have changed, reevaluate the y values.

        Parameters
        ----------
        xvals : List[float]
            The x-values to evaluate the basis function.
        """
        if self._yvals is None or xvals is not self._xvals:
            self._evaluate_basis_coefficients(xvals)

[docs]    def _evaluate_basis_coefficients(self, xvals:List[float]) -> None:
        """Assign the list of x values to self._xvals. Use that grid to evaluate
        the y_values of PDFs using the weights and parameters stored in
        self._basis_coefficients.

        Parameters
        ----------
        xvals : List[float]
            The x-values to evaluate the analytical PDFs

        Notes
        -----
        We'll maintain a copy of the x values in memory for this object, but it
        won't be stored to disk.

        FlexCode requires that the x values be reshaped, we'll do that in the call
        to `.evaluate`, but we won't keep the reshaped x values in memory.

        The `.evaluate` method expects the `BasisCoefs` object to contain the
        output weights. So we'll add the weights back to the object for evaluation,
        and then remove them when we've completed `evaluation`. We do this to ensure
        that the value of weights is not accidentally stored to disk twice. Once
        as `self._weights`, and once as `self._basis_coefficients.coefs`.

        If storage to disk wasn't a concern, then this wouldn't be a problem. Due
        to the way that Python maintains references to values in memory, assigning
        the same values to `self._weights` and `self._basis_coefficients.coefs`
        doesn't actually use 2x the memory.
        """
        self._xvals = xvals
        self._basis_coefficients.coefs = self._weights

        #! I think that `evaluate` will accept a 2d array of values where each row
        # is a PDF and each column is an x value) But for now, let's stick with
        # the simple case of one set of x values for all PDFs. Ultimately we
        # might need to do something different with the self._xvals.reshape(-1,1)

        self._yvals = self._basis_coefficients.evaluate(self._xvals.reshape(-1,1))
        self._basis_coefficients.coefs = None

[docs]    def _compute_ycumul(self, xvals:List[float]) -> None:
        """Compute the cumulative values of y given an x grid

        Parameters
        ----------
        xvals : List[float]
            The x-values to evaluate the cumulative y value.
        """
        self._evaluate_basis_coefficients(xvals)

        # Do the magic to calculate cumulative values of y
        copy_shape = np.array(self._yvals.shape)
        self._ycumul = np.ndarray(copy_shape)
        self._ycumul[:, 0] = 0.5 * self._yvals[:, 0] * (self._xvals[1] - self._xvals[0])
        self._ycumul[:, 1:] = np.cumsum((self._xvals[1:] - self._xvals[:-1]) *
                                        0.5 * np.add(self._yvals[:,1:],
                                                     self._yvals[:,:-1]), axis=1)

    # pylint: disable-next=arguments-differ
    def _pdf(self, x:List[float], row:List[int]) -> List[List[float]]:
        """Return the numerical PDFs, evaluated on the grid, `x`.

        Parameters
        ----------
        x : List[float]
            The x-values to evaluate the analytical PDFs
        row : List[int]
            The indices for which numerical PDFs should be generated. In this
            method `row` is only used, together with `x`, to determine the
            evaluation case; the y-values returned are those of every PDF
            stored in this generator.

        Returns
        -------
        numpy.ndarray
            The y-values of all stored PDFs on the grid, flattened to 1d.

        Raises
        ------
        ValueError
            If the evaluation case reported by `get_eval_case` is neither
            CASE_PRODUCT nor CASE_FACTOR.
        """
        case_idx, xx, _ = get_eval_case(x, row)
        if case_idx not in (CASE_PRODUCT, CASE_FACTOR):
            raise ValueError("Only CASE_PRODUCT and CASE_FACTOR are supported.")

        # Calculate yvals for the given x's, if needed
        self._calculate_yvals_if_needed(xx)
        return self._yvals.ravel()

    # pylint: disable-next=arguments-differ
    def _cdf(self, x:List[float], row:List[int]) -> List[List[float]]:
        """Return the numerical CDF, evaluated on the grid, `x`.

        Parameters
        ----------
        x : List[float]
            The x-values to evaluate the analytical CDFs
        row : List[int]
            The indices for which numerical CDFs should be generated. In this
            method `row` is only used, together with `x`, to determine the
            evaluation case; the values returned are those of every PDF stored
            in this generator.

        Returns
        -------
        numpy.ndarray
            The cumulative values of all stored PDFs on the grid, flattened
            to 1d. They are recomputed on every call.

        Raises
        ------
        ValueError
            If the evaluation case reported by `get_eval_case` is neither
            CASE_PRODUCT nor CASE_FACTOR.
        """
        case_idx, xx, _ = get_eval_case(x, row)
        if case_idx not in (CASE_PRODUCT, CASE_FACTOR):
            raise ValueError("Only CASE_PRODUCT and CASE_FACTOR are supported.")

        self._compute_ycumul(xx)
        return self._ycumul.ravel()

    # pylint: disable-next=arguments-differ
    def _ppf(self, x:List[float], row:List[int]) -> List[List[float]]:
        """Return the numerical PPF, evaluated at the quantiles, `x`.

        The cumulative distribution is tabulated on a fixed grid of 100 points
        spanning `[z_min, z_max]` and then inverted by interpolation.

        Parameters
        ----------
        x : List[float]
            The quantile values at which to evaluate the PPFs
        row : List[int]
            The indices for which numerical PPFs should be generated

        Returns
        -------
        numpy.ndarray
            The interpolated z values, flattened to 1d. A quantile that falls
            below the tabulated cumulative range maps to `z_min`, one that
            falls above it maps to `z_max`.
        """
        grid = np.linspace(self._z_min, self._z_max, 100)

        # Evaluates the PDFs on `grid` and stores the grid and the cumulative
        # values on the instance.
        self._compute_ycumul(grid)

        # The interpolation runs from cumulative value to z, so the
        # out-of-range fill values have to be z values (the ends of the grid),
        # not the min/max of the requested quantiles.
        return interpolate_multi_x_y(x, row, self._ycumul, grid,
            bounds_error=False, fill_value=(self._z_min, self._z_max)).ravel()

[docs]    def _updated_ctor_param(self):
        """Specify the constructor parameters. This is required by scipy in order
        extend the rv_continuous class.

        Returns
        -------
        dct dict
            Dictionary of the constructor arguments and object instance variables
            needed to create this object.
        """
        dct = super()._updated_ctor_param()
        dct['weights'] = self._weights
        dct['basis_system_enum_value'] = self._basis_system_enum_value
        dct['z_min'] = self._z_min
        dct['z_max'] = self._z_max
        dct['bump_threshold'] = self._bump_threshold
        dct['sharpen_alpha'] = self._sharpen_alpha
        return dct

    @classmethod
    def create_from_basis_coef_object(cls,
                                      weights:List[List[float]],
                                      basis_coefficients_object:BasisCoefs,
                                      **kwargs) -> Pdf_rows_gen:
        """This is a convenience method that allows the user to define a generator
        by passing a `BasisCoefs` object, instead of the typical 5 additional
        values.

        Parameters
        ----------
        weights : List[List[float]]
            A list of lists where each element is a floating point value. The
            weights represent the contribution of each basis function to the
            final PDF. The shape of `weights` should be N x b, where
            N = number of PDFs and b = number of basis functions.
        basis_coefficients_object : BasisCoefs
            An object that carries the parameters required to define the set of
            basis functions: `basis_system`, `z_min`, `z_max`, `bump_threshold`
            and `sharpen_alpha`. Its `coefs` attribute is not read; the weights
            come from the `weights` argument.
        **kwargs
            Passed through to the call on the newly constructed generator.

        Returns
        -------
        Pdf_rows_gen
            The result of calling the constructed generator with `**kwargs`.
            NOTE(review): for a scipy `rv_continuous` subclass that call
            presumably yields a frozen distribution rather than the generator
            itself -- confirm against qp.

        Raises
        ------
        KeyError
            If `basis_coefficients_object.basis_system` is not the name of a
            `BasisSystem` member.
        """
        # The BasisCoefs object names its basis system with a string, while the
        # constructor takes the matching integer enum value.
        basis_system = BasisSystem[basis_coefficients_object.basis_system]

        generator_object = cls(
            weights=weights,
            basis_system_enum_value=basis_system.value,
            z_min=basis_coefficients_object.z_min,
            z_max=basis_coefficients_object.z_max,
            bump_threshold=basis_coefficients_object.bump_threshold,
            sharpen_alpha=basis_coefficients_object.sharpen_alpha)

        return generator_object(**kwargs)

    @classmethod
[docs]    def get_allocation_kwds(cls, npdf, **kwargs):
        """Return the keywords necessary to create an 'empty' hdf5 file with npdf entries
        for iterative file write out. We only need to allocate the objdata columns, as
        the metadata can be written when we finalize the file.

        Parameters
        ----------
        npdf : int
            The total number of PDFs that will be written out

        Returns
        -------
        dict
            A dictionary that defines the storage requirements for this object.
        """
        try:
            weights = kwargs['weights']
        except KeyError as key_error:
            raise KeyError("Required argument `weights` was not included in kwargs") from key_error

        num_weights = np.shape(weights)[-1]
        return {"weights", ((npdf, num_weights), 'f4')}

    @classmethod
    def plot_native(cls, pdf, **kwargs):
        """Plot the PDF in a way that is particular to this type of distribution.

        The PDF is drawn on an evenly spaced grid spanning the x limits returned
        by `get_axes_and_xlims`.

        Parameters
        ----------
        pdf
            The PDF object to plot; it is handed to `plot_pdf_on_axes` unchanged.
        **kwargs
            Forwarded to `get_axes_and_xlims`. From the keywords it hands back,
            `npts` (default 101) sets the number of grid points and the rest
            are forwarded to `plot_pdf_on_axes`.

        Returns
        -------
        The value returned by `plot_pdf_on_axes`.
        """
        axes, xlim, kwarg = get_axes_and_xlims(**kwargs)
        num_points = kwarg.pop('npts', 101)
        xvals = np.linspace(xlim[0], xlim[1], num_points)
        return plot_pdf_on_axes(axes, pdf, xvals, **kwarg)

    @classmethod
    def add_mappings(cls):
        """Add this class's mappings to the conversion dictionary.

        Two creation methods are registered: `create` under the key `None`, and
        `create_from_basis_coef_object` under the key 'basis_coef_object'.
        """
        cls._add_creation_method(cls.create, None)
        cls._add_creation_method(cls.create_from_basis_coef_object, 'basis_coef_object')

    @classmethod
    def make_test_data(cls):
        """Make data for unit tests.

        Populates `cls.test_data` with a fixed set of weights (10 PDFs, 35
        basis-function weights each), the constructor arguments built around
        them, and a 100 point x grid spanning `[Z_MIN, Z_MAX]`.
        """
        WEIGHTS = np.asarray([[ 0.99999994,  1.4135911 ,  1.3578598 ,  1.3848811 ,  1.1752609 ,
         1.2507105 ,  0.96589327,  1.2579455 ,  1.1328095 ,  0.9338199 ,
         1.3668357 ,  0.63097477,  0.19285281, -0.08388292,  0.05250954,
        -0.5464654 , -0.3771514 , -0.3948611 ,  0.13923086, -0.20495746,
        -0.58977485, -0.6391217 , -0.46343976, -0.5011808 , -0.01433064,
         0.278602  ,  0.5333237 ,  0.826034  ,  0.06464108,  0.9108775 ,
         0.6811071 ,  0.69773537, -0.11616451, -0.09364327,  0.63583785],
       [ 0.99999994,  1.3128049 ,  1.4268231 ,  1.3475941 ,  1.3009573 ,
         1.1934606 ,  1.1979764 ,  1.4587557 ,  1.0695385 ,  1.0334687 ,
         0.85049105,  0.6772867 ,  0.8599958 ,  0.7309471 ,  0.30866015,
         0.10747848,  0.1454999 ,  0.4564285 ,  0.83178055,  0.9569013 ,
         0.2805161 ,  0.35286552,  0.58561605,  0.42757383,  0.40403488,
        -0.5502439 ,  0.56439424,  0.21782365,  0.80970615,  0.6189492 ,
         0.9209366 ,  0.01046925, -0.66917616,  0.0304801 , -0.34911576],
       [ 0.99999994,  1.3046595 ,  1.3946912 ,  1.3725231 ,  1.3279371 ,
         1.1379944 ,  1.1232849 ,  1.3168706 ,  1.1987064 ,  0.846475  ,
         1.2190387 ,  1.0319941 ,  0.8385918 ,  0.72406054,  0.4407519 ,
         1.0522529 ,  0.5317534 ,  0.82531404,  0.6055132 ,  0.42970878,
         0.5682917 ,  0.42682788, -0.04017492,  0.32071114,  0.7407263 ,
         0.20112868,  0.28844437, -0.01918357,  0.16105941, -0.9992142 ,
        -0.481242  , -0.3728989 , -0.39303133, -0.556516  , -0.23944338],
       [ 0.99999994,  1.405193  ,  1.3786027 ,  1.3832911 ,  1.3786896 ,
         1.1868116 ,  1.1039548 ,  1.056342  ,  1.253356  ,  1.275163  ,
         1.5149004 ,  0.7893624 ,  1.1212736 ,  0.7551946 ,  0.1665442 ,
         0.31703034, -0.3789813 ,  0.40208268, -0.00154649, -0.22578228,
        -0.754486  ,  0.09544089, -0.7406911 , -1.5187913 , -1.0511639 ,
        -0.9208054 , -0.52502257, -0.79425025,  0.11232897, -0.5873992 ,
        -0.00291769, -1.2490546 ,  0.18622968, -0.4166289 , -0.16232875],
       [ 0.99999994,  1.32483   ,  1.2688403 ,  0.8508245 ,  1.4554728 ,
         1.2448467 ,  0.852745  ,  0.8741474 ,  1.0841464 ,  0.7697048 ,
         1.1911153 ,  0.51762104,  1.1319616 ,  1.3946458 ,  0.82583827,
         0.21972111, -0.16429716, -0.08124515,  0.0241714 , -0.07269649,
         0.04703106,  0.4027557 , -1.1216148 , -0.8540991 , -0.7413664 ,
        -0.35533333, -0.47791988, -0.39957288,  0.1695733 , -0.46430817,
        -0.07995562, -1.0972134 , -0.61197704, -1.1898835 , -0.75323683],
       [ 0.99999994,  1.4217128 ,  1.4090639 ,  1.3527906 ,  1.2788762 ,
         1.0873253 ,  1.0570015 ,  1.1381446 ,  0.73468673,  0.4902846 ,
         0.11609144, -0.43022275, -0.33087614,  0.3467521 ,  0.14698188,
        -0.79639876, -0.7686687 , -1.0865113 , -1.0686133 , -1.0762304 ,
        -0.9354039 , -0.79879427, -0.24612567,  0.01798107, -0.2094559 ,
         0.24940334,  0.12473647,  0.10005763,  0.23591852,  0.33464774,
         0.64543843,  0.24140209,  0.8614289 ,  0.10955815, -0.09307325],
       [ 0.99999994, -0.60270864,  0.3777081 ,  1.0040071 ,  0.5319608 ,
         1.1732529 ,  0.21736576,  1.0385551 ,  0.85155064,  0.8202011 ,
         0.7389486 ,  0.69682765,  0.1181715 ,  0.13482217,  0.7518282 ,
         0.8588988 ,  0.2753361 ,  0.10158755,  0.53366745,  0.5017293 ,
         0.22024332,  0.8345108 ,  0.3317933 ,  0.5323848 ,  0.741613  ,
         0.215265  ,  0.3551328 ,  0.44486073,  0.07836582,  0.00493836,
         0.583493  ,  0.23795973,  0.10176475, -0.08585434, -0.47022513],
       [ 0.99999994,  1.403194  ,  1.3613293 ,  1.2763977 ,  1.0978196 ,
         1.0092797 ,  0.87263453,  0.63493866,  0.3737632 , -0.02474818,
         0.12842114, -0.31487998, -0.18406785, -0.42329717, -0.8819336 ,
        -0.887077  , -0.913117  , -1.1706294 , -1.1096691 , -0.46700883,
        -0.7291215 , -0.20483486, -0.57670075, -0.5173913 ,  0.17409407,
        -0.34383368,  0.11131766,  0.29361913,  0.22329482,  0.4090505 ,
         0.50041765,  1.040421  ,  0.7399761 ,  1.3841617 ,  1.0754173 ],
       [ 0.99999994,  0.5964216 ,  0.46396077,  1.2265164 ,  1.0870706 ,
         1.1584536 ,  0.89783925,  0.7338294 ,  0.7884262 ,  0.41392878,
         0.27348533,  0.60299355, -0.09960458,  0.6036693 , -0.01055456,
         0.32332683, -0.63185304,  0.11284541, -0.30345288, -0.72329307,
        -0.2737094 ,  0.03923929, -0.26043436, -0.5889996 ,  0.09375673,
        -0.27470988, -0.03649841,  0.1934136 , -0.41822934, -0.38939086,
        -0.2009153 , -0.1781136 ,  0.81968015,  0.5067288 ,  0.54687506],
       [ 0.99999994,  1.3862562 ,  1.3533832 ,  1.3327965 ,  1.3019644 ,
         1.3206618 ,  1.3192286 ,  0.97659546,  1.0163264 ,  1.0176893 ,
         0.57915735,  0.7081749 ,  0.7332014 ,  0.5191775 ,  0.07479973,
         0.13503157,  0.25693908, -0.13746639, -0.06378681, -0.2937861 ,
        -0.2938108 ,  0.03345898, -0.45815086, -0.45607626, -0.91071063,
        -0.7797466 , -0.5807737 , -0.34890455, -0.60276383, -0.49033943,
        -0.81330174, -0.4416928 , -0.88592136, -0.7070263 ,  0.02908602]])
        Z_MIN = 0.0
        Z_MAX = 3.0
        BUMP_THRESHOLD = 0.1
        SHARPEN_ALPHA = 1.2
        X_VALS = np.linspace(Z_MIN, Z_MAX, 100)

        # `flexzboost` is the module-level alias for `FlexzboostGen.create`; it
        # is looked up when this method runs, not when the class is defined.
        cls.test_data = {
                "gen_func": flexzboost,
                "ctor_data": {"weights": WEIGHTS,
                           "basis_system_enum_value": BasisSystem.cosine.value,
                           "z_min": Z_MIN,
                           "z_max": Z_MAX,
                           "bump_threshold": BUMP_THRESHOLD,
                           "sharpen_alpha": SHARPEN_ALPHA},
                "test_xvals": X_VALS,
                "weights": WEIGHTS
        }


# Module-level shortcuts to the two creation classmethods of FlexzboostGen.
flexzboost = FlexzboostGen.create
flexzboost_create_from_basis_coef_object = FlexzboostGen.create_from_basis_coef_object

# Register the generator class with the `qp` factory.
add_class(FlexzboostGen)