Source code for qp_flexzboost.flexzboost_pdf

"""This implements a PDF sub-class specifically for FlexZBoost"""
from enum import Enum
from typing import List

import numpy as np
from flexcode.basis_functions import BasisCoefs
from qp.factory import add_class
from qp.pdf_gen import Pdf_rows_gen
from qp.plotting import get_axes_and_xlims, plot_pdf_on_axes
from qp.utils import CASE_FACTOR, CASE_PRODUCT, get_eval_case, interpolate_multi_x_y
from scipy.stats import rv_continuous


# pylint: disable=invalid-name
class BasisSystem(Enum):
    """Enumeration of the basis systems that FlexCode supports.

    The member *names* are used as the ``basis_system`` string of a
    FlexCode ``BasisCoefs`` object, while the integer *values* are what is
    stored as metadata by the generator class in this module.
    """

    cosine = 1
    Fourier = 2
    db4 = 3
# pylint: disable=too-many-arguments,too-many-instance-attributes
class FlexzboostGen(Pdf_rows_gen):
    """Distribution based on weighted basis functions output from FlexZBoost.

    Notes
    -----
    This class is meant primarily to be a compact storage mechanism for
    output from FlexCode. Only the basis function weights and the handful of
    parameters needed to rebuild a `FlexCode:BasisCoefs` object are stored;
    the y-values of the PDFs are evaluated on demand.
    """

    # pylint: disable=protected-access
    name = "flexzboost"
    version = 0

    _support_mask = rv_continuous._support_mask

    def __init__(
        self,
        weights: List[List[float]],
        basis_system_enum_value: int,
        z_min: float,
        z_max: float,
        bump_threshold: float,
        sharpen_alpha: float,
        *args,
        **kwargs
    ):
        """This is the primary constructor for this `qp` generator.

        Parameters
        ----------
        weights : List[List[float]]
            A list of lists where each element is a floating point value.
            The weights represent the contribution of each basis function to
            the final PDF. The shape of `weights` should be N x b, where
            N = number of PDFs and b = number of basis functions.
        basis_system_enum_value : int
            The enum id to define the FlexCode basis system used to produce
            the results
        z_min : float
            The minimum z value considered when producing results with FlexCode
        z_max : float
            The maximum z value considered when producing results with FlexCode
        bump_threshold : float
            A parameter used by FlexCode to remove small bumps from the results
        sharpen_alpha : float
            A parameter used by FlexCode to sharpen peaks in the results

        Notes
        -----
        The argument list of this constructor is admittedly rather long. This
        approach makes it easier to interface with the greater `qp`
        infrastructure. To ease the burden on the user, there is a classmethod
        that accepts a `Flexcode:BasisCoefs` object and unpacks it.
        See the method `FlexzboostGen:create_from_basis_coef_object`.
        """
        weights_array = np.asarray(weights)

        # kwargs['shape'] is used by the parent class to define the total
        # number of PDFs stored in this generator object.
        kwargs["shape"] = weights_array.shape[:-1]
        super().__init__(*args, **kwargs)

        self._weights = weights_array
        self._basis_system_enum_value = basis_system_enum_value
        self._z_min = z_min
        self._z_max = z_max
        self._bump_threshold = None
        self._sharpen_alpha = None

        # Evaluation caches. These live in memory only and are filled lazily.
        self._xvals = None
        self._yvals = None
        self._ycumul = None

        # These two assignments allow the use of property.setter functions,
        # which encapsulate the None-like handling and will also update the
        # parent class metadata as needed.
        self.bump_threshold = bump_threshold
        self.sharpen_alpha = sharpen_alpha

        self._basis_coefficients = self._build_basis_coef_object()

        self._addmetadata("basis_system_enum_value", self._basis_system_enum_value)
        self._addmetadata("z_min", self._z_min)
        self._addmetadata("z_max", self._z_max)
        self._addmetadata("bump_threshold", self._bump_threshold)
        self._addmetadata("sharpen_alpha", self._sharpen_alpha)

        self._addobjdata("weights", self._weights)

    @property
    def basis_system_enum(self) -> BasisSystem:
        """Return the BasisSystem enum for this object.

        Returns
        -------
        BasisSystem
            The BasisSystem enum that defines the basis system used for these
            results.
        """
        return BasisSystem(self._basis_system_enum_value)

    @property
    def z_min(self) -> float:
        """Return the minimum z value used for the results stored in this object.

        Returns
        -------
        float
            Minimum z value used to predict these results
        """
        return self._z_min

    @property
    def z_max(self) -> float:
        """Return the maximum z value used for the results stored in this object.

        Returns
        -------
        float
            Maximum z value used to predict these results
        """
        return self._z_max

    @property
    def bump_threshold(self) -> float:
        """Return the bump threshold used for the results stored in this object.

        Returns
        -------
        float
            Bump threshold value used to predict these results
        """
        return self._bump_threshold

    @bump_threshold.setter
    def bump_threshold(self, new_bump_threshold):
        """This is a setter for bump threshold that allows users to modify
        the parameter on the fly without rerunning the model.

        The conditional logic is a byproduct of the way that scipy will pass
        values to the __init__ method when taking a slice of an ensemble
        containing this generator. `new_bump_threshold` can be passed in as a
        0 dimensional numpy array. For floats this is fine, but the comparison
        logic in Flexcode breaks when a numpy 0 dimensional array (i.e. scalar)
        `None` value is passed in. To account for this, we explicitly assign
        `None` when we detect a None-like input.

        Changing the value also discards any cached PDF/CDF evaluations, since
        those were computed with the previous parameter.

        Parameters
        ----------
        new_bump_threshold : float
            The new bump threshold to use in the BasisCoefs object.
        """
        # We use the `==` comparison because Numpy will broadcast the contents
        # of new_bump_threshold appropriately.
        # pylint: disable-next=singleton-comparison
        if new_bump_threshold == None:  # noqa: E711
            self._bump_threshold = None
        else:
            self._bump_threshold = new_bump_threshold

        # _addmetadata updates the parent class, so that slices into an ensemble
        # will create new instances of this class with the correct values.
        self._addmetadata("bump_threshold", self._bump_threshold)
        self._update_basis_coef_object()
        self._reset_cached_evaluations()

    @property
    def sharpen_alpha(self) -> float:
        """Return the sharpen alpha used for the results stored in this object.

        Returns
        -------
        float
            Sharpen alpha value used to predict these results
        """
        return self._sharpen_alpha

    @sharpen_alpha.setter
    def sharpen_alpha(self, new_sharpen_alpha):
        """This is a setter for sharpen alpha that allows users to modify
        the parameter on the fly without rerunning the model.

        The conditional logic is a byproduct of the way that scipy will pass
        values to the __init__ method when taking a slice of an ensemble
        containing this generator. `new_sharpen_alpha` can be passed in as a
        0 dimensional numpy array. For floats this is fine, but the comparison
        logic in Flexcode breaks when a numpy 0 dimensional array (i.e. scalar)
        `None` value is passed in. To account for this, we explicitly assign
        `None` when we detect a None-like input.

        Changing the value also discards any cached PDF/CDF evaluations, since
        those were computed with the previous parameter.

        Parameters
        ----------
        new_sharpen_alpha : float
            The new sharpen parameter to use in the BasisCoefs object.
        """
        # We use the `==` comparison because Numpy will broadcast the contents
        # of new_sharpen_alpha appropriately.
        # pylint: disable-next=singleton-comparison
        if new_sharpen_alpha == None:  # noqa: E711
            self._sharpen_alpha = None
        else:
            self._sharpen_alpha = new_sharpen_alpha

        # _addmetadata updates the parent class, so that slices into an ensemble
        # will create new instances of this class with the correct values.
        self._addmetadata("sharpen_alpha", self._sharpen_alpha)
        self._update_basis_coef_object()
        self._reset_cached_evaluations()

    @property
    def basis_coefficients(self) -> BasisCoefs:
        """Return the BasisCoefs object built from this object's parameters.

        Returns
        -------
        BasisCoefs
            The object used to evaluate the PDFs. Outside of an evaluation its
            `coefs` attribute is None; the weights are stored on this
            generator instead.
        """
        return self._basis_coefficients

    def _build_basis_coef_object(self):
        """Private method that builds and returns a `FlexCode:BasisCoefs`
        object from the constructor parameters.

        Returns
        -------
        BasisCoefs
            A new object carrying the basis system name, z range, bump
            threshold and sharpen alpha. `coefs` is deliberately None.
        """
        return BasisCoefs(
            coefs=None,
            basis_system=BasisSystem(self._basis_system_enum_value).name,
            z_min=self._z_min,
            z_max=self._z_max,
            bump_threshold=self._bump_threshold,
            sharpen_alpha=self._sharpen_alpha,
        )

    def _update_basis_coef_object(self):
        """Simple method to update the `BasisCoefs` object 'in place'."""
        self._basis_coefficients = self._build_basis_coef_object()

    def _reset_cached_evaluations(self) -> None:
        """Drop cached y-values and cumulative y-values.

        Called whenever an evaluation parameter changes so that the next
        request re-evaluates the basis functions instead of returning values
        computed with the old parameter.
        """
        self._yvals = None
        self._ycumul = None

    def _calculate_yvals_if_needed(self, xvals: List[float]) -> None:
        """If self._yvals is None or the xvals have changed, reevaluate the
        y values.

        Parameters
        ----------
        xvals : List[float]
            The x-values to evaluate the basis function.

        Notes
        -----
        "Changed" is decided by object identity, not by comparing values, so
        a different array holding equal values triggers a re-evaluation.
        """
        if self._yvals is None or xvals is not self._xvals:
            self._evaluate_basis_coefficients(xvals)

    def _evaluate_basis_coefficients(self, xvals: List[float]) -> None:
        """Assign the list of x values to self._xvals. Use that grid to
        evaluate the y_values of PDFs using the weights and parameters stored
        in self._basis_coefficients.

        Parameters
        ----------
        xvals : List[float]
            The x-values to evaluate the analytical PDFs

        Notes
        -----
        We'll maintain a copy of the x values in memory for this object, but it
        won't be stored to disk.

        FlexCode requires that the x values be reshaped, we'll do that in the
        call to `.evaluate`, but we won't keep the reshaped x values in memory.

        The `.evaluate` method expects the `BasisCoefs` object to contain the
        output weights. So we'll add the weights back to the object for
        evaluation, and then remove them when we've completed evaluation. We do
        this to ensure that the value of weights is not accidentally stored to
        disk twice. Once as `self._weights`, and once as
        `self._basis_coefficients.coefs`. The removal happens in a `finally`
        clause so the weights are detached even if `.evaluate` raises.
        """
        # `asanyarray` returns an existing ndarray unchanged (so the identity
        # check in `_calculate_yvals_if_needed` keeps working) while also
        # allowing plain Python sequences to be reshaped below.
        self._xvals = np.asanyarray(xvals)

        self._basis_coefficients.coefs = self._weights
        try:
            # NOTE: `evaluate` may accept a 2d array of values (one row per
            # PDF, one column per x value). For now we stick with the simple
            # case of one set of x values shared by all PDFs.
            self._yvals = self._basis_coefficients.evaluate(self._xvals.reshape(-1, 1))
        finally:
            self._basis_coefficients.coefs = None

    def _compute_ycumul(self, xvals: List[float]) -> None:
        """Compute the cumulative values of y given an x grid.

        The y-values are always re-evaluated on `xvals`. The cumulative values
        are then built with the trapezoid rule over consecutive grid points;
        the first column is set to half of the first y-value times the first
        grid spacing.

        Parameters
        ----------
        xvals : List[float]
            The x-values to evaluate the cumulative y value.
        """
        self._evaluate_basis_coefficients(xvals)

        # Every element is assigned below, so an uninitialised buffer is fine.
        self._ycumul = np.empty(self._yvals.shape)
        self._ycumul[:, 0] = 0.5 * self._yvals[:, 0] * (self._xvals[1] - self._xvals[0])
        self._ycumul[:, 1:] = np.cumsum(
            (self._xvals[1:] - self._xvals[:-1]) * 0.5 * np.add(self._yvals[:, 1:], self._yvals[:, :-1]),
            axis=1,
        )

    # pylint: disable-next=arguments-differ
    def _pdf(self, x: List[float], row: List[int]) -> List[List[float]]:
        """Return the numerical PDFs, evaluated on the grid, `x`.

        Parameters
        ----------
        x : List[float]
            The x-values to evaluate the analytical PDFs
        row : List[int]
            The indices for which numerical PDFs should be generated. Here it
            is only passed to `get_eval_case` to classify the request.

        Returns
        -------
        List[List[float]]
            The flattened y-values of the evaluated PDFs.

        Raises
        ------
        ValueError
            If the evaluation case is neither CASE_PRODUCT nor CASE_FACTOR.
        """
        case_idx, xx, _ = get_eval_case(x, row)
        if case_idx not in [CASE_PRODUCT, CASE_FACTOR]:
            raise ValueError("Only CASE_PRODUCT and CASE_FACTOR are supported.")

        # Calculate yvals for the given x's, if needed
        self._calculate_yvals_if_needed(xx)
        return self._yvals.ravel()

    # pylint: disable-next=arguments-differ
    def _cdf(self, x: List[float], row: List[int]) -> List[List[float]]:
        """Return the numerical CDF, evaluated on the grid, `x`.

        Parameters
        ----------
        x : List[float]
            The x-values to evaluate the analytical CDFs
        row : List[int]
            The indices for which numerical CDFs should be generated. Here it
            is only passed to `get_eval_case` to classify the request.

        Returns
        -------
        List[List[float]]
            The flattened cumulative y-values.

        Raises
        ------
        ValueError
            If the evaluation case is neither CASE_PRODUCT nor CASE_FACTOR.
        """
        case_idx, xx, _ = get_eval_case(x, row)
        if case_idx not in [CASE_PRODUCT, CASE_FACTOR]:
            raise ValueError("Only CASE_PRODUCT and CASE_FACTOR are supported.")

        self._compute_ycumul(xx)
        return self._ycumul.ravel()

    # pylint: disable-next=arguments-differ
    def _ppf(self, x: List[float], row: List[int]) -> List[List[float]]:
        """Return the numerical PPF, evaluated at the quantiles, `x`.

        The CDF is tabulated on a 100 point grid spanning [z_min, z_max] and
        then inverted by interpolation.

        Parameters
        ----------
        x : List[float]
            The quantiles at which to evaluate the PPFs
        row : List[int]
            The indices for which numerical PPFs should be generated

        Returns
        -------
        List[List[float]]
            The flattened z-values corresponding to the requested quantiles.
            Quantiles that fall below/above the tabulated CDF range map to
            `z_min`/`z_max` respectively.
        """
        z_grid = np.linspace(self._z_min, self._z_max, 100)
        self._compute_ycumul(z_grid)

        # The interpolation maps cumulative probability -> z, so the
        # out-of-range fill values must be z-values (the grid limits), not the
        # extremes of the requested quantiles.
        return interpolate_multi_x_y(
            x,
            row,
            self._ycumul,
            self._xvals,
            bounds_error=False,
            fill_value=(self._z_min, self._z_max),
        ).ravel()

    def _updated_ctor_param(self):
        """Specify the constructor parameters. This is required by scipy in
        order to extend the rv_continuous class.

        Returns
        -------
        dct : dict
            Dictionary of the constructor arguments and object instance
            variables needed to create this object.
        """
        dct = super()._updated_ctor_param()
        dct["weights"] = self._weights
        dct["basis_system_enum_value"] = self._basis_system_enum_value
        dct["z_min"] = self._z_min
        dct["z_max"] = self._z_max
        dct["bump_threshold"] = self._bump_threshold
        dct["sharpen_alpha"] = self._sharpen_alpha
        return dct

    @classmethod
    def create_from_basis_coef_object(
        cls, weights: List[List[float]], basis_coefficients_object: BasisCoefs, **kwargs
    ) -> Pdf_rows_gen:
        """This is a convenience method that allows the user to define a
        generator by passing a `BasisCoefs` object, instead of the five
        individual evaluation parameters.

        Parameters
        ----------
        weights : List[List[float]]
            A list of lists where each element is a floating point value.
            The weights represent the contribution of each basis function to
            the final PDF. The shape of `weights` should be N x b, where
            N = number of PDFs and b = number of basis functions.
        basis_coefficients_object : BasisCoefs
            An object that carries the parameters required to define the set
            of basis functions (basis system name, z range, bump threshold and
            sharpen alpha).
        **kwargs
            Forwarded to the call made on the newly built generator object.

        Returns
        -------
        FlexzboostGen
            The result of calling the newly built generator with `**kwargs`.
            Note that FlexzboostGen is a subclass of Pdf_rows_gen, the return
            type defined in the method signature.
        """
        generator_object = cls(
            weights=weights,
            basis_system_enum_value=BasisSystem[basis_coefficients_object.basis_system].value,
            z_min=basis_coefficients_object.z_min,
            z_max=basis_coefficients_object.z_max,
            bump_threshold=basis_coefficients_object.bump_threshold,
            sharpen_alpha=basis_coefficients_object.sharpen_alpha,
        )

        return generator_object(**kwargs)

    @classmethod
    def get_allocation_kwds(cls, npdf, **kwargs):
        """Return the keywords necessary to create an 'empty' hdf5 file with
        npdf entries for iterative file write out. We only need to allocate
        the objdata columns, as the metadata can be written when we finalize
        the file.

        Parameters
        ----------
        npdf : int
            The total number of PDFs that will be written out
        **kwargs
            Must contain `weights`; only the size of its last axis is used.

        Returns
        -------
        dict
            A dictionary that defines the storage requirements for this
            object: ``{"weights": ((npdf, num_weights), "f4")}``.

        Raises
        ------
        KeyError
            If `weights` is not present in `kwargs`.
        """
        try:
            weights = kwargs["weights"]
        except KeyError as key_error:
            raise KeyError("Required argument `weights` was not included in kwargs") from key_error

        num_weights = np.shape(weights)[-1]
        # A mapping of column name -> (shape, dtype); note the colon, a comma
        # here would build a set instead of a dict.
        return {"weights": ((npdf, num_weights), "f4")}

    @classmethod
    def plot_native(cls, pdf, **kwargs):
        """Plot the PDF in a way that is particular to this type of
        distribution.

        Here we'll use x,y points derived from the weights and FlexCode
        evaluation parameters. The number of grid points can be set with the
        `npts` keyword (default 101).
        """
        axes, xlim, kwarg = get_axes_and_xlims(**kwargs)
        xvals = np.linspace(xlim[0], xlim[1], kwarg.pop("npts", 101))
        return plot_pdf_on_axes(axes, pdf, xvals, **kwarg)

    @classmethod
    def add_mappings(cls):
        """Add this class's mappings to the conversion dictionary."""
        cls._add_creation_method(cls.create, None)
        cls._add_creation_method(cls.create_from_basis_coef_object, "basis_coef_object")

    @classmethod
    def make_test_data(cls):
        """Make data for unit tests.

        Stores a dictionary on `cls.test_data` holding ten sets of 35 basis
        function weights, the constructor arguments and an x grid.
        """
        WEIGHTS = np.asarray(
            [
                [
                    0.99999994, 1.4135911, 1.3578598, 1.3848811, 1.1752609,
                    1.2507105, 0.96589327, 1.2579455, 1.1328095, 0.9338199,
                    1.3668357, 0.63097477, 0.19285281, -0.08388292, 0.05250954,
                    -0.5464654, -0.3771514, -0.3948611, 0.13923086, -0.20495746,
                    -0.58977485, -0.6391217, -0.46343976, -0.5011808, -0.01433064,
                    0.278602, 0.5333237, 0.826034, 0.06464108, 0.9108775,
                    0.6811071, 0.69773537, -0.11616451, -0.09364327, 0.63583785,
                ],
                [
                    0.99999994, 1.3128049, 1.4268231, 1.3475941, 1.3009573,
                    1.1934606, 1.1979764, 1.4587557, 1.0695385, 1.0334687,
                    0.85049105, 0.6772867, 0.8599958, 0.7309471, 0.30866015,
                    0.10747848, 0.1454999, 0.4564285, 0.83178055, 0.9569013,
                    0.2805161, 0.35286552, 0.58561605, 0.42757383, 0.40403488,
                    -0.5502439, 0.56439424, 0.21782365, 0.80970615, 0.6189492,
                    0.9209366, 0.01046925, -0.66917616, 0.0304801, -0.34911576,
                ],
                [
                    0.99999994, 1.3046595, 1.3946912, 1.3725231, 1.3279371,
                    1.1379944, 1.1232849, 1.3168706, 1.1987064, 0.846475,
                    1.2190387, 1.0319941, 0.8385918, 0.72406054, 0.4407519,
                    1.0522529, 0.5317534, 0.82531404, 0.6055132, 0.42970878,
                    0.5682917, 0.42682788, -0.04017492, 0.32071114, 0.7407263,
                    0.20112868, 0.28844437, -0.01918357, 0.16105941, -0.9992142,
                    -0.481242, -0.3728989, -0.39303133, -0.556516, -0.23944338,
                ],
                [
                    0.99999994, 1.405193, 1.3786027, 1.3832911, 1.3786896,
                    1.1868116, 1.1039548, 1.056342, 1.253356, 1.275163,
                    1.5149004, 0.7893624, 1.1212736, 0.7551946, 0.1665442,
                    0.31703034, -0.3789813, 0.40208268, -0.00154649, -0.22578228,
                    -0.754486, 0.09544089, -0.7406911, -1.5187913, -1.0511639,
                    -0.9208054, -0.52502257, -0.79425025, 0.11232897, -0.5873992,
                    -0.00291769, -1.2490546, 0.18622968, -0.4166289, -0.16232875,
                ],
                [
                    0.99999994, 1.32483, 1.2688403, 0.8508245, 1.4554728,
                    1.2448467, 0.852745, 0.8741474, 1.0841464, 0.7697048,
                    1.1911153, 0.51762104, 1.1319616, 1.3946458, 0.82583827,
                    0.21972111, -0.16429716, -0.08124515, 0.0241714, -0.07269649,
                    0.04703106, 0.4027557, -1.1216148, -0.8540991, -0.7413664,
                    -0.35533333, -0.47791988, -0.39957288, 0.1695733, -0.46430817,
                    -0.07995562, -1.0972134, -0.61197704, -1.1898835, -0.75323683,
                ],
                [
                    0.99999994, 1.4217128, 1.4090639, 1.3527906, 1.2788762,
                    1.0873253, 1.0570015, 1.1381446, 0.73468673, 0.4902846,
                    0.11609144, -0.43022275, -0.33087614, 0.3467521, 0.14698188,
                    -0.79639876, -0.7686687, -1.0865113, -1.0686133, -1.0762304,
                    -0.9354039, -0.79879427, -0.24612567, 0.01798107, -0.2094559,
                    0.24940334, 0.12473647, 0.10005763, 0.23591852, 0.33464774,
                    0.64543843, 0.24140209, 0.8614289, 0.10955815, -0.09307325,
                ],
                [
                    0.99999994, -0.60270864, 0.3777081, 1.0040071, 0.5319608,
                    1.1732529, 0.21736576, 1.0385551, 0.85155064, 0.8202011,
                    0.7389486, 0.69682765, 0.1181715, 0.13482217, 0.7518282,
                    0.8588988, 0.2753361, 0.10158755, 0.53366745, 0.5017293,
                    0.22024332, 0.8345108, 0.3317933, 0.5323848, 0.741613,
                    0.215265, 0.3551328, 0.44486073, 0.07836582, 0.00493836,
                    0.583493, 0.23795973, 0.10176475, -0.08585434, -0.47022513,
                ],
                [
                    0.99999994, 1.403194, 1.3613293, 1.2763977, 1.0978196,
                    1.0092797, 0.87263453, 0.63493866, 0.3737632, -0.02474818,
                    0.12842114, -0.31487998, -0.18406785, -0.42329717, -0.8819336,
                    -0.887077, -0.913117, -1.1706294, -1.1096691, -0.46700883,
                    -0.7291215, -0.20483486, -0.57670075, -0.5173913, 0.17409407,
                    -0.34383368, 0.11131766, 0.29361913, 0.22329482, 0.4090505,
                    0.50041765, 1.040421, 0.7399761, 1.3841617, 1.0754173,
                ],
                [
                    0.99999994, 0.5964216, 0.46396077, 1.2265164, 1.0870706,
                    1.1584536, 0.89783925, 0.7338294, 0.7884262, 0.41392878,
                    0.27348533, 0.60299355, -0.09960458, 0.6036693, -0.01055456,
                    0.32332683, -0.63185304, 0.11284541, -0.30345288, -0.72329307,
                    -0.2737094, 0.03923929, -0.26043436, -0.5889996, 0.09375673,
                    -0.27470988, -0.03649841, 0.1934136, -0.41822934, -0.38939086,
                    -0.2009153, -0.1781136, 0.81968015, 0.5067288, 0.54687506,
                ],
                [
                    0.99999994, 1.3862562, 1.3533832, 1.3327965, 1.3019644,
                    1.3206618, 1.3192286, 0.97659546, 1.0163264, 1.0176893,
                    0.57915735, 0.7081749, 0.7332014, 0.5191775, 0.07479973,
                    0.13503157, 0.25693908, -0.13746639, -0.06378681, -0.2937861,
                    -0.2938108, 0.03345898, -0.45815086, -0.45607626, -0.91071063,
                    -0.7797466, -0.5807737, -0.34890455, -0.60276383, -0.49033943,
                    -0.81330174, -0.4416928, -0.88592136, -0.7070263, 0.02908602,
                ],
            ]
        )

        Z_MIN = 0.0
        Z_MAX = 3.0
        BUMP_THRESHOLD = 0.1
        SHARPEN_ALPHA = 1.2
        X_VALS = np.linspace(Z_MIN, Z_MAX, 100)

        cls.test_data = {
            # `flexzboost` is the module-level alias of `FlexzboostGen.create`;
            # it is resolved when this method runs, not at class definition.
            "gen_func": flexzboost,
            "ctor_data": {
                "weights": WEIGHTS,
                "basis_system_enum_value": BasisSystem.cosine.value,
                "z_min": Z_MIN,
                "z_max": Z_MAX,
                "bump_threshold": BUMP_THRESHOLD,
                "sharpen_alpha": SHARPEN_ALPHA,
            },
            "test_xvals": X_VALS,
            "weights": WEIGHTS,
        }
# Module-level creation shortcuts bound to the generator's classmethods.
flexzboost = FlexzboostGen.create
flexzboost_create_from_basis_coef_object = FlexzboostGen.create_from_basis_coef_object

# Hand the generator class to `qp.factory.add_class`.
add_class(FlexzboostGen)