# Source code for madminer.likelihood.manipulate

import logging
import time

import numpy as np

from scipy.stats import chi2
from scipy.optimize import minimize

logger = logging.getLogger(__name__)


def fix_params(negative_log_likelihood, theta, fixed_components=None):
    """
    Reduces the dimensionality of a likelihood function by fixing some of its components.

    Parameters
    ----------
    negative_log_likelihood : likelihood
        Function returned by a Likelihood class (for example
        `NeuralLikelihood.create_expected_negative_log_likelihood()`) which takes an
        n-dimensional input parameter. By convention, calling it with None returns
        the expected input dimensionality n.

    theta : list of float
        m-dimensional vector of coordinate values which will be fixed.

    fixed_components : list of int or None, optional
        m-dimensional vector of the coordinate indices fixed by theta.
        `fixed_components=[0,1]` fixes the 1st and 2nd component of the parameter
        point. If None, uses [0, ..., m-1]. Default: None.

    Returns
    -------
    constrained_nll : likelihood
        Constrained likelihood function which takes an (n-m)-dimensional input
        parameter (and returns n-m when called with None).
    """
    if fixed_components is None:
        fixed_components = list(range(len(theta)))

    def constrained_nll(params):
        # Convention of the wrapped likelihood: calling with None yields its
        # full dimensionality n
        n_dimension = negative_log_likelihood(None)

        # Mirror that convention for the constrained likelihood: report n - m
        if params is None:
            return n_dimension - len(fixed_components)

        # Validate input
        if len(theta) != len(fixed_components):
            raise ValueError("Length of fixed_components and theta should be the same")
        if len(params) + len(fixed_components) != n_dimension:
            raise ValueError(f"Length of params should be {n_dimension - len(fixed_components)}")

        # Assemble the full n-dimensional point: fixed slots take the values from
        # theta; the remaining slots are filled from params in increasing index
        # order (vectorized; the original filled them in an explicit index loop)
        fixed_idx = list(fixed_components)
        params_full = np.zeros(n_dimension)
        params_full[fixed_idx] = theta
        free_mask = np.ones(n_dimension, dtype=bool)
        free_mask[fixed_idx] = False
        params_full[free_mask] = params

        return negative_log_likelihood(params_full)

    return constrained_nll
def project_log_likelihood(
    negative_log_likelihood,
    remaining_components=None,
    grid_ranges=None,
    grid_resolutions=25,
    dof=None,
    thetas_eval=None,
):
    """
    Takes a likelihood function depending on N parameters, and evaluates it for a set
    of M-dimensional parameter points (either a grid or explicitly specified points)
    while the remaining N-M parameters are set to zero.

    Parameters
    ----------
    negative_log_likelihood : likelihood
        Function returned by a Likelihood class (for example
        `NeuralLikelihood.create_expected_negative_log_likelihood()`). By convention,
        calling it with None returns the total number of parameters N.

    remaining_components : list of int or None, optional
        List with M entries, each an int with 0 <= remaining_components[i] < N.
        Denotes which parameters are kept, and their new order. All other parameters
        are projected out (set to zero). If None, all components are kept.
        Default value: None.

    grid_ranges : list of (tuple of float) or None, optional
        Boundaries of the parameter grid on which the p-values are evaluated. It
        should be `[(min, max), (min, max), ..., (min, max)]`, where the list goes
        over all kept parameters and `min` and `max` are float. If None, thetas_eval
        has to be given. Default value: None.

    grid_resolutions : int or list of int, optional
        Resolution of the parameter-space grid on which the p-values are evaluated.
        If int, the resolution is the same along every dimension of the hypercube.
        If list of int, the individual entries specify the number of points along
        each parameter individually. Must be set to None when thetas_eval is given.
        Default value: 25.

    dof : int or None, optional
        If not None, sets the number of parameters for the calculation of the
        p-values. If None, the number of kept parameters is used.
        Default value: None.

    thetas_eval : ndarray or None, optional
        Manually specifies the parameter points at which the likelihood and p-values
        are evaluated. If None, grid_ranges and grid_resolutions are used instead to
        construct a regular grid. Default value: None.

    Returns
    -------
    parameter_grid : ndarray
        Parameter points at which the p-values are evaluated, with shape
        `(n_grid_points, m_parameters)`.

    p_values : ndarray
        Observed p-values for each parameter point on the grid, with shape
        `(n_grid_points,)`.

    mle : int
        Index of the parameter point with the best fit (largest p-value / smallest
        -2 log likelihood ratio).

    log_likelihood_ratio : ndarray
        Log likelihood ratio for each point of the grid, with shape
        `(n_grid_points,)`.
    """
    # Number of parameters the likelihood expects (convention: nll(None) -> N)
    n_parameters = negative_log_likelihood(None)
    if remaining_components is None:
        remaining_components = range(n_parameters)
    m_parameters = len(remaining_components)

    # Degrees of freedom for the chi2 distribution of -2 log r
    if dof is None:
        dof = m_parameters

    # Theta grid
    if thetas_eval is None and grid_resolutions is None:
        raise ValueError("thetas_eval and grid_resolutions cannot both be None")
    elif thetas_eval is not None and grid_resolutions is not None:
        raise ValueError("thetas_eval and grid_resolutions cannot both be set, make up your mind!")
    elif thetas_eval is None:
        if isinstance(grid_resolutions, int):
            # Bug fix: the original called range(grid_ranges) on a list of tuples,
            # which raises TypeError; one resolution per grid dimension is intended
            grid_resolutions = [grid_resolutions] * len(grid_ranges)
        if len(grid_resolutions) != m_parameters:
            raise ValueError("Dimension of grid should be the same as number of remaining components!")
        theta_each = [
            np.linspace(theta_min, theta_max, resolution)
            for resolution, (theta_min, theta_max) in zip(grid_resolutions, grid_ranges)
        ]
        theta_grid_each = np.meshgrid(*theta_each, indexing="ij")
        theta_grid_each = [theta.flatten() for theta in theta_grid_each]
        theta_grid_mdim = np.vstack(theta_grid_each).T
    else:
        theta_grid_mdim = thetas_eval

    # Embed each M-dimensional point into N dimensions (projected-out components = 0)
    theta_grid_ndim = []
    for theta_mdim in theta_grid_mdim:
        theta_ndim = np.zeros([n_parameters])
        for i, theta in zip(remaining_components, theta_mdim):
            theta_ndim[i] = theta
        theta_grid_ndim.append(theta_ndim)

    # Evaluate -E[log L] on the grid
    log_r = np.array([-1.0 * negative_log_likelihood(theta) for theta in theta_grid_ndim])

    # Normalize to the maximum-likelihood point and convert -2 log r to p-values
    i_ml = np.argmax(log_r)
    log_r = log_r - log_r[i_ml]
    m2_log_r = -2.0 * log_r
    p_value = chi2.sf(x=m2_log_r, df=dof)

    return theta_grid_mdim, p_value, i_ml, log_r
def profile_log_likelihood(
    negative_log_likelihood,
    remaining_components=None,
    grid_ranges=None,
    grid_resolutions=25,
    thetas_eval=None,
    theta_start=None,
    dof=None,
    method="TNC",
):
    """
    Takes a likelihood function depending on N parameters, and evaluates it for a set
    of M-dimensional parameter points (either a grid or explicitly specified points)
    while the remaining N-M parameters are profiled over (minimized).

    Parameters
    ----------
    negative_log_likelihood : likelihood
        Function returned by a Likelihood class (for example
        `NeuralLikelihood.create_expected_negative_log_likelihood()`). By convention,
        calling it with None returns the total number of parameters N.

    remaining_components : list of int or None, optional
        List with M entries, each an int with 0 <= remaining_components[i] < N.
        Denotes which parameters are kept, and their new order. All other parameters
        are profiled over. If None, all components are kept. Default value: None.

    grid_ranges : list of (tuple of float) or None, optional
        Boundaries of the parameter grid on which the p-values are evaluated. It
        should be `[(min, max), (min, max), ..., (min, max)]`, where the list goes
        over all kept parameters and `min` and `max` are float. If None, thetas_eval
        has to be given. Default value: None.

    grid_resolutions : int or list of int, optional
        Resolution of the parameter-space grid on which the p-values are evaluated.
        If int, the resolution is the same along every dimension of the hypercube.
        If list of int, the individual entries specify the number of points along
        each parameter individually. Must be set to None when thetas_eval is given.
        Default value: 25.

    thetas_eval : ndarray or None, optional
        Manually specifies the parameter points at which the likelihood and p-values
        are evaluated. If None, grid_ranges and grid_resolutions are used instead to
        construct a regular grid. Default value: None.

    theta_start : ndarray or None, optional
        Starting point for the minimization algorithm which finds the maximum
        likelihood point. If None, theta_start = 0 is used. Default value: None.

    dof : int or None, optional
        If not None, sets the number of parameters for the calculation of the
        p-values. If None, the number of kept parameters is used.
        Default value: None.

    method : {"TNC", "L-BFGS-B"}, optional
        Minimization method used. Default value: "TNC".

    Returns
    -------
    parameter_grid : ndarray
        Parameter points at which the p-values are evaluated, with shape
        `(n_grid_points, m_parameters)`.

    p_values : ndarray
        Observed p-values for each parameter point on the grid, with shape
        `(n_grid_points,)`.

    mle : int
        Index of the parameter point with the best fit (largest p-value / smallest
        -2 log likelihood ratio).

    log_likelihood_ratio : ndarray
        Profiled log likelihood ratio for each point of the grid, with shape
        `(n_grid_points,)`.
    """
    # Number of parameters the likelihood expects (convention: nll(None) -> N)
    n_parameters = negative_log_likelihood(None)
    if remaining_components is None:
        remaining_components = range(n_parameters)
    m_parameters = len(remaining_components)

    # Degrees of freedom for the chi2 distribution of -2 log r
    if dof is None:
        dof = m_parameters

    # Method (bug fix: the original whitelist contained " L-BFGS-B" with a stray
    # leading space, a spelling scipy.optimize.minimize rejects; strip and accept
    # both the correct and the historical spelling)
    method = method.strip()
    if method not in {"TNC", "L-BFGS-B"}:
        raise ValueError(f"Method {method} unknown.")

    # Initial guess for theta
    if theta_start is None:
        theta_start = np.zeros(n_parameters)

    # Theta grid
    if thetas_eval is None and grid_resolutions is None:
        raise ValueError("thetas_eval and grid_resolutions cannot both be None")
    elif thetas_eval is not None and grid_resolutions is not None:
        raise ValueError("thetas_eval and grid_resolutions cannot both be set, make up your mind!")
    elif thetas_eval is None:
        if isinstance(grid_resolutions, int):
            # Bug fix: the original called range(grid_ranges) on a list of tuples,
            # which raises TypeError; one resolution per grid dimension is intended
            grid_resolutions = [grid_resolutions] * len(grid_ranges)
        if len(grid_resolutions) != m_parameters:
            raise ValueError("Dimension of grid should be the same as number of remaining components!")
        theta_each = [
            np.linspace(theta_min, theta_max, resolution)
            for resolution, (theta_min, theta_max) in zip(grid_resolutions, grid_ranges)
        ]
        theta_grid_each = np.meshgrid(*theta_each, indexing="ij")
        theta_grid_each = [theta.flatten() for theta in theta_grid_each]
        theta_grid_mdim = np.vstack(theta_grid_each).T
    else:
        theta_grid_mdim = thetas_eval

    # Obtain global minimum - Eq.(59) in 1805.00020
    result = minimize(negative_log_likelihood, x0=theta_start, method=method)
    best_fit_global = result.x

    # Scan over the grid
    log_r = []
    next_report = 0.01
    start_time = time.time()
    for iscan, theta_mdim in enumerate(theta_grid_mdim):
        # Progress logging: report at 1% steps up to ~10%, then at 10% steps
        if iscan / len(theta_grid_mdim) > next_report:
            elapsed_time = time.time() - start_time
            logger.info("Processed %s %% of parameter points in %.1f seconds.", next_report * 100, elapsed_time)
            while iscan / len(theta_grid_mdim) > next_report:
                next_report += 0.1 if next_report > 0.095 else 0.01

        # Fix the scanned components at theta_mdim; the rest remain free
        constrained_negative_log_likelihood = fix_params(
            negative_log_likelihood, theta=theta_mdim, fixed_components=remaining_components
        )

        # Starting point for the profiled (non-fixed) components
        theta0 = [theta for i, theta in enumerate(theta_start) if i not in remaining_components]

        # Obtain local minimum - Eq.(58) in 1805.00020
        result = minimize(constrained_negative_log_likelihood, x0=np.array(theta0), method=method)
        best_fit_constrained = result.x

        # Expected log likelihood ratio - Eq.(57) in 1805.00020
        profiled_logr = -1.0 * (
            constrained_negative_log_likelihood(best_fit_constrained) - negative_log_likelihood(best_fit_global)
        )
        log_r.append(profiled_logr)

    # Evaluate p-values and the best-fit point.
    # Bug fix: the original assigned np.array(log_r) to an unused name "logr" and
    # then subtracted a float from the plain list, which raises a TypeError
    log_r = np.array(log_r)
    i_ml = np.argmax(log_r)
    log_r = log_r - log_r[i_ml]
    m2_log_r = -2.0 * log_r
    p_value = chi2.sf(x=m2_log_r, df=dof)

    return theta_grid_mdim, p_value, i_ml, log_r