import logging
import numpy as np
import time
from scipy.stats import chi2
from scipy.optimize import minimize
logger = logging.getLogger(__name__)
[docs]def fix_params(negative_log_likelihood, theta, fixed_components=None):
"""
Function that reduces the dimensionality of a likelihood function by
fixing some of the components.
Parameters
----------
negative_log_likelihood : likelihood
Function returned by Likelihood class (for example
NeuralLikelihood.create_expected_negative_log_likelihood()`)
which takes an n-dimensional input parameter.
theta : list of float
m-dimensional vector of coordinate which will be fixed.
fixed_components : list of int or None, optional.
m-dimensional vector of coordinate indices provided in theta.
`fixed_components=[0,1]` will fix the 1st and 2nd
component of the parameter point. If None, uses [0, ..., m-1].
Returns
-------
constrained_nll_negative_log_likelihood : likelihood
Constrained likelihood function which takes an
n-m dimensional input parameter.
"""
if fixed_components is None:
fixed_components = list(range(len(theta)))
def constrained_nll(params):
# Just return the expected Length
n_dimension = negative_log_likelihood(None)
if params is None:
return n_dimension - len(fixed_components)
# Process input
if len(theta) != len(fixed_components):
raise ValueError("Length of fixed_components and theta should be the same")
if len(params) + len(fixed_components) != n_dimension:
raise ValueError(f"Length of params should be {n_dimension-len(fixed_components)}")
# Initialize full parameters
params_full = np.zeros(n_dimension)
# fill fixed components
for icomp, thetacomp in zip(fixed_components, theta):
params_full[icomp] = thetacomp
# fill other components
iparam = 0
for i in range(len(params_full)):
if i not in fixed_components:
params_full[i] = params[iparam]
iparam += 1
# Return
params_full = np.array(params_full)
return negative_log_likelihood(params_full)
return constrained_nll
[docs]def project_log_likelihood(
negative_log_likelihood,
remaining_components=None,
grid_ranges=None,
grid_resolutions=25,
dof=None,
thetas_eval=None,
):
"""
Takes a likelihood function depending on N parameters, and evaluates
for a set of M-dimensional parameter points (either grid or explicitly specified)
while the remaining N-M parameters are set to zero.
Parameters
----------
negative_log_likelihood : likelihood
Function returned by Likelihood class (for example
NeuralLikelihood.create_expected_negative_log_likelihood()`).
remaining_components : list of int or None , optional
List with M entries, each an int with 0 <= remaining_components[i] < N.
Denotes which parameters are kept, and their new order.
All other parameters or projected out (set to zero). If None, all components
are kept. Default: None
grid_ranges : list of (tuple of float) or None, optional
Specifies the boundaries of the parameter grid on which the p-values
are evaluated. It should be `[(min, max), (min, max), ..., (min, max)]`,
where the list goes over all parameters and `min` and `max` are
float. If None, thetas_eval has to be given. Default: None.
grid_resolutions : int or list of int, optional
Resolution of the parameter space grid on which the p-values are
evaluated. If int, the resolution is the same along every dimension
of the hypercube. If list of int, the individual entries specify the number of
points along each parameter individually. Doesn't have any effect if
grid_ranges is None. Default value: 25.
dof : int or None, optional
If not None, sets the number of parameters for the calculation of the p-values.
If None, the overall number of parameters is used. Default value: None.
thetas_eval : ndarray or None , optional
Manually specifies the parameter point at which the likelihood and p-values
are evaluated. If None, grid_ranges and resolution are used instead to construct
a regular grid. Default value: None.
Returns
-------
parameter_grid : ndarray
Parameter points at which the p-values are evaluated with shape
`(n_grid_points, n_parameters)`.
p_values : ndarray
Observed p-values for each parameter point on the grid,
with shape `(n_grid_points,)`.
mle : int
Index of the parameter point with the best fit (largest p-value
/ smallest -2 log likelihood ratio).
log_likelihood_ratio : ndarray or None
log likelihood ratio based only on kinematics for each point of the grid,
with shape `(n_grid_points,)`.
"""
# Components
n_parameters = negative_log_likelihood(None)
if remaining_components is None:
remaining_components = range(n_parameters)
m_paramaters = len(remaining_components)
# DOF
if dof is None:
dof = m_paramaters
# Theta grid
if thetas_eval is None and grid_resolutions is None:
raise ValueError("thetas_eval and grid_resolutions cannot both be None")
elif thetas_eval is not None and grid_resolutions is not None:
raise ValueError("thetas_eval and grid_resolutions cannot both be set, make up your mind!")
elif thetas_eval is None:
if isinstance(grid_resolutions, int):
grid_resolutions = [grid_resolutions for _ in range(grid_ranges)]
if len(grid_resolutions) != m_paramaters:
raise ValueError("Dimension of grid should be the same as number of remaining components!")
theta_each = []
for resolution, (theta_min, theta_max) in zip(grid_resolutions, grid_ranges):
theta_each.append(np.linspace(theta_min, theta_max, resolution))
theta_grid_each = np.meshgrid(*theta_each, indexing="ij")
theta_grid_each = [theta.flatten() for theta in theta_grid_each]
theta_grid_mdim = np.vstack(theta_grid_each).T
else:
theta_grid_mdim = thetas_eval
# Obtain a theta_grid in n dimensions
theta_grid_ndim = []
for theta_mdim in theta_grid_mdim:
theta_ndim = np.zeros([n_parameters])
for i, theta in zip(remaining_components, theta_mdim):
theta_ndim[i] = theta
theta_grid_ndim.append(theta_ndim)
# evaluate -2 E[log r]
log_r = np.array([-1.0 * negative_log_likelihood(theta) for theta in theta_grid_ndim])
i_ml = np.argmax(log_r)
log_r = log_r[:] - log_r[i_ml]
m2_log_r = -2.0 * log_r
p_value = chi2.sf(x=m2_log_r, df=dof)
return theta_grid_mdim, p_value, i_ml, log_r
[docs]def profile_log_likelihood(
negative_log_likelihood,
remaining_components=None,
grid_ranges=None,
grid_resolutions=25,
thetas_eval=None,
theta_start=None,
dof=None,
method="TNC",
):
"""
Takes a likelihood function depending on N parameters, and evaluates
for a set of M-dimensional parameter points (either grid or explicitly specified)
while the remaining N-M parameters are profiled over.
Parameters
----------
negative_log_likelihood : likelihood
Function returned by Likelihood class (for example
NeuralLikelihood.create_expected_negative_log_likelihood()`).
remaining_components : list of int or None , optional
List with M entries, each an int with 0 <= remaining_components[i] < N.
Denotes which parameters are kept, and their new order.
All other parameters or projected out (set to zero). If None, all components
are kept. Default: None
grid_ranges : list of (tuple of float) or None, optional
Specifies the boundaries of the parameter grid on which the p-values
are evaluated. It should be `[(min, max), (min, max), ..., (min, max)]`,
where the list goes over all parameters and `min` and `max` are
float. If None, thetas_eval has to be given. Default: None.
grid_resolutions : int or list of int, optional
Resolution of the parameter space grid on which the p-values are
evaluated. If int, the resolution is the same along every dimension
of the hypercube. If list of int, the individual entries specify the number of
points along each parameter individually. Doesn't have any effect if
grid_ranges is None. Default value: 25.
thetas_eval : ndarray or None , optional
Manually specifies the parameter point at which the likelihood and p-values
are evaluated. If None, grid_ranges and resolution are used instead to construct
a regular grid. Default value: None.
theta_start : ndarray or None , optional
Manually specifies a parameter point which is the starting point
for the minimization algorithm which find the maximum likelihood point.
If None, theta_start = 0 is used.
Default is None.
dof : int or None, optional
If not None, sets the number of parameters for the calculation of the p-values.
If None, the overall number of parameters is used. Default value: None.
method : {"TNC", " L-BFGS-B"} , optional
Mimization method used. Default value: "TNC"
Returns
-------
parameter_grid : ndarray
Parameter points at which the p-values are evaluated with shape
`(n_grid_points, n_parameters)`.
p_values : ndarray
Observed p-values for each parameter point on the grid,
with shape `(n_grid_points,)`.
mle : int
Index of the parameter point with the best fit (largest p-value
/ smallest -2 log likelihood ratio).
log_likelihood_ratio : ndarray or None
log likelihood ratio based only on kinematics for each point of the grid,
with shape `(n_grid_points,)`.
"""
# Components
n_parameters = negative_log_likelihood(None)
if remaining_components is None:
remaining_components = range(n_parameters)
m_paramaters = len(remaining_components)
# DOF
if dof is None:
dof = m_paramaters
# Method
if method not in {"TNC", " L-BFGS-B"}:
raise ValueError(f"Method {method} unknown.")
# Initial guess for theta
if theta_start is None:
theta_start = np.zeros(n_parameters)
# Theta grid
if thetas_eval is None and grid_resolutions is None:
raise ValueError("thetas_eval and grid_resolutions cannot both be None")
elif thetas_eval is not None and grid_resolutions is not None:
raise ValueError("thetas_eval and grid_resolutions cannot both be set, make up your mind!")
elif thetas_eval is None:
if isinstance(grid_resolutions, int):
grid_resolutions = [grid_resolutions for _ in range(grid_ranges)]
if len(grid_resolutions) != m_paramaters:
raise ValueError("Dimension of grid should be the same as number of remaining components!")
theta_each = []
for resolution, (theta_min, theta_max) in zip(grid_resolutions, grid_ranges):
theta_each.append(np.linspace(theta_min, theta_max, resolution))
theta_grid_each = np.meshgrid(*theta_each, indexing="ij")
theta_grid_each = [theta.flatten() for theta in theta_grid_each]
theta_grid_mdim = np.vstack(theta_grid_each).T
else:
theta_grid_mdim = thetas_eval
# Obtain global minimum - Eq.(59) in 1805.00020
result = minimize(negative_log_likelihood, x0=theta_start, method=method)
best_fit_global = result.x
# scan over grid
log_r = []
pscan = 0.01
start_time = time.time()
for iscan, theta_mdim in enumerate(theta_grid_mdim):
# logger output
if iscan / len(theta_grid_mdim) > pscan:
elapsed_time = time.time() - start_time
logger.info("Processed %s %% of parameter points in %.1f seconds.", pscan * 100, elapsed_time)
while iscan / len(theta_grid_mdim) > pscan:
if pscan > 0.095:
pscan += 0.1
else:
pscan += 0.01
# fix some parameters
constrained_negative_log_likelihood = fix_params(
negative_log_likelihood, theta=theta_mdim, fixed_components=remaining_components
)
# obtain starting point
theta0 = []
for i, theta in enumerate(theta_start):
if i not in remaining_components:
theta0.append(theta)
# obtain local minimum - Eq.(58) in 1805.00020
result = minimize(constrained_negative_log_likelihood, x0=np.array(theta0), method=method)
best_fit_constrained = result.x
# Expected Log Likelihood - Eq.(57) in 1805.00020
profiled_logr = -1.0 * (
constrained_negative_log_likelihood(best_fit_constrained) - negative_log_likelihood(best_fit_global)
)
log_r.append(profiled_logr)
# evaluate p_value and best fit point
logr = np.array(log_r)
i_ml = np.argmax(log_r)
log_r = log_r[:] - log_r[i_ml]
m2_log_r = -2.0 * log_r
p_value = chi2.sf(x=m2_log_r, df=dof)
return theta_grid_mdim, p_value, i_ml, log_r