from __future__ import absolute_import, division, print_function
import six
import logging
import os
import json
import numpy as np
from collections import OrderedDict
import torch
from madminer.utils.ml.models.maf import ConditionalMaskedAutoregressiveFlow
from madminer.utils.ml.models.maf_mog import ConditionalMixtureMaskedAutoregressiveFlow
from madminer.utils.ml.models.ratio import DenseSingleParameterizedRatioModel, DenseDoublyParameterizedRatioModel
from madminer.utils.ml.models.score import DenseLocalScoreModel
from madminer.utils.ml.eval import evaluate_flow_model, evaluate_ratio_model, evaluate_local_score_model
from madminer.utils.ml.utils import get_optimizer, get_loss
from madminer.utils.various import create_missing_folders, load_and_check, shuffle, restrict_samplesize
from madminer.utils.various import separate_information_blocks
from madminer.utils.ml.trainer import SingleParameterizedRatioTrainer, DoubleParameterizedRatioTrainer
from madminer.utils.ml.trainer import LocalScoreTrainer, FlowTrainer
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
logger = logging.getLogger(__name__)
class Estimator(object):
"""
Abstract class for any ML estimator. Subclassed by ParameterizedRatioEstimator, DoubleParameterizedRatioEstimator,
ScoreEstimator, and LikelihoodEstimator.
Each instance of this class represents one neural estimator. The most important functions are:
* `Estimator.train()` to train an estimator. The keyword `method` determines the inference technique
and whether a class instance represents a single-parameterized likelihood ratio estimator, a doubly-parameterized
likelihood ratio estimator, or a local score estimator.
* `Estimator.evaluate()` to evaluate the estimator.
* `Estimator.save()` to save the trained model to files.
* `Estimator.load()` to load the trained model from files.
Please see the tutorial for a detailed walk-through.
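
    A typical workflow, as a minimal sketch (the file prefix is a hypothetical placeholder, and
    `ParameterizedRatioEstimator` stands in for any concrete subclass)::

        estimator = ParameterizedRatioEstimator(n_hidden=(100, 100))
        estimator.train(method="carl", x="x_train.npy", y="y_train.npy", theta="theta_train.npy")
        estimator.save("models/carl")  # writes settings, state dict, and input scaling files

        estimator = ParameterizedRatioEstimator()
        estimator.load("models/carl")  # restores the trained model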
"""
def __init__(self, features=None, n_hidden=(100,), activation="tanh", dropout_prob=0.0):
self.features = features
self.n_hidden = n_hidden
self.activation = activation
self.dropout_prob = dropout_prob
self.model = None
self.n_observables = None
self.n_parameters = None
self.x_scaling_means = None
self.x_scaling_stds = None
    def train(self, *args, **kwargs):
raise NotImplementedError
    def evaluate_log_likelihood(self, *args, **kwargs):
"""
Log likelihood estimation. Signature depends on the type of estimator. The first returned value is the log
likelihood with shape `(n_thetas, n_x)`.
"""
raise NotImplementedError
    def evaluate_log_likelihood_ratio(self, *args, **kwargs):
"""
Log likelihood ratio estimation. Signature depends on the type of estimator. The first returned value is the log
likelihood ratio with shape `(n_thetas, n_x)` or `(n_x)`.
"""
raise NotImplementedError
    def evaluate_score(self, *args, **kwargs):
"""
        Score estimation. Signature depends on the type of estimator. The only returned value is the score with
        shape `(n_x, n_parameters)`.
"""
raise NotImplementedError
    def evaluate(self, *args, **kwargs):
raise NotImplementedError
    def save(self, filename, save_model=False):
"""
        Saves the trained model to four files: a JSON file with the settings, a pickled PyTorch state dict
        file, and numpy files for the means and standard deviations of the inputs (used for input scaling).
Parameters
----------
filename : str
            Path to the files. '_settings.json' and '_state_dict.pt' will be added.
save_model : bool, optional
If True, the whole model is saved in addition to the state dict. This is not necessary for loading it
again with Estimator.load(), but can be useful for debugging, for instance to plot the computational graph.
Returns
-------
None
"""
logger.info("Saving model to %s", filename)
if self.model is None:
raise ValueError("No model -- train or load model before saving!")
# Check paths
create_missing_folders([os.path.dirname(filename)])
# Save settings
logger.debug("Saving settings to %s_settings.json", filename)
settings = self._wrap_settings()
with open(filename + "_settings.json", "w") as f:
json.dump(settings, f)
# Save scaling
if self.x_scaling_stds is not None and self.x_scaling_means is not None:
logger.debug("Saving input scaling information to %s_x_means.npy and %s_x_stds.npy", filename, filename)
np.save(filename + "_x_means.npy", self.x_scaling_means)
np.save(filename + "_x_stds.npy", self.x_scaling_stds)
# Save state dict
logger.debug("Saving state dictionary to %s_state_dict.pt", filename)
torch.save(self.model.state_dict(), filename + "_state_dict.pt")
# Save model
if save_model:
logger.debug("Saving model to %s_model.pt", filename)
torch.save(self.model, filename + "_model.pt")
    def load(self, filename):
"""
Loads a trained model from files.
Parameters
----------
filename : str
            Path to the files. '_settings.json' and '_state_dict.pt' will be added.
Returns
-------
None
"""
logger.info("Loading model from %s", filename)
# Load settings and create model
logger.debug("Loading settings from %s_settings.json", filename)
with open(filename + "_settings.json", "r") as f:
settings = json.load(f)
self._unwrap_settings(settings)
self._create_model()
# Load scaling
try:
self.x_scaling_means = np.load(filename + "_x_means.npy")
self.x_scaling_stds = np.load(filename + "_x_stds.npy")
logger.debug(
" Found input scaling information: means %s, stds %s", self.x_scaling_means, self.x_scaling_stds
)
except FileNotFoundError:
logger.warning("Scaling information not found in %s", filename)
self.x_scaling_means = None
self.x_scaling_stds = None
# Load state dict
logger.debug("Loading state dictionary from %s_state_dict.pt", filename)
self.model.load_state_dict(torch.load(filename + "_state_dict.pt", map_location="cpu"))
def _transform_inputs(self, x):
if self.x_scaling_means is not None and self.x_scaling_stds is not None:
x_scaled = x - self.x_scaling_means
x_scaled /= self.x_scaling_stds
else:
x_scaled = x
return x_scaled
def _wrap_settings(self):
settings = {
"n_observables": self.n_observables,
"n_parameters": self.n_parameters,
"features": self.features,
"n_hidden": list(self.n_hidden),
"activation": self.activation,
"dropout_prob": self.dropout_prob,
}
return settings
def _unwrap_settings(self, settings):
try:
_ = str(settings["estimator_type"])
except KeyError:
raise RuntimeError(
"Can't find estimator type information in file. Maybe this file was created with"
" an incompatible MadMiner version < v0.3.0?"
)
self.n_observables = int(settings["n_observables"])
self.n_parameters = int(settings["n_parameters"])
self.n_hidden = tuple([int(item) for item in settings["n_hidden"]])
self.activation = str(settings["activation"])
self.features = settings["features"]
if self.features == "None":
self.features = None
if self.features is not None:
self.features = list([int(item) for item in self.features])
try:
self.dropout_prob = float(settings["dropout_prob"])
except KeyError:
self.dropout_prob = 0.0
logger.info(
"Can't find dropout probability in model file. Probably this file was created with an older"
" MadMiner version < 0.6.1. That's totally fine, we'll just stick to the default of 0 (no"
" dropout)."
)
def _create_model(self):
raise NotImplementedError
class ConditionalEstimator(Estimator):
"""
Abstract class for estimator that is conditional on theta. Subclassed by ParameterizedRatioEstimator,
DoubleParameterizedRatioEstimator, and LikelihoodEstimator (but not ScoreEstimator).
Adds functionality to rescale parameters.
"""
def __init__(self, features=None, n_hidden=(100,), activation="tanh", dropout_prob=0.0):
super(ConditionalEstimator, self).__init__(features, n_hidden, activation, dropout_prob)
self.theta_scaling_means = None
self.theta_scaling_stds = None
    def save(self, filename, save_model=False):
"""
        Saves the trained model to several files: a JSON file with the settings, a pickled PyTorch state dict
        file, and numpy files for the means and standard deviations of the inputs and parameters (used for rescaling).
Parameters
----------
filename : str
            Path to the files. '_settings.json' and '_state_dict.pt' will be added.
save_model : bool, optional
If True, the whole model is saved in addition to the state dict. This is not necessary for loading it
again with Estimator.load(), but can be useful for debugging, for instance to plot the computational graph.
Returns
-------
None
"""
super(ConditionalEstimator, self).save(filename, save_model)
# Save param scaling
if self.theta_scaling_stds is not None and self.theta_scaling_means is not None:
logger.debug(
"Saving parameter scaling information to %s_theta_means.npy and %s_theta_stds.npy", filename, filename
)
np.save(filename + "_theta_means.npy", self.theta_scaling_means)
np.save(filename + "_theta_stds.npy", self.theta_scaling_stds)
    def load(self, filename):
"""
Loads a trained model from files.
Parameters
----------
filename : str
            Path to the files. '_settings.json' and '_state_dict.pt' will be added.
Returns
-------
None
"""
super(ConditionalEstimator, self).load(filename)
# Load param scaling
try:
self.theta_scaling_means = np.load(filename + "_theta_means.npy")
self.theta_scaling_stds = np.load(filename + "_theta_stds.npy")
logger.debug(
" Found parameter scaling information: means %s, stds %s",
self.theta_scaling_means,
self.theta_scaling_stds,
)
except FileNotFoundError:
logger.warning("Parameter scaling information not found in %s", filename)
self.theta_scaling_means = None
self.theta_scaling_stds = None
def _transform_parameters(self, theta):
if self.theta_scaling_means is not None and self.theta_scaling_stds is not None:
theta_scaled = theta - self.theta_scaling_means[np.newaxis, :]
theta_scaled /= self.theta_scaling_stds[np.newaxis, :]
else:
theta_scaled = theta
return theta_scaled
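    # Note on the score transformation below: with standardized parameters theta' = (theta - mean) / std,
    # the score t = d log p / d theta picks up the Jacobian d theta / d theta' = std, so t' = std * t.
    # Hence the multiplication for inverse=False, and the division for inverse=True.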
def _transform_score(self, t_xz, inverse=False):
if self.theta_scaling_means is not None and self.theta_scaling_stds is not None and t_xz is not None:
if inverse:
t_xz_scaled = t_xz / self.theta_scaling_stds[np.newaxis, :]
else:
t_xz_scaled = t_xz * self.theta_scaling_stds[np.newaxis, :]
else:
t_xz_scaled = t_xz
return t_xz_scaled
class ParameterizedRatioEstimator(ConditionalEstimator):
"""
A neural estimator of the likelihood ratio as a function of the observation x as well as
the numerator hypothesis theta. The reference (denominator) hypothesis is kept fixed at some
reference value and NOT modeled by the network.
Parameters
----------
features : list of int or None, optional
Indices of observables (features) that are used as input to the neural networks. If None, all observables
are used. Default value: None.
n_hidden : tuple of int, optional
Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the
setup of each individual MADE layer. Default value: (100,).
activation : {'tanh', 'sigmoid', 'relu'}, optional
Activation function. Default value: 'tanh'.
"""
    def train(
self,
method,
x,
y,
theta,
r_xz=None,
t_xz=None,
x_val=None,
y_val=None,
theta_val=None,
r_xz_val=None,
t_xz_val=None,
alpha=1.0,
optimizer="amsgrad",
n_epochs=50,
batch_size=128,
initial_lr=0.001,
final_lr=0.0001,
nesterov_momentum=None,
validation_split=0.25,
early_stopping=True,
scale_inputs=True,
shuffle_labels=False,
limit_samplesize=None,
memmap=False,
verbose="some",
scale_parameters=True,
n_workers=8,
):
"""
Trains the network.
Parameters
----------
method : str
The inference method used for training. Allowed values are 'alice', 'alices', 'carl', 'cascal', 'rascal',
and 'rolr'.
x : ndarray or str
Observations, or filename of a pickled numpy array.
y : ndarray or str
            Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array.
theta : ndarray or str
Numerator parameter point, or filename of a pickled numpy array.
r_xz : ndarray or str or None, optional
Joint likelihood ratio, or filename of a pickled numpy array. Default value: None.
t_xz : ndarray or str or None, optional
Joint scores at theta, or filename of a pickled numpy array. Default value: None.
x_val : ndarray or str or None, optional
Validation observations, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
y_val : ndarray or str or None, optional
Validation labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
theta_val : ndarray or str or None, optional
Validation numerator parameter points, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
r_xz_val : ndarray or str or None, optional
Validation joint likelihood ratio, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
t_xz_val : ndarray or str or None, optional
Validation joint scores at theta, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
alpha : float, optional
Hyperparameter weighting the score error in the loss function of the 'alices', 'rascal', and 'cascal'
methods. Default value: 1.
optimizer : {"adam", "amsgrad", "sgd"}, optional
Optimization algorithm. Default value: "amsgrad".
n_epochs : int, optional
Number of epochs. Default value: 50.
batch_size : int, optional
Batch size. Default value: 128.
initial_lr : float, optional
Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value:
0.001.
final_lr : float, optional
Learning rate during the last epoch. Default value: 0.0001.
nesterov_momentum : float or None, optional
If trainer is "sgd", sets the Nesterov momentum. Default value: None.
validation_split : float or None, optional
Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire
sample is used for training and early stopping is deactivated. Default value: 0.25.
early_stopping : bool, optional
Activates early stopping based on the validation loss (only if validation_split is not None). Default value:
True.
scale_inputs : bool, optional
Scale the observables to zero mean and unit variance. Default value: True.
shuffle_labels : bool, optional
If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their
            normal order. This serves as a closure test, in particular as a cross-check against overfitting: an
            estimator trained with shuffle_labels=True should predict likelihood ratios around 1 and scores around 0.
limit_samplesize : int or None, optional
If not None, only this number of samples (events) is used to train the estimator. Default value: None.
memmap : bool, optional.
If True, training files larger than 1 GB will not be loaded into memory at once. Default value: False.
        verbose : {"all", "many", "some", "few", "none"}, optional
Determines verbosity of training. Default value: "some".
scale_parameters : bool, optional
Whether parameters are rescaled to mean zero and unit variance before going into the neural network.
Default value: True.
        n_workers : int, optional
            Number of workers used by the data loaders. Default value: 8.

        Returns
        -------
        result : ndarray
            Results from SingleParameterizedRatioTrainer.train.
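
        Examples
        --------
        A minimal sketch of a training call; the file names are hypothetical placeholders for arrays
        created with `madminer.sampling.SampleAugmenter`::

            estimator = ParameterizedRatioEstimator(n_hidden=(100, 100))
            estimator.train(
                method="alices",
                x="train/x_train.npy",
                y="train/y_train.npy",
                theta="train/theta0_train.npy",
                r_xz="train/r_xz_train.npy",
                t_xz="train/t_xz_train.npy",
                n_epochs=20,
            )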
"""
logger.info("Starting training")
logger.info(" Method: %s", method)
if method in ["cascal", "rascal", "alices"]:
logger.info(" alpha: %s", alpha)
logger.info(" Batch size: %s", batch_size)
logger.info(" Optimizer: %s", optimizer)
logger.info(" Epochs: %s", n_epochs)
logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr)
if optimizer == "sgd":
logger.info(" Nesterov momentum: %s", nesterov_momentum)
logger.info(" Validation split: %s", validation_split)
logger.info(" Early stopping: %s", early_stopping)
logger.info(" Scale inputs: %s", scale_inputs)
logger.info(" Scale parameters: %s", scale_parameters)
logger.info(" Shuffle labels %s", shuffle_labels)
if limit_samplesize is None:
logger.info(" Samples: all")
else:
logger.info(" Samples: %s", limit_samplesize)
# Load training data
logger.info("Loading training data")
memmap_threshold = 1.0 if memmap else None
theta = load_and_check(theta, memmap_files_larger_than_gb=memmap_threshold)
x = load_and_check(x, memmap_files_larger_than_gb=memmap_threshold)
y = load_and_check(y, memmap_files_larger_than_gb=memmap_threshold)
r_xz = load_and_check(r_xz, memmap_files_larger_than_gb=memmap_threshold)
t_xz = load_and_check(t_xz, memmap_files_larger_than_gb=memmap_threshold)
self._check_required_data(method, r_xz, t_xz)
# Infer dimensions of problem
n_samples = x.shape[0]
n_observables = x.shape[1]
n_parameters = theta.shape[1]
logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables)
# Limit sample size
if limit_samplesize is not None and limit_samplesize < n_samples:
logger.info("Only using %s of %s training samples", limit_samplesize, n_samples)
x, theta, y, r_xz, t_xz = restrict_samplesize(limit_samplesize, x, theta, y, r_xz, t_xz)
# Validation data
external_validation = x_val is not None and y_val is not None and theta_val is not None
if external_validation:
theta_val = load_and_check(theta_val, memmap_files_larger_than_gb=memmap_threshold)
x_val = load_and_check(x_val, memmap_files_larger_than_gb=memmap_threshold)
y_val = load_and_check(y_val, memmap_files_larger_than_gb=memmap_threshold)
r_xz_val = load_and_check(r_xz_val, memmap_files_larger_than_gb=memmap_threshold)
t_xz_val = load_and_check(t_xz_val, memmap_files_larger_than_gb=memmap_threshold)
logger.info("Found %s separate validation samples", x_val.shape[0])
assert x_val.shape[1] == n_observables
assert theta_val.shape[1] == n_parameters
if r_xz is not None:
assert r_xz_val is not None, "When providing r_xz and sep. validation data, also provide r_xz_val"
if t_xz is not None:
assert t_xz_val is not None, "When providing t_xz and sep. validation data, also provide t_xz_val"
# Scale features
if scale_inputs:
self.initialize_input_transform(x, overwrite=False)
x = self._transform_inputs(x)
if external_validation:
x_val = self._transform_inputs(x_val)
else:
self.initialize_input_transform(x, False, overwrite=False)
# Scale parameters
if scale_parameters:
logger.info("Rescaling parameters")
self.initialize_parameter_transform(theta)
theta = self._transform_parameters(theta)
t_xz = self._transform_score(t_xz, inverse=False)
if external_validation:
t_xz_val = self._transform_score(t_xz_val, inverse=False)
else:
self.initialize_parameter_transform(theta, False)
# Shuffle labels
if shuffle_labels:
logger.info("Shuffling labels")
y, r_xz, t_xz = shuffle(y, r_xz, t_xz)
# Features
if self.features is not None:
x = x[:, self.features]
logger.info("Only using %s of %s observables", x.shape[1], n_observables)
n_observables = x.shape[1]
if external_validation:
x_val = x_val[:, self.features]
# Check consistency of input with model
if self.n_observables is None:
self.n_observables = n_observables
if self.n_parameters is None:
self.n_parameters = n_parameters
if n_parameters != self.n_parameters:
raise RuntimeError(
"Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
)
if n_observables != self.n_observables:
raise RuntimeError(
"Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
)
# Data
data = self._package_training_data(method, x, theta, y, r_xz, t_xz)
if external_validation:
data_val = self._package_training_data(method, x_val, theta_val, y_val, r_xz_val, t_xz_val)
else:
data_val = None
# Create model
if self.model is None:
logger.info("Creating model")
self._create_model()
# Losses
loss_functions, loss_labels, loss_weights = get_loss(method, alpha)
# Optimizer
opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum)
# Train model
logger.info("Training model")
trainer = SingleParameterizedRatioTrainer(self.model, n_workers=n_workers)
result = trainer.train(
data=data,
data_val=data_val,
loss_functions=loss_functions,
loss_weights=loss_weights,
loss_labels=loss_labels,
epochs=n_epochs,
batch_size=batch_size,
optimizer=opt,
optimizer_kwargs=opt_kwargs,
initial_lr=initial_lr,
final_lr=final_lr,
validation_split=validation_split,
early_stopping=early_stopping,
verbose=verbose,
)
return result
    def evaluate_log_likelihood_ratio(self, x, theta, test_all_combinations=True, evaluate_score=False):
"""
        Evaluates the log likelihood ratio for given observations x between the given parameter point theta and the
reference hypothesis.
Parameters
----------
x : str or ndarray
Observations or filename of a pickled numpy array.
theta : ndarray or str
Parameter points or filename of a pickled numpy array.
        test_all_combinations : bool, optional
            If False, the number of samples in the observable and theta
            files has to match, and the likelihood ratio is evaluated only for the combinations
            `r(x_i | theta_i)`. If True, `r(x_i | theta_j)` is evaluated for all pairwise combinations `i, j`.
            Default value: True.
evaluate_score : bool, optional
Sets whether in addition to the likelihood ratio the score is evaluated. Default value: False.
Returns
-------
log_likelihood_ratio : ndarray
The estimated log likelihood ratio. If test_all_combinations is True, the result has shape
`(n_thetas, n_x)`. Otherwise, it has shape `(n_samples,)`.
score : ndarray or None
None if evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations
is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape
`(n_samples, n_parameters)`.
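
        Examples
        --------
        A minimal sketch, assuming a trained estimator with one parameter and a hypothetical test
        sample::

            import numpy as np
            theta_grid = np.linspace(-1.0, 1.0, 21).reshape(-1, 1)
            log_r_hat, _ = estimator.evaluate_log_likelihood_ratio(
                x="test/x_test.npy", theta=theta_grid, test_all_combinations=True
            )
            # log_r_hat has shape (21, n_x); summing over the events gives the estimated
            # log likelihood ratio of the full sample at each theta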
"""
if self.model is None:
raise ValueError("No model -- train or load model before evaluating it!")
# Load training data
logger.info("Loading evaluation data")
x = load_and_check(x)
theta = load_and_check(theta)
# Scale observables
x = self._transform_inputs(x)
theta = self._transform_parameters(theta)
# Restrict features
if self.features is not None:
x = x[:, self.features]
all_log_r_hat = []
all_t_hat = []
if test_all_combinations:
logger.info("Starting ratio evaluation for %s x-theta combinations", len(theta) * len(x))
for i, this_theta in enumerate(theta):
logger.debug("Starting ratio evaluation for thetas %s / %s: %s", i + 1, len(theta), this_theta)
_, log_r_hat, t_hat, _ = evaluate_ratio_model(
model=self.model,
method_type="parameterized_ratio",
theta0s=[this_theta],
theta1s=None,
xs=x,
evaluate_score=evaluate_score,
)
t_hat = self._transform_score(t_hat, inverse=True)
all_log_r_hat.append(log_r_hat)
all_t_hat.append(t_hat)
all_log_r_hat = np.array(all_log_r_hat)
all_t_hat = np.array(all_t_hat)
else:
logger.info("Starting ratio evaluation")
_, all_log_r_hat, all_t_hat, _ = evaluate_ratio_model(
model=self.model,
method_type="parameterized_ratio",
theta0s=theta,
theta1s=None,
xs=x,
evaluate_score=evaluate_score,
)
all_t_hat = self._transform_score(all_t_hat, inverse=True)
logger.info("Evaluation done")
return all_log_r_hat, all_t_hat
    def evaluate_log_likelihood(self, *args, **kwargs):
raise TheresAGoodReasonThisDoesntWork(
"This estimator can only estimate likelihood ratios, not the likelihood " "itself!"
)
    def evaluate_score(self, *args, **kwargs):
raise NotImplementedError("Please use evaluate_log_likelihood_ratio(evaluate_score=True).")
    def evaluate(self, *args, **kwargs):
return self.evaluate_log_likelihood_ratio(*args, **kwargs)
def _create_model(self):
self.model = DenseSingleParameterizedRatioModel(
n_observables=self.n_observables,
n_parameters=self.n_parameters,
n_hidden=self.n_hidden,
activation=self.activation,
dropout_prob=self.dropout_prob,
)
@staticmethod
def _check_required_data(method, r_xz, t_xz):
if method in ["cascal", "alices", "rascal"] and t_xz is None:
raise RuntimeError("Method {} requires joint score information".format(method))
if method in ["rolr", "alice", "alices", "rascal"] and r_xz is None:
raise RuntimeError("Method {} requires joint likelihood ratio information".format(method))
@staticmethod
def _package_training_data(method, x, theta, y, r_xz, t_xz):
data = OrderedDict()
data["x"] = x
data["theta"] = theta
data["y"] = y
if method in ["rolr", "alice", "alices", "rascal"]:
data["r_xz"] = r_xz
if method in ["cascal", "alices", "rascal"]:
data["t_xz"] = t_xz
return data
def _wrap_settings(self):
settings = super(ParameterizedRatioEstimator, self)._wrap_settings()
settings["estimator_type"] = "parameterized_ratio"
return settings
def _unwrap_settings(self, settings):
super(ParameterizedRatioEstimator, self)._unwrap_settings(settings)
estimator_type = str(settings["estimator_type"])
if estimator_type != "parameterized_ratio":
raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
class DoubleParameterizedRatioEstimator(ConditionalEstimator):
"""
A neural estimator of the likelihood ratio as a function of the observation x, the numerator hypothesis theta0, and
the denominator hypothesis theta1.
Parameters
----------
features : list of int or None, optional
Indices of observables (features) that are used as input to the neural networks. If None, all observables
are used. Default value: None.
n_hidden : tuple of int, optional
Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the
setup of each individual MADE layer. Default value: (100,).
activation : {'tanh', 'sigmoid', 'relu'}, optional
Activation function. Default value: 'tanh'.
"""
    def train(
self,
method,
x,
y,
theta0,
theta1,
r_xz=None,
t_xz0=None,
t_xz1=None,
x_val=None,
y_val=None,
theta0_val=None,
theta1_val=None,
r_xz_val=None,
t_xz0_val=None,
t_xz1_val=None,
alpha=1.0,
optimizer="amsgrad",
n_epochs=50,
batch_size=128,
initial_lr=0.001,
final_lr=0.0001,
nesterov_momentum=None,
validation_split=0.25,
early_stopping=True,
scale_inputs=True,
shuffle_labels=False,
limit_samplesize=None,
memmap=False,
verbose="some",
scale_parameters=True,
n_workers=8,
):
"""
Trains the network.
Parameters
----------
method : str
The inference method used for training. Allowed values are 'alice', 'alices', 'carl', 'cascal', 'rascal',
and 'rolr'.
x : ndarray or str
Observations, or filename of a pickled numpy array.
y : ndarray or str
            Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array.
theta0 : ndarray or str
Numerator parameter point, or filename of a pickled numpy array.
theta1 : ndarray or str
Denominator parameter point, or filename of a pickled numpy array.
r_xz : ndarray or str or None, optional
Joint likelihood ratio, or filename of a pickled numpy array. Default value: None.
t_xz0 : ndarray or str or None, optional
Joint scores at theta0, or filename of a pickled numpy array. Default value: None.
t_xz1 : ndarray or str or None, optional
Joint scores at theta1, or filename of a pickled numpy array. Default value: None.
x_val : ndarray or str or None, optional
Validation observations, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
y_val : ndarray or str or None, optional
Validation labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
theta0_val : ndarray or str or None, optional
Validation numerator parameter points, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
theta1_val : ndarray or str or None, optional
Validation denominator parameter points, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
r_xz_val : ndarray or str or None, optional
Validation joint likelihood ratio, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
t_xz0_val : ndarray or str or None, optional
Validation joint scores at theta0, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
t_xz1_val : ndarray or str or None, optional
Validation joint scores at theta1, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
alpha : float, optional
Hyperparameter weighting the score error in the loss function of the 'alices', 'rascal', and 'cascal'
methods. Default value: 1.
optimizer : {"adam", "amsgrad", "sgd"}, optional
Optimization algorithm. Default value: "amsgrad".
n_epochs : int, optional
Number of epochs. Default value: 50.
batch_size : int, optional
Batch size. Default value: 128.
initial_lr : float, optional
Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value:
0.001.
final_lr : float, optional
Learning rate during the last epoch. Default value: 0.0001.
nesterov_momentum : float or None, optional
If trainer is "sgd", sets the Nesterov momentum. Default value: None.
validation_split : float or None, optional
Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire
sample is used for training and early stopping is deactivated. Default value: 0.25.
early_stopping : bool, optional
Activates early stopping based on the validation loss (only if validation_split is not None). Default value:
True.
scale_inputs : bool, optional
Scale the observables to zero mean and unit variance. Default value: True.
shuffle_labels : bool, optional
If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their
            normal order. This serves as a closure test, in particular as a cross-check against overfitting: an
            estimator trained with shuffle_labels=True should predict likelihood ratios around 1 and scores around 0.
limit_samplesize : int or None, optional
If not None, only this number of samples (events) is used to train the estimator. Default value: None.
memmap : bool, optional.
If True, training files larger than 1 GB will not be loaded into memory at once. Default value: False.
        verbose : {"all", "many", "some", "few", "none"}, optional
Determines verbosity of training. Default value: "some".
        scale_parameters : bool, optional
            Whether parameters are rescaled to mean zero and unit variance before going into the neural network.
            Default value: True.
        n_workers : int, optional
            Number of workers used by the data loaders. Default value: 8.

        Returns
        -------
        result : ndarray
            Results from DoubleParameterizedRatioTrainer.train.
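
        Examples
        --------
        A minimal sketch with hypothetical file names; compared to the single-parameterized case,
        numerator and denominator parameter points (and joint scores) are passed separately::

            estimator = DoubleParameterizedRatioEstimator(n_hidden=(100, 100))
            estimator.train(
                method="alices",
                x="train/x_train.npy",
                y="train/y_train.npy",
                theta0="train/theta0_train.npy",
                theta1="train/theta1_train.npy",
                r_xz="train/r_xz_train.npy",
                t_xz0="train/t_xz0_train.npy",
                t_xz1="train/t_xz1_train.npy",
            )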
"""
logger.info("Starting training")
logger.info(" Method: %s", method)
if method in ["cascal", "rascal", "alices"]:
logger.info(" alpha: %s", alpha)
logger.info(" Batch size: %s", batch_size)
logger.info(" Optimizer: %s", optimizer)
logger.info(" Epochs: %s", n_epochs)
logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr)
if optimizer == "sgd":
logger.info(" Nesterov momentum: %s", nesterov_momentum)
logger.info(" Validation split: %s", validation_split)
logger.info(" Early stopping: %s", early_stopping)
logger.info(" Scale inputs: %s", scale_inputs)
logger.info(" Shuffle labels %s", shuffle_labels)
if limit_samplesize is None:
logger.info(" Samples: all")
else:
logger.info(" Samples: %s", limit_samplesize)
# Load training data
logger.info("Loading training data")
memmap_threshold = 1.0 if memmap else None
theta0 = load_and_check(theta0, memmap_files_larger_than_gb=memmap_threshold)
theta1 = load_and_check(theta1, memmap_files_larger_than_gb=memmap_threshold)
x = load_and_check(x, memmap_files_larger_than_gb=memmap_threshold)
y = load_and_check(y, memmap_files_larger_than_gb=memmap_threshold)
r_xz = load_and_check(r_xz, memmap_files_larger_than_gb=memmap_threshold)
t_xz0 = load_and_check(t_xz0, memmap_files_larger_than_gb=memmap_threshold)
t_xz1 = load_and_check(t_xz1, memmap_files_larger_than_gb=memmap_threshold)
self._check_required_data(method, r_xz, t_xz0, t_xz1)
# Infer dimensions of problem
n_samples = x.shape[0]
n_observables = x.shape[1]
n_parameters = theta0.shape[1]
logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables)
# Limit sample size
if limit_samplesize is not None and limit_samplesize < n_samples:
logger.info("Only using %s of %s training samples", limit_samplesize, n_samples)
x, theta0, theta1, y, r_xz, t_xz0, t_xz1 = restrict_samplesize(
limit_samplesize, x, theta0, theta1, y, r_xz, t_xz0, t_xz1
)
# Validation data
external_validation = (
x_val is not None and y_val is not None and theta0_val is not None and theta1_val is not None
)
if external_validation:
theta0_val = load_and_check(theta0_val, memmap_files_larger_than_gb=memmap_threshold)
theta1_val = load_and_check(theta1_val, memmap_files_larger_than_gb=memmap_threshold)
x_val = load_and_check(x_val, memmap_files_larger_than_gb=memmap_threshold)
y_val = load_and_check(y_val, memmap_files_larger_than_gb=memmap_threshold)
r_xz_val = load_and_check(r_xz_val, memmap_files_larger_than_gb=memmap_threshold)
t_xz0_val = load_and_check(t_xz0_val, memmap_files_larger_than_gb=memmap_threshold)
t_xz1_val = load_and_check(t_xz1_val, memmap_files_larger_than_gb=memmap_threshold)
logger.info("Found %s separate validation samples", x_val.shape[0])
assert x_val.shape[1] == n_observables
assert theta0_val.shape[1] == n_parameters
assert theta1_val.shape[1] == n_parameters
if r_xz is not None:
assert r_xz_val is not None, "When providing r_xz and sep. validation data, also provide r_xz_val"
if t_xz0 is not None:
assert t_xz0_val is not None, "When providing t_xz0 and sep. validation data, also provide t_xz0_val"
if t_xz1 is not None:
assert t_xz1_val is not None, "When providing t_xz1 and sep. validation data, also provide t_xz1_val"
# Scale features
if scale_inputs:
self.initialize_input_transform(x, overwrite=False)
x = self._transform_inputs(x)
if external_validation:
x_val = self._transform_inputs(x_val)
else:
self.initialize_input_transform(x, False, overwrite=False)
# Scale parameters
if scale_parameters:
logger.info("Rescaling parameters")
self.initialize_parameter_transform(np.concatenate((theta0, theta1), 0))
theta0 = self._transform_parameters(theta0)
theta1 = self._transform_parameters(theta1)
t_xz0 = self._transform_score(t_xz0, inverse=False)
t_xz1 = self._transform_score(t_xz1, inverse=False)
if external_validation:
t_xz0_val = self._transform_score(t_xz0_val, inverse=False)
t_xz1_val = self._transform_score(t_xz1_val, inverse=False)
else:
self.initialize_parameter_transform(np.concatenate((theta0, theta1), 0), False)
# Shuffle labels
if shuffle_labels:
logger.info("Shuffling labels")
y, r_xz, t_xz0, t_xz1 = shuffle(y, r_xz, t_xz0, t_xz1)
# Features
if self.features is not None:
x = x[:, self.features]
logger.info("Only using %s of %s observables", x.shape[1], n_observables)
n_observables = x.shape[1]
if external_validation:
x_val = x_val[:, self.features]
# Check consistency of input with model
if self.n_observables is None:
self.n_observables = n_observables
if self.n_parameters is None:
self.n_parameters = n_parameters
if n_parameters != self.n_parameters:
raise RuntimeError(
"Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
)
if n_observables != self.n_observables:
raise RuntimeError(
"Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
)
# Data
data = self._package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1)
if external_validation:
data_val = self._package_training_data(
method, x_val, theta0_val, theta1_val, y_val, r_xz_val, t_xz0_val, t_xz1_val
)
else:
data_val = None
# Create model
if self.model is None:
logger.info("Creating model")
self._create_model()
# Losses
loss_functions, loss_labels, loss_weights = get_loss(method + "2", alpha)
# Optimizer
opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum)
# Train model
logger.info("Training model")
trainer = DoubleParameterizedRatioTrainer(self.model, n_workers=n_workers)
result = trainer.train(
data=data,
data_val=data_val,
loss_functions=loss_functions,
loss_weights=loss_weights,
loss_labels=loss_labels,
epochs=n_epochs,
batch_size=batch_size,
optimizer=opt,
optimizer_kwargs=opt_kwargs,
initial_lr=initial_lr,
final_lr=final_lr,
validation_split=validation_split,
early_stopping=early_stopping,
verbose=verbose,
)
return result
    def evaluate_log_likelihood_ratio(self, x, theta0, theta1, test_all_combinations=True, evaluate_score=False):
"""
Evaluates the log likelihood ratio as a function of the observation x, the numerator hypothesis theta0, and
the denominator hypothesis theta1.
Parameters
----------
x : str or ndarray
Observations or filename of a pickled numpy array.
theta0 : ndarray or str
Numerator parameter points or filename of a pickled numpy array.
theta1 : ndarray or str
Denominator parameter points or filename of a pickled numpy array.
test_all_combinations : bool, optional
If False, the number of samples in the observable and theta
files has to match, and the likelihood ratio is evaluated only for the combinations
            `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` is evaluated for all pairwise
            combinations `i, j`. Default value: True.
evaluate_score : bool, optional
Sets whether in addition to the likelihood ratio the score is evaluated. Default value: False.
Returns
-------
log_likelihood_ratio : ndarray
The estimated log likelihood ratio. If test_all_combinations is True, the result has shape
`(n_thetas, n_x)`. Otherwise, it has shape `(n_samples,)`.
score0 : ndarray or None
None if evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations
is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape
`(n_samples, n_parameters)`.
score1 : ndarray or None
None if evaluate_score is False. Otherwise the derived estimated score at `theta1`. If test_all_combinations
is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape
`(n_samples, n_parameters)`.
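
        Examples
        --------
        A minimal sketch, assuming a trained estimator and hypothetical numerator and denominator
        grids, both with shape `(n_thetas, n_parameters)`::

            log_r_hat, t_hat0, t_hat1 = estimator.evaluate_log_likelihood_ratio(
                x="test/x_test.npy",
                theta0=theta0_grid,
                theta1=theta1_grid,
                evaluate_score=False,
            )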
"""
if self.model is None:
raise ValueError("No model -- train or load model before evaluating it!")
# Load training data
logger.info("Loading evaluation data")
x = load_and_check(x)
theta0 = load_and_check(theta0)
theta1 = load_and_check(theta1)
# Scale observables
x = self._transform_inputs(x)
# Restrict features
if self.features is not None:
x = x[:, self.features]
# Balance thetas
if len(theta1) > len(theta0):
theta0 = [theta0[i % len(theta0)] for i in range(len(theta1))]
elif len(theta1) < len(theta0):
theta1 = [theta1[i % len(theta1)] for i in range(len(theta0))]
all_log_r_hat = []
all_t_hat0 = []
all_t_hat1 = []
if test_all_combinations:
logger.info("Starting ratio evaluation for %s x-theta combinations", len(theta0) * len(x))
for i, (this_theta0, this_theta1) in enumerate(zip(theta0, theta1)):
logger.debug(
"Starting ratio evaluation for thetas %s / %s: %s vs %s",
i + 1,
len(theta0),
this_theta0,
this_theta1,
)
_, log_r_hat, t_hat0, t_hat1 = evaluate_ratio_model(
model=self.model,
method_type="double_parameterized_ratio",
theta0s=[this_theta0],
theta1s=[this_theta1],
xs=x,
evaluate_score=evaluate_score,
)
all_log_r_hat.append(log_r_hat)
all_t_hat0.append(t_hat0)
all_t_hat1.append(t_hat1)
all_log_r_hat = np.array(all_log_r_hat)
all_t_hat0 = np.array(all_t_hat0)
all_t_hat1 = np.array(all_t_hat1)
else:
logger.info("Starting ratio evaluation")
_, all_log_r_hat, all_t_hat0, all_t_hat1 = evaluate_ratio_model(
model=self.model,
method_type="double_parameterized_ratio",
theta0s=theta0,
theta1s=theta1,
xs=x,
evaluate_score=evaluate_score,
)
logger.info("Evaluation done")
return all_log_r_hat, all_t_hat0, all_t_hat1
    def evaluate_log_likelihood(self, *args, **kwargs):
raise TheresAGoodReasonThisDoesntWork(
"This estimator can only estimate likelihood ratios, not the likelihood " "itself!"
)
    def evaluate_score(self, *args, **kwargs):
raise NotImplementedError("Please use evaluate_log_likelihood_ratio(evaluate_score=True).")
    def evaluate(self, *args, **kwargs):
return self.evaluate_log_likelihood_ratio(*args, **kwargs)
def _create_model(self):
self.model = DenseDoublyParameterizedRatioModel(
n_observables=self.n_observables,
n_parameters=self.n_parameters,
n_hidden=self.n_hidden,
activation=self.activation,
dropout_prob=self.dropout_prob,
)
@staticmethod
def _check_required_data(method, r_xz, t_xz0, t_xz1):
if method in ["cascal", "alices", "rascal"] and (t_xz0 is None or t_xz1 is None):
raise RuntimeError("Method {} requires joint score information".format(method))
if method in ["rolr", "alice", "alices", "rascal"] and r_xz is None:
raise RuntimeError("Method {} requires joint likelihood ratio information".format(method))
@staticmethod
def _package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1):
data = OrderedDict()
data["x"] = x
data["theta0"] = theta0
data["theta1"] = theta1
data["y"] = y
if method in ["rolr", "alice", "alices", "rascal"]:
data["r_xz"] = r_xz
if method in ["cascal", "alices", "rascal"]:
data["t_xz0"] = t_xz0
data["t_xz1"] = t_xz1
return data
def _wrap_settings(self):
settings = super(DoubleParameterizedRatioEstimator, self)._wrap_settings()
settings["estimator_type"] = "double_parameterized_ratio"
return settings
def _unwrap_settings(self, settings):
super(DoubleParameterizedRatioEstimator, self)._unwrap_settings(settings)
estimator_type = str(settings["estimator_type"])
if estimator_type != "double_parameterized_ratio":
raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
class ScoreEstimator(Estimator):
""" A neural estimator of the score evaluated at a fixed reference hypothesis as a function of the
observation x.
Parameters
----------
features : list of int or None, optional
Indices of observables (features) that are used as input to the neural networks. If None, all observables
are used. Default value: None.
n_hidden : tuple of int, optional
Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the
setup of each individual MADE layer. Default value: (100,).
activation : {'tanh', 'sigmoid', 'relu'}, optional
Activation function. Default value: 'tanh'.
"""
def __init__(self, features=None, n_hidden=(100,), activation="tanh", dropout_prob=0.0):
super(ScoreEstimator, self).__init__(features, n_hidden, activation, dropout_prob)
self.nuisance_profile_matrix = None
self.nuisance_project_matrix = None
self.nuisance_mode_default = "keep"
    def train(
self,
method,
x,
t_xz,
x_val=None,
t_xz_val=None,
optimizer="amsgrad",
n_epochs=50,
batch_size=128,
initial_lr=0.001,
final_lr=0.0001,
nesterov_momentum=None,
validation_split=0.25,
early_stopping=True,
scale_inputs=True,
shuffle_labels=False,
limit_samplesize=None,
memmap=False,
verbose="some",
n_workers=8,
):
"""
Trains the network.
Parameters
----------
method : str
            The inference method used for training. Allowed values are 'sally' and 'sallino'; the two are
            identical at the training stage, so either can be used.
x : ndarray or str
Path to an unweighted sample of observations, as saved by the `madminer.sampling.SampleAugmenter` functions.
Required for all inference methods.
t_xz : ndarray or str
            Joint scores at the reference hypothesis, or filename of a pickled numpy array.
        x_val : ndarray or str or None, optional
            Validation observations, or filename of a pickled numpy array. If None and validation_split > 0,
            validation data will be randomly selected from the training data. Default value: None.
        t_xz_val : ndarray or str or None, optional
            Validation joint scores at the reference hypothesis, or filename of a pickled numpy array. If None
            and validation_split > 0, validation data will be randomly selected from the training data.
            Default value: None.
optimizer : {"adam", "amsgrad", "sgd"}, optional
Optimization algorithm. Default value: "amsgrad".
n_epochs : int, optional
Number of epochs. Default value: 50.
batch_size : int, optional
Batch size. Default value: 128.
initial_lr : float, optional
Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value:
0.001.
final_lr : float, optional
Learning rate during the last epoch. Default value: 0.0001.
nesterov_momentum : float or None, optional
If trainer is "sgd", sets the Nesterov momentum. Default value: None.
validation_split : float or None, optional
Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire
sample is used for training and early stopping is deactivated. Default value: 0.25.
early_stopping : bool, optional
Activates early stopping based on the validation loss (only if validation_split is not None). Default value:
True.
scale_inputs : bool, optional
Scale the observables to zero mean and unit variance. Default value: True.
shuffle_labels : bool, optional
If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their
            normal order. This serves as a closure test, in particular as a cross-check against overfitting: an
            estimator trained with shuffle_labels=True should predict likelihood ratios around 1 and scores around 0.
limit_samplesize : int or None, optional
If not None, only this number of samples (events) is used to train the estimator. Default value: None.
memmap : bool, optional.
If True, training files larger than 1 GB will not be loaded into memory at once. Default value: False.
        verbose : {"all", "many", "some", "few", "none"}, optional
Determines verbosity of training. Default value: "some".
        n_workers : int, optional
            Number of workers used by the data loaders. Default value: 8.

        Returns
        -------
        result : ndarray
            Results from LocalScoreTrainer.train.
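
        Examples
        --------
        A minimal sketch of SALLY training, with hypothetical file names::

            estimator = ScoreEstimator(n_hidden=(50, 50))
            estimator.train(
                method="sally",
                x="train/x_train.npy",
                t_xz="train/t_xz_train.npy",
            )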
"""
if method not in ["sally", "sallino"]:
logger.warning("Method %s not allowed for score estimators. Using 'sally' instead.", method)
method = "sally"
logger.info("Starting training")
logger.info(" Batch size: %s", batch_size)
logger.info(" Optimizer: %s", optimizer)
logger.info(" Epochs: %s", n_epochs)
logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr)
if optimizer == "sgd":
logger.info(" Nesterov momentum: %s", nesterov_momentum)
logger.info(" Validation split: %s", validation_split)
logger.info(" Early stopping: %s", early_stopping)
logger.info(" Scale inputs: %s", scale_inputs)
logger.info(" Shuffle labels %s", shuffle_labels)
if limit_samplesize is None:
logger.info(" Samples: all")
else:
logger.info(" Samples: %s", limit_samplesize)
# Load training data
logger.info("Loading training data")
memmap_threshold = 1.0 if memmap else None
x = load_and_check(x, memmap_files_larger_than_gb=memmap_threshold)
t_xz = load_and_check(t_xz, memmap_files_larger_than_gb=memmap_threshold)
# Infer dimensions of problem
n_samples = x.shape[0]
n_observables = x.shape[1]
n_parameters = t_xz.shape[1]
logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables)
# Limit sample size
if limit_samplesize is not None and limit_samplesize < n_samples:
logger.info("Only using %s of %s training samples", limit_samplesize, n_samples)
x, t_xz = restrict_samplesize(limit_samplesize, x, t_xz)
# Validation data
external_validation = x_val is not None and t_xz_val is not None
if external_validation:
x_val = load_and_check(x_val, memmap_files_larger_than_gb=memmap_threshold)
t_xz_val = load_and_check(t_xz_val, memmap_files_larger_than_gb=memmap_threshold)
logger.info("Found %s separate validation samples", x_val.shape[0])
assert x_val.shape[1] == n_observables
assert t_xz_val.shape[1] == n_parameters
# Scale features
if scale_inputs:
self.initialize_input_transform(x, overwrite=False)
x = self._transform_inputs(x)
if external_validation:
x_val = self._transform_inputs(x_val)
else:
self.initialize_input_transform(x, False, overwrite=False)
# Shuffle labels
if shuffle_labels:
logger.info("Shuffling labels")
logger.warning("Are you sure you want this?")
t_xz = shuffle(t_xz)
# Features
if self.features is not None:
x = x[:, self.features]
logger.info("Only using %s of %s observables", x.shape[1], n_observables)
n_observables = x.shape[1]
if external_validation:
x_val = x_val[:, self.features]
# Check consistency of input with model
if self.n_observables is None:
self.n_observables = n_observables
if self.n_parameters is None:
self.n_parameters = n_parameters
if n_parameters != self.n_parameters:
raise RuntimeError(
"Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
)
if n_observables != self.n_observables:
raise RuntimeError(
"Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
)
# Data
data = self._package_training_data(x, t_xz)
if external_validation:
data_val = self._package_training_data(x_val, t_xz_val)
else:
data_val = None
# Create model
if self.model is None:
logger.info("Creating model")
self._create_model()
# Losses
loss_functions, loss_labels, loss_weights = get_loss(method, None)
# Optimizer
opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum)
# Train model
logger.info("Training model")
trainer = LocalScoreTrainer(self.model, n_workers=n_workers)
result = trainer.train(
data=data,
data_val=data_val,
loss_functions=loss_functions,
loss_weights=loss_weights,
loss_labels=loss_labels,
epochs=n_epochs,
batch_size=batch_size,
optimizer=opt,
optimizer_kwargs=opt_kwargs,
initial_lr=initial_lr,
final_lr=final_lr,
validation_split=validation_split,
early_stopping=early_stopping,
verbose=verbose,
)
return result
    def set_nuisance(self, fisher_information, parameters_of_interest):
"""
Prepares the calculation of profiled scores, see https://arxiv.org/pdf/1903.01473.pdf.
Parameters
----------
fisher_information : ndarray
            Fisher information with shape `(n_parameters, n_parameters)`.
parameters_of_interest : list of int
            List of int, with 0 <= parameters_of_interest[i] < n_parameters. Denotes which parameters are kept in the
profiling, and their new order.
Returns
-------
None
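
        Examples
        --------
        A minimal sketch with one parameter of interest and two nuisance parameters (the Fisher
        information values are hypothetical, and the estimator is assumed to have been trained
        with three parameters)::

            import numpy as np
            fisher_information = np.array(
                [[2.0, 0.2, 0.1],
                 [0.2, 1.0, 0.0],
                 [0.1, 0.0, 1.5]]
            )
            estimator.set_nuisance(fisher_information, parameters_of_interest=[0])
            t_profiled = estimator.evaluate_score(x, nuisance_mode="profile")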
"""
if fisher_information.shape != (self.n_parameters, self.n_parameters):
raise ValueError(
"Fisher information has wrong shape {}, expected {}".format(
fisher_information.shape, (self.n_parameters, self.n_parameters)
)
)
n_parameters_of_interest = len(parameters_of_interest)
# Separate Fisher information parts
nuisance_parameters, information_phys, information_mix, information_nuisance = separate_information_blocks(
fisher_information, parameters_of_interest
)
# Calculate projection matrix
self.nuisance_project_matrix = np.zeros((n_parameters_of_interest, self.n_parameters)) # (n_phys, n_all)
for theta_new, theta_old in enumerate(parameters_of_interest):
self.nuisance_project_matrix[theta_new, theta_old] = 1.0
logger.debug("Nuisance projection matrix:/n%s", self.nuisance_project_matrix)
# Calculate profiling matrix
inverse_information_nuisance = np.linalg.inv(information_nuisance) # (n_nuisance, n_nuisance)
profiling_matrix = -information_mix.T.dot(inverse_information_nuisance) # (n_phys, n_nuisance)
self.nuisance_profile_matrix = np.copy(self.nuisance_project_matrix) # (n_phys, n_all)
for theta_new, theta_old in enumerate(parameters_of_interest):
for nuis_new, nuis_old in enumerate(nuisance_parameters):
self.nuisance_profile_matrix[theta_new, nuis_old] += profiling_matrix[theta_new, nuis_new]
logger.debug("Nuisance profiling matrix:/n%s", self.nuisance_project_matrix)
    def evaluate_score(self, x, nuisance_mode="auto"):
"""
Evaluates the score.
Parameters
----------
x : str or ndarray
Observations, or filename of a pickled numpy array.
nuisance_mode : {"auto", "keep", "profile", "project"}
Decides how nuisance parameters are treated. If nuisance_mode is "auto", the returned score is the (n+k)-
            dimensional score in the space of n parameters of interest and k nuisance parameters if `set_nuisance()`
has not been called, and the n-dimensional profiled score in the space of the parameters of interest
if it has been called. For "keep", the returned score is always (n+k)-dimensional. For "profile", it is
the n-dimensional profiled score. For "project", it is the n-dimensional projected score, i.e. ignoring
the nuisance parameters.
Returns
-------
score : ndarray
Estimated score with shape `(n_observations, n_parameters)`.
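
        Examples
        --------
        A minimal sketch, assuming a trained estimator (the file name is a hypothetical
        placeholder)::

            t_hat = estimator.evaluate_score(x="test/x_test.npy", nuisance_mode="keep")
            # t_hat has shape (n_observations, n_parameters)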
"""
if self.model is None:
raise ValueError("No model -- train or load model before evaluating it!")
if nuisance_mode == "auto":
logger.debug("Using nuisance mode %s", self.nuisance_mode_default)
nuisance_mode = self.nuisance_mode_default
# Load training data
if isinstance(x, str):
logger.info("Loading evaluation data")
x = load_and_check(x)
# Scale observables
x = self._transform_inputs(x)
        # Restrict features
if self.features is not None:
x = x[:, self.features]
# Evaluation
logger.info("Starting score evaluation")
t_hat = evaluate_local_score_model(model=self.model, xs=x)
        # Treatment of nuisance parameters
if nuisance_mode == "keep":
logger.debug("Keeping nuisance parameter in score")
elif nuisance_mode == "project":
if self.nuisance_project_matrix is None:
raise ValueError(
"evaluate_score() was called with nuisance_mode = project, but nuisance parameters "
"have not been set up yet. Please call set_nuisance() first!"
)
logger.info("Projecting nuisance parameter from score")
t_hat = np.einsum("ij,xj->xi", self.nuisance_project_matrix, t_hat)
elif nuisance_mode == "profile":
if self.nuisance_profile_matrix is None:
raise ValueError(
"evaluate_score() was called with nuisance_mode = profile, but nuisance parameters "
"have not been set up yet. Please call set_nuisance() first!"
)
logger.info("Profiling nuisance parameter from score")
t_hat = np.einsum("ij,xj->xi", self.nuisance_profile_matrix, t_hat)
else:
raise ValueError("Unknown nuisance_mode {}".format(nuisance_mode))
return t_hat
    def evaluate_log_likelihood(self, *args, **kwargs):
raise TheresAGoodReasonThisDoesntWork("This estimator can only estimate the score, not the likelihood!")
    def evaluate_log_likelihood_ratio(self, *args, **kwargs):
raise TheresAGoodReasonThisDoesntWork("This estimator can only estimate the score, not the likelihood ratio!")
    def evaluate(self, *args, **kwargs):
return self.evaluate_score(*args, **kwargs)
    def save(self, filename, save_model=False):
super(ScoreEstimator, self).save(filename, save_model)
        # Also save Fisher information for profiling / projections
if self.nuisance_profile_matrix is not None and self.nuisance_project_matrix is not None:
logger.debug(
"Saving nuisance profiling / projection information to %s_nuisance_profile_matrix.npy and "
"%s_nuisance_project_matrix.npy",
filename,
filename,
)
np.save(filename + "_nuisance_profile_matrix.npy", self.nuisance_profile_matrix)
np.save(filename + "_nuisance_project_matrix.npy", self.nuisance_project_matrix)
    def load(self, filename):
super(ScoreEstimator, self).load(filename)
# Load scaling
try:
self.nuisance_profile_matrix = np.load(filename + "_nuisance_profile_matrix.npy")
self.nuisance_project_matrix = np.load(filename + "_nuisance_project_matrix.npy")
logger.debug(
" Found nuisance profiling / projection matrices:\nProfiling:\n%s\nProjection:\n%s",
self.nuisance_profile_matrix,
self.nuisance_project_matrix,
)
        except FileNotFoundError:
logger.debug("Did not find nuisance profiling / projection setup in %s", filename)
self.nuisance_profile_matrix = None
self.nuisance_project_matrix = None
def _create_model(self):
self.model = DenseLocalScoreModel(
n_observables=self.n_observables,
n_parameters=self.n_parameters,
n_hidden=self.n_hidden,
activation=self.activation,
dropout_prob=self.dropout_prob,
)
@staticmethod
def _package_training_data(x, t_xz):
data = OrderedDict()
data["x"] = x
data["t_xz"] = t_xz
return data
def _wrap_settings(self):
settings = super(ScoreEstimator, self)._wrap_settings()
settings["estimator_type"] = "score"
settings["nuisance_mode_default"] = self.nuisance_mode_default
return settings
def _unwrap_settings(self, settings):
super(ScoreEstimator, self)._unwrap_settings(settings)
estimator_type = str(settings["estimator_type"])
if estimator_type != "score":
raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
try:
self.nuisance_mode_default = str(settings["nuisance_mode_default"])
except KeyError:
self.nuisance_mode_default = "keep"
logger.warning("Did not find entry nuisance_mode_default in saved model, using default 'keep'.")
class LikelihoodEstimator(ConditionalEstimator):
""" A neural estimator of the density or likelihood evaluated at a reference hypothesis as a function
of the observation x.
Parameters
----------
features : list of int or None, optional
Indices of observables (features) that are used as input to the neural networks. If None, all observables
are used. Default value: None.
n_components : int, optional
The number of Gaussian base components in a MADE MoG. If 1, a plain MADE is used.
Default value: 1.
n_mades : int, optional
            The number of MADE layers. Default value: 5.
n_hidden : tuple of int, optional
Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the
setup of each individual MADE layer. Default value: (100,).
activation : {'tanh', 'sigmoid', 'relu'}, optional
Activation function. Default value: 'tanh'.
        batch_norm : None or float, optional
If not None, batch normalization is used, where this value sets the alpha parameter in the calculation
of the running average of the mean and variance. Default value: None.
"""
def __init__(self, features=None, n_components=1, n_mades=5, n_hidden=(100,), activation="tanh", batch_norm=None):
super(LikelihoodEstimator, self).__init__(features, n_hidden, activation, dropout_prob=0.0)
self.n_components = n_components
self.n_mades = n_mades
self.batch_norm = batch_norm
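# A construction sketch (illustrative hyperparameters, not recommendations): with n_components=1 this
# builds a conditional MAF, with n_components > 1 a conditional MoG-MAF.
#
#     estimator = LikelihoodEstimator(n_components=2, n_mades=5, n_hidden=(100, 100), activation="tanh")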
[docs] def train(
self,
method,
x,
theta,
t_xz=None,
x_val=None,
theta_val=None,
t_xz_val=None,
alpha=1.0,
optimizer="amsgrad",
n_epochs=50,
batch_size=128,
initial_lr=0.001,
final_lr=0.0001,
nesterov_momentum=None,
validation_split=0.25,
early_stopping=True,
scale_inputs=True,
shuffle_labels=False,
limit_samplesize=None,
memmap=False,
verbose="some",
scale_parameters=True,
n_workers=8,
):
"""
Trains the network.
Parameters
----------
method : str
The inference method used for training. Allowed values are 'nde' and 'scandal'.
x : ndarray or str
Observations, or filename of a pickled numpy array.
theta : ndarray or str
Parameter points, or filename of a pickled numpy array.
t_xz : ndarray or str or None, optional
Joint scores at theta, or filename of a pickled numpy array. Default value: None.
x_val : ndarray or str or None, optional
Validation observations, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
theta_val : ndarray or str or None, optional
Validation parameter points, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
t_xz_val : ndarray or str or None, optional
Validation joint scores at theta, or filename of a pickled numpy array. If None
and validation_split > 0, validation data will be randomly selected from the training data.
Default value: None.
alpha : float, optional
Hyperparameter weighting the score error in the loss function of the 'scandal' method.
Default value: 1.
optimizer : {"adam", "amsgrad", "sgd"}, optional
Optimization algorithm. Default value: "amsgrad".
n_epochs : int, optional
Number of epochs. Default value: 50.
batch_size : int, optional
Batch size. Default value: 128.
initial_lr : float, optional
Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value:
0.001.
final_lr : float, optional
Learning rate during the last epoch. Default value: 0.0001.
nesterov_momentum : float or None, optional
If trainer is "sgd", sets the Nesterov momentum. Default value: None.
validation_split : float or None, optional
Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire
sample is used for training and early stopping is deactivated. Default value: 0.25.
early_stopping : bool, optional
Activates early stopping based on the validation loss (only if validation_split is not None). Default value:
True.
scale_inputs : bool, optional
Scale the observables to zero mean and unit variance. Default value: True.
shuffle_labels : bool, optional
If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their
normal order. This serves as a closure test, in particular as a cross-check against overfitting: an estimator
trained with shuffle_labels=True should predict likelihood ratios around 1 and scores around 0. Default
value: False.
limit_samplesize : int or None, optional
If not None, only this number of samples (events) is used to train the estimator. Default value: None.
memmap : bool, optional
If True, training files larger than 1 GB will not be loaded into memory at once. Default value: False.
verbose : {"all", "many", "some", "few", "none"}, optional
Determines verbosity of training. Default value: "some".
scale_parameters : bool, optional
Whether parameters are rescaled to mean zero and unit variance before going into the neural network.
Default value: True.
n_workers : int, optional
Number of worker processes used by the PyTorch DataLoader during training. Default value: 8.
Returns
-------
None
"""
logger.info("Starting training")
logger.info(" Method: %s", method)
if method == "scandal":
logger.info(" alpha: %s", alpha)
logger.info(" Batch size: %s", batch_size)
logger.info(" Optimizer: %s", optimizer)
logger.info(" Epochs: %s", n_epochs)
logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr)
if optimizer == "sgd":
logger.info(" Nesterov momentum: %s", nesterov_momentum)
logger.info(" Validation split: %s", validation_split)
logger.info(" Early stopping: %s", early_stopping)
logger.info(" Scale inputs: %s", scale_inputs)
logger.info(" Shuffle labels %s", shuffle_labels)
if limit_samplesize is None:
logger.info(" Samples: all")
else:
logger.info(" Samples: %s", limit_samplesize)
# Load training data
logger.info("Loading training data")
memmap_threshold = 1.0 if memmap else None
theta = load_and_check(theta, memmap_files_larger_than_gb=memmap_threshold)
x = load_and_check(x, memmap_files_larger_than_gb=memmap_threshold)
t_xz = load_and_check(t_xz, memmap_files_larger_than_gb=memmap_threshold)
self._check_required_data(method, t_xz)
# Infer dimensions of problem
n_samples = x.shape[0]
n_observables = x.shape[1]
n_parameters = theta.shape[1]
logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables)
# Limit sample size
if limit_samplesize is not None and limit_samplesize < n_samples:
logger.info("Only using %s of %s training samples", limit_samplesize, n_samples)
x, theta, t_xz = restrict_samplesize(limit_samplesize, x, theta, t_xz)
# Validation data
external_validation = x_val is not None and theta_val is not None
if external_validation:
theta_val = load_and_check(theta_val, memmap_files_larger_than_gb=memmap_threshold)
x_val = load_and_check(x_val, memmap_files_larger_than_gb=memmap_threshold)
t_xz_val = load_and_check(t_xz_val, memmap_files_larger_than_gb=memmap_threshold)
logger.info("Found %s separate validation samples", x_val.shape[0])
assert x_val.shape[1] == n_observables
assert theta_val.shape[1] == n_parameters
if t_xz is not None:
assert t_xz_val is not None, "When providing t_xz and sep. validation data, also provide t_xz_val"
# Scale features
if scale_inputs:
self.initialize_input_transform(x, overwrite=False)
x = self._transform_inputs(x)
if external_validation:
x_val = self._transform_inputs(x_val)
else:
self.initialize_input_transform(x, False, overwrite=False)
# Scale parameters
if scale_parameters:
logger.info("Rescaling parameters")
self.initialize_parameter_transform(theta)
theta = self._transform_parameters(theta)
t_xz = self._transform_score(t_xz, inverse=False)
if external_validation:
t_xz_val = self._transform_score(t_xz_val, inverse=False)
else:
self.initialize_parameter_transform(theta, False)
# Shuffle labels
if shuffle_labels:
logger.info("Shuffling labels")
t_xz = shuffle(t_xz)
# Features
if self.features is not None:
x = x[:, self.features]
logger.info("Only using %s of %s observables", x.shape[1], n_observables)
n_observables = x.shape[1]
if external_validation:
x_val = x_val[:, self.features]
# Check consistency of input with model
if self.n_observables is None:
self.n_observables = n_observables
if self.n_parameters is None:
self.n_parameters = n_parameters
if n_parameters != self.n_parameters:
raise RuntimeError(
"Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters)
)
if n_observables != self.n_observables:
raise RuntimeError(
"Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables)
)
# Data
data = self._package_training_data(method, x, theta, t_xz)
if external_validation:
data_val = self._package_training_data(method, x_val, theta_val, t_xz_val)
else:
data_val = None
# Create model
if self.model is None:
self._create_model()
# Losses
loss_functions, loss_labels, loss_weights = get_loss(method, alpha)
# Optimizer
opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum)
# Train model
logger.info("Training model")
trainer = FlowTrainer(self.model, n_workers=n_workers)
result = trainer.train(
data=data,
data_val=data_val,
loss_functions=loss_functions,
loss_weights=loss_weights,
loss_labels=loss_labels,
epochs=n_epochs,
batch_size=batch_size,
optimizer=opt,
optimizer_kwargs=opt_kwargs,
initial_lr=initial_lr,
final_lr=final_lr,
validation_split=validation_split,
early_stopping=early_stopping,
verbose=verbose,
)
return result
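# A training sketch with the SCANDAL loss (hypothetical file names; the .npy files hold the pickled
# numpy arrays described above, e.g. as produced by madminer.sampling.SampleAugmenter):
#
#     estimator.train(
#         method="scandal",
#         x="data/samples/x_train.npy",
#         theta="data/samples/theta0_train.npy",
#         t_xz="data/samples/t_xz_train.npy",
#         alpha=1.0,
#         n_epochs=50,
#         batch_size=128,
#     )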
[docs] def evaluate_log_likelihood(self, x, theta, test_all_combinations=True, evaluate_score=False):
"""
Evaluates the log likelihood as a function of the observation x and the parameter point theta.
Parameters
----------
x : ndarray or str
Sample of observations, or path to numpy file with observations.
theta : ndarray or str
Parameter points, or path to numpy file with parameter points.
test_all_combinations : bool, optional
If False, the number of samples in the observable and theta
files has to match, and the likelihood is evaluated only for the combinations
`p(x_i | theta_i)`. If True, `p(x_i | theta_j)` for all pairwise combinations `i, j`
are evaluated. Default value: True.
evaluate_score : bool, optional
Sets whether in addition to the log likelihood the score is evaluated. Default value: False.
Returns
-------
log_likelihood : ndarray
The estimated log likelihood. If test_all_combinations is True, the result has shape `(n_thetas, n_x)`.
Otherwise, it has shape `(n_samples,)`.
score : ndarray or None
None if evaluate_score is False. Otherwise the derived estimated score at `theta`. If test_all_combinations
is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape
`(n_samples, n_parameters)`.
"""
if self.model is None:
raise ValueError("No model -- train or load model before evaluating it!")
# Load evaluation data
if isinstance(x, six.string_types):
logger.info("Loading evaluation data")
theta = load_and_check(theta)
x = load_and_check(x)
# Scale observables
x = self._transform_inputs(x)
# Restrict features
if self.features is not None:
x = x[:, self.features]
# Evaluate
all_log_p_hat = []
all_t_hat = []
if test_all_combinations:
logger.info("Starting ratio evaluation for %s x-theta combinations", len(theta) * len(x))
for i, this_theta in enumerate(theta):
logger.debug("Starting log likelihood evaluation for thetas %s / %s: %s", i + 1, len(theta), this_theta)
log_p_hat, t_hat = evaluate_flow_model(
model=self.model, thetas=[this_theta], xs=x, evaluate_score=evaluate_score
)
all_log_p_hat.append(log_p_hat)
all_t_hat.append(t_hat)
all_log_p_hat = np.array(all_log_p_hat)
all_t_hat = np.array(all_t_hat)
else:
logger.info("Starting log likelihood evaluation")
all_log_p_hat, all_t_hat = evaluate_flow_model(
model=self.model, thetas=theta, xs=x, evaluate_score=evaluate_score
)
logger.info("Evaluation done")
return all_log_p_hat, all_t_hat
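# An evaluation sketch (hypothetical shapes, assuming a single parameter component):
#
#     x_test = np.random.normal(size=(1000, estimator.n_observables))  # stand-in observations
#     theta_grid = np.linspace(-1.0, 1.0, 21).reshape(-1, 1)           # 21-point 1d parameter scan
#     log_p, _ = estimator.evaluate_log_likelihood(x_test, theta_grid, test_all_combinations=True)
#     # log_p has shape (21, 1000): one log likelihood per (theta, x) combination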
[docs] def evaluate_log_likelihood_ratio(self, x, theta0, theta1, test_all_combinations=True, evaluate_score=False):
"""
Evaluates the log likelihood ratio as a function of the observation x, the numerator parameter point theta0,
and the denominator parameter point theta1.
Parameters
----------
x : ndarray or str
Sample of observations, or path to numpy file with observations.
theta0 : ndarray or str
Numerator parameters, or path to numpy file.
theta1 : ndarray or str
Denominator parameters, or path to numpy file.
test_all_combinations : bool, optional
If False, the number of samples in the observable and theta
files has to match, and the likelihood ratio is evaluated only for the combinations
`r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j`
are evaluated. Default value: True.
evaluate_score : bool, optional
Sets whether in addition to the log likelihood ratio the scores at theta0 and theta1 are evaluated.
Default value: False.
Returns
-------
log_likelihood_ratio : ndarray
The estimated log likelihood ratio. If test_all_combinations is True, the result has shape
`(n_thetas, n_x)`. Otherwise, it has shape `(n_samples,)`.
score0 : ndarray or None
None if evaluate_score is False. Otherwise the derived estimated score at `theta0`. If
test_all_combinations is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has
shape `(n_samples, n_parameters)`.
score1 : ndarray or None
None if evaluate_score is False. Otherwise the derived estimated score at `theta1`, with the same shape
as score0.
"""
if self.model is None:
raise ValueError("No model -- train or load model before evaluating it!")
# Load evaluation data
logger.info("Loading evaluation data")
x = load_and_check(x)
theta0 = load_and_check(theta0)
theta1 = load_and_check(theta1)
# Scale observables
x = self._transform_inputs(x)
# Restrict features
if self.features is not None:
x = x[:, self.features]
# Balance thetas
if len(theta1) > len(theta0):
theta0 = [theta0[i % len(theta0)] for i in range(len(theta1))]
elif len(theta1) < len(theta0):
theta1 = [theta1[i % len(theta1)] for i in range(len(theta0))]
log_p_hat0, t_hat0 = self.evaluate_log_likelihood(
x, theta0, test_all_combinations=test_all_combinations, evaluate_score=evaluate_score
)
log_p_hat1, t_hat1 = self.evaluate_log_likelihood(
x, theta1, test_all_combinations=test_all_combinations, evaluate_score=evaluate_score
)
log_r_hat = log_p_hat0 - log_p_hat1
return log_r_hat, t_hat0, t_hat1
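# The ratio is derived from two density evaluations: log r(x | theta0, theta1) = log p(x | theta0)
# - log p(x | theta1). A sketch (arrays as in the example above, hypothetical):
#
#     log_r, t0, t1 = estimator.evaluate_log_likelihood_ratio(
#         x_test, theta0=theta_grid, theta1=np.zeros((1, 1)), test_all_combinations=True
#     )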
[docs] def evaluate_score(self, *args, **kwargs):
raise NotImplementedError("Please use evaluate_log_likelihood(evaluate_score=True).")
[docs] def evaluate(self, *args, **kwargs):
return self.evaluate_log_likelihood(*args, **kwargs)
def _create_model(self):
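# One Gaussian base component (n_components == 1) gives a plain conditional MAF; several components
# give a conditional MoG-MAF, which is better suited to multi-modal densities.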
if self.n_components > 1:
self.model = ConditionalMixtureMaskedAutoregressiveFlow(
n_conditionals=self.n_parameters,
n_inputs=self.n_observables,
n_components=self.n_components,
n_hiddens=self.n_hidden,
n_mades=self.n_mades,
activation=self.activation,
batch_norm=self.batch_norm is not None,
alpha=self.batch_norm,
)
else:
self.model = ConditionalMaskedAutoregressiveFlow(
n_conditionals=self.n_parameters,
n_inputs=self.n_observables,
n_hiddens=self.n_hidden,
n_mades=self.n_mades,
activation=self.activation,
batch_norm=self.batch_norm is not None,
alpha=self.batch_norm,
)
@staticmethod
def _check_required_data(method, t_xz):
if method == ["scandal"] and t_xz is None:
raise RuntimeError("Method {} requires joint score information".format(method))
@staticmethod
def _package_training_data(method, x, theta, t_xz):
data = OrderedDict()
data["x"] = x
data["theta"] = theta
if method in ["scandal"]:
data["t_xz"] = t_xz
return data
def _wrap_settings(self):
settings = super(LikelihoodEstimator, self)._wrap_settings()
settings["estimator_type"] = "likelihood"
settings["n_components"] = self.n_components
settings["batch_norm"] = self.batch_norm
settings["n_mades"] = self.n_mades
return settings
def _unwrap_settings(self, settings):
super(LikelihoodEstimator, self)._unwrap_settings(settings)
estimator_type = str(settings["estimator_type"])
if estimator_type != "likelihood":
raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type))
self.n_components = int(settings["n_components"])
self.n_mades = int(settings["n_mades"])
self.batch_norm = settings["batch_norm"]
if self.batch_norm == "None":
self.batch_norm = None
if self.batch_norm is not None:
self.batch_norm = float(self.batch_norm)
[docs]class Ensemble:
"""
Ensemble methods for likelihood, likelihood ratio, and score estimation.
Generally, Ensemble instances can be used very similarly to Estimator instances:
* The initialization of Ensemble takes a list of (trained or untrained) Estimator instances.
* The methods `Ensemble.train_one()` and `Ensemble.train_all()` train the estimators (this can also be
done outside of Ensemble).
* `Ensemble.calculate_expectation()` can be used to calculate the expectation of the estimated likelihood
ratio or the expected estimated score over a validation sample. Ideally (and assuming the correct sampling),
these expectation values should be close to zero. Deviations from zero therefore point out that the estimator
is probably inaccurate.
* `Ensemble.evaluate_log_likelihood()`, `Ensemble.evaluate_log_likelihood_ratio()`, `Ensemble.evaluate_score()`,
and `Ensemble.calculate_fisher_information()` can then be used to calculate
ensemble predictions.
* `Ensemble.save()` and `Ensemble.load()` can store all estimators in one folder.
The individual estimators in the ensemble can be trained with different methods, but they have to be of the same
type: either all estimators are ParameterizedRatioEstimator instances, or all estimators are
DoubleParameterizedRatioEstimator instances, or all estimators are ScoreEstimator instances, or all estimators are
LikelihoodEstimator instances.
Parameters
----------
estimators : None or list of Estimator, optional
If a list, sets the estimators directly; further estimators can be added with `add_estimator()`. If None,
the ensemble is initialized without estimators. Note that the estimators have to be consistent: all of them
have to be of the same type, i.e. all ParameterizedRatioEstimator, DoubleParameterizedRatioEstimator,
ScoreEstimator, or LikelihoodEstimator instances. The estimators within one of these categories can be
trained with different methods, but mixing estimator types is not supported and will raise a RuntimeError.
Default value: None.
Attributes
----------
estimators : list of Estimator
The estimators in the form of Estimator instances.
"""
def __init__(self, estimators=None):
self.n_parameters = None
self.n_observables = None
self.estimator_type = None
# Initialize estimators
if estimators is None:
self.estimators = []
else:
self.estimators = []
for estimator in estimators:
if isinstance(estimator, Estimator):
self.estimators.append(estimator)
else:
raise ValueError("Entry {} in estimators is neither str nor Estimator instance")
self.n_estimators = len(self.estimators)
self._check_consistency()
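# An ensemble construction sketch (hypothetical: five independently initialized likelihood estimators):
#
#     ensemble = Ensemble([LikelihoodEstimator(n_mades=5) for _ in range(5)])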
[docs] def add_estimator(self, estimator):
"""
Adds an estimator to the ensemble.
Parameters
----------
estimator : Estimator
The estimator.
Returns
-------
None
"""
if not isinstance(estimator, Estimator):
raise ValueError("Entry {} in estimators is neither str nor Estimator instance")
self.estimators.append(estimator)
self.n_estimators = len(self.estimators)
self._check_consistency()
[docs] def train_one(self, i, **kwargs):
"""
Trains an individual estimator. See `Estimator.train()`.
Parameters
----------
i : int
The index `0 <= i < n_estimators` of the estimator to be trained.
kwargs : dict
Parameters for `Estimator.train()`.
Returns
-------
None
"""
self.estimators[i].train(**kwargs)
[docs] def train_all(self, **kwargs):
"""
Trains all estimators. See `Estimator.train()`.
Parameters
----------
kwargs : dict
Parameters for `Estimator.train()`. If a value in this dict is a list, it has to have length `n_estimators`
and contain one value of this parameter for each of the estimators. Otherwise the value is used as parameter
for the training of all the estimators.
Returns
-------
None
"""
logger.info("Training %s estimators in ensemble", self.n_estimators)
for key, value in six.iteritems(kwargs):
if not isinstance(value, list):
kwargs[key] = [value for _ in range(self.n_estimators)]
assert len(kwargs[key]) == self.n_estimators, "Keyword {} has wrong length {}".format(key, len(value))
for i, estimator in enumerate(self.estimators):
kwargs_this_estimator = {}
for key, value in six.iteritems(kwargs):
kwargs_this_estimator[key] = value[i]
logger.info("Training estimator %s / %s in ensemble", i + 1, self.n_estimators)
estimator.train(**kwargs_this_estimator)
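# train_all() broadcasts scalar keyword arguments to all estimators and distributes list-valued ones
# element by element. A sketch for n_estimators == 3 (hypothetical file names):
#
#     ensemble.train_all(
#         method="scandal",               # shared by all three estimators
#         x="x_train.npy",
#         theta="theta_train.npy",
#         t_xz="t_xz_train.npy",
#         alpha=[0.1, 1.0, 10.0],         # one value per estimator
#     )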
[docs] def evaluate_log_likelihood(self, estimator_weights=None, calculate_covariance=False, **kwargs):
"""
Estimates the log likelihood from each estimator and returns the ensemble mean (and, if calculate_covariance is
True, the covariance between them).
Parameters
----------
estimator_weights : ndarray or None, optional
Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None.
calculate_covariance : bool, optional
If True, the covariance between the different estimators is calculated. Default value: False.
kwargs
Arguments for the evaluation. See the documentation of the relevant Estimator class.
Returns
-------
log_likelihood : ndarray
Mean prediction for the log likelihood.
covariance : ndarray or None
If calculate_covariance is True, the covariance matrix between the estimators. Otherwise None.
"""
logger.info("Evaluating %s estimators in ensemble", self.n_estimators)
# Calculate weights of each estimator in vote
if estimator_weights is None:
estimator_weights = np.ones(self.n_estimators)
assert len(estimator_weights) == self.n_estimators
estimator_weights /= np.sum(estimator_weights)
logger.debug("Estimator weights: %s", estimator_weights)
# Calculate estimator predictions
predictions = []
for i, estimator in enumerate(self.estimators):
logger.info("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators)
predictions.append(estimator.evaluate_log_likelihood(**kwargs)[0])
predictions = np.array(predictions)
# Calculate weighted means and covariance matrices
mean = np.average(predictions, axis=0, weights=estimator_weights)
if calculate_covariance:
predictions_flat = predictions.reshape((predictions.shape[0], -1))
covariance = np.cov(predictions_flat.T, aweights=estimator_weights)
covariance = covariance.reshape(list(predictions.shape[1:]) + list(predictions.shape[1:]))
else:
covariance = None
return mean, covariance
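# An ensemble evaluation sketch: the mean is the weighted average over estimators, and the optional
# covariance quantifies their spread (x_test / theta_grid as in the single-estimator example above):
#
#     mean_log_p, cov = ensemble.evaluate_log_likelihood(
#         x=x_test, theta=theta_grid, calculate_covariance=True
#     )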
[docs] def evaluate_log_likelihood_ratio(self, estimator_weights=None, calculate_covariance=False, **kwargs):
"""
Estimates the log likelihood ratio from each estimator and returns the ensemble mean (and, if
calculate_covariance is True, the covariance between them).
Parameters
----------
estimator_weights : ndarray or None, optional
Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None.
calculate_covariance : bool, optional
If True, the covariance between the different estimators is calculated. Default value: False.
kwargs
Arguments for the evaluation. See the documentation of the relevant Estimator class.
Returns
-------
log_likelihood_ratio : ndarray
Mean prediction for the log likelihood ratio.
covariance : ndarray or None
If calculate_covariance is True, the covariance matrix between the estimators. Otherwise None.
"""
logger.info("Evaluating %s estimators in ensemble", self.n_estimators)
# Calculate weights of each estimator in vote
if estimator_weights is None:
estimator_weights = np.ones(self.n_estimators)
assert len(estimator_weights) == self.n_estimators
estimator_weights /= np.sum(estimator_weights)
logger.debug("Estimator weights: %s", estimator_weights)
# Calculate estimator predictions
predictions = []
for i, estimator in enumerate(self.estimators):
logger.info("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators)
predictions.append(estimator.evaluate_log_likelihood_ratio(**kwargs)[0])
predictions = np.array(predictions)
# Calculate weighted means and covariance matrices
mean = np.average(predictions, axis=0, weights=estimator_weights)
if calculate_covariance:
predictions_flat = predictions.reshape((predictions.shape[0], -1))
covariance = np.cov(predictions_flat.T, aweights=estimator_weights)
covariance = covariance.reshape(list(predictions.shape[1:]) + list(predictions.shape[1:]))
else:
covariance = None
return mean, covariance
[docs] def evaluate_score(self, estimator_weights=None, calculate_covariance=False, **kwargs):
"""
Estimates the score from each estimator and returns the ensemble mean (and, if
calculate_covariance is True, the covariance between them).
Parameters
----------
estimator_weights : ndarray or None, optional
Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None.
calculate_covariance : bool, optional
If True, the covariance between the different estimators is calculated. Default value: False.
kwargs
Arguments for the evaluation. See the documentation of the relevant Estimator class.
Returns
-------
score : ndarray
Mean prediction for the score.
covariance : ndarray or None
If calculate_covariance is True, the covariance matrix between the estimators. Otherwise None.
"""
logger.info("Evaluating %s estimators in ensemble", self.n_estimators)
# Calculate weights of each estimator in vote
if estimator_weights is None:
estimator_weights = np.ones(self.n_estimators)
assert len(estimator_weights) == self.n_estimators
estimator_weights /= np.sum(estimator_weights)
logger.debug("Estimator weights: %s", estimator_weights)
# Calculate estimator predictions
predictions = []
for i, estimator in enumerate(self.estimators):
logger.info("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators)
predictions.append(estimator.evaluate_score(**kwargs))
predictions = np.array(predictions)
# Calculate weighted means and covariance matrices
mean = np.average(predictions, axis=0, weights=estimator_weights)
if calculate_covariance:
predictions_flat = predictions.reshape((predictions.shape[0], -1))
covariance = np.cov(predictions_flat.T, aweights=estimator_weights)
covariance = covariance.reshape(list(predictions.shape[1:]) + list(predictions.shape[1:]))
else:
covariance = None
return mean, covariance
[docs] def save(self, folder, save_model=False):
"""
Saves the estimator ensemble to a folder.
Parameters
----------
folder : str
Path to the folder.
save_model : bool, optional
If True, the whole model is saved in addition to the state dict. This is not necessary for loading it
again with Ensemble.load(), but can be useful for debugging, for instance to plot the computational
graph.
Returns
-------
None
"""
# Check paths
create_missing_folders([folder])
# Save ensemble settings
logger.debug("Saving ensemble setup to %s/ensemble.json", folder)
settings = {"estimator_type": self.estimator_type, "n_estimators": self.n_estimators}
with open(folder + "/ensemble.json", "w") as f:
json.dump(settings, f)
# Save estimators
for i, estimator in enumerate(self.estimators):
estimator.save(folder + "/estimator_" + str(i), save_model=save_model)
[docs] def load(self, folder):
"""
Loads the estimator ensemble from a folder.
Parameters
----------
folder : str
Path to the folder.
Returns
-------
None
"""
# Load ensemble settings
logger.debug("Loading ensemble setup from %s/ensemble.json", folder)
with open(folder + "/ensemble.json", "r") as f:
settings = json.load(f)
self.n_estimators = int(settings["n_estimators"])
try:
estimator_type = str(settings["estimator_type"])
except KeyError:
raise RuntimeError(
"Can't find estimator type information in file. Maybe this file was created with"
" an incompatible MadMiner version < v0.3.0?"
)
logger.info("Found %s ensemble with %s estimators", estimator_type, self.n_estimators)
# Load estimators
self.estimators = []
for i in range(self.n_estimators):
estimator = self._get_estimator_class(estimator_type)()
estimator.load(folder + "/estimator_" + str(i))
self.estimators.append(estimator)
self._check_consistency()
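# A persistence sketch: save() writes ensemble.json plus one file set per estimator (estimator_0,
# estimator_1, ...) into the folder, and load() restores them (hypothetical path):
#
#     ensemble.save("models/ensemble")
#     restored = Ensemble()
#     restored.load("models/ensemble")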
def _check_consistency(self):
"""
Internal function that checks if all estimators belong to the same category
(single-parameterized likelihood ratio estimator, doubly parameterized likelihood ratio estimator,
local score estimator, or likelihood estimator).
Raises
------
RuntimeError
Estimators are inconsistent.
"""
# Accumulate methods of all estimators
all_types = [self._get_estimator_type(estimator) for estimator in self.estimators]
all_n_parameters = [estimator.n_parameters for estimator in self.estimators]
all_n_observables = [estimator.n_observables for estimator in self.estimators]
# Check consistency of methods
self.estimator_type = None
for estimator_type in all_types:
if self.estimator_type is None:
self.estimator_type = estimator_type
if self.estimator_type != estimator_type:
raise RuntimeError(
"Ensemble with inconsistent estimator types! All estimators have to be of the same type:"
" single-parameterized ratio estimators, doubly parameterized ratio estimators, local score"
" estimators, or likelihood estimators. Found types " + ", ".join(all_types) + "."
)
# Check consistency of parameter and observable numbers
self.n_parameters = None
self.n_observables = None
for estimator_n_parameters, estimator_n_observables in zip(all_n_parameters, all_n_observables):
if self.n_parameters is None:
self.n_parameters = estimator_n_parameters
if self.n_observables is None:
self.n_observables = estimator_n_observables
if self.n_parameters is not None and self.n_parameters != estimator_n_parameters:
raise RuntimeError(
"Ensemble with inconsistent numbers of parameters for different estimators: {}".format(all_n_parameters)
)
if self.n_observables is not None and self.n_observables != estimator_n_observables:
raise RuntimeError(
"Ensemble with inconsistent numbers of observables for different estimators: {}".format(all_n_observables)
)
@staticmethod
def _get_estimator_type(estimator):
if not isinstance(estimator, Estimator):
raise RuntimeError("Estimator is not an Estimator instance!")
if isinstance(estimator, ParameterizedRatioEstimator):
return "parameterized_ratio"
elif isinstance(estimator, DoubleParameterizedRatioEstimator):
return "double_parameterized_ratio"
elif isinstance(estimator, ScoreEstimator):
return "score"
elif isinstance(estimator, LikelihoodEstimator):
return "likelihood"
else:
raise RuntimeError("Estimator is an unknown Estimator type!")
@staticmethod
def _get_estimator_class(estimator_type):
if estimator_type == "parameterized_ratio":
return ParameterizedRatioEstimator
elif estimator_type == "double_parameterized_ratio":
return DoubleParameterizedRatioEstimator
elif estimator_type == "score":
return ScoreEstimator
elif estimator_type == "likelihood":
return LikelihoodEstimator
else:
raise RuntimeError("Unknown estimator type {}!".format(estimator_type))
[docs]def load_estimator(filename):
if os.path.isdir(filename):
model = Ensemble()
model.load(filename)
else:
with open(filename + "_settings.json", "r") as f:
settings = json.load(f)
try:
estimator_type = settings["estimator_type"]
except KeyError:
raise RuntimeError("Undefined estimator type")
if estimator_type == "parameterized_ratio":
model = ParameterizedRatioEstimator()
elif estimator_type == "double_parameterized_ratio":
model = DoubleParameterizedRatioEstimator()
elif estimator_type == "score":
model = ScoreEstimator()
elif estimator_type == "likelihood":
model = LikelihoodEstimator()
else:
raise RuntimeError("Unknown estimator type {}!".format(estimator_type))
model.load(filename)
return model
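# load_estimator() dispatches on the path: a directory is loaded as an Ensemble, anything else is
# treated as a single-estimator file prefix whose type is read from its _settings.json
# (hypothetical paths):
#
#     single = load_estimator("models/nde")         # e.g. a LikelihoodEstimator
#     group = load_estimator("models/ensemble")     # an Ensemble folder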
[docs]class TheresAGoodReasonThisDoesntWork(Exception):
pass