Source code for madminer.plotting.uncertainties

from __future__ import absolute_import, division, print_function, unicode_literals

import six
import logging
import numpy as np
from matplotlib import pyplot as plt, gridspec

from ..utils.morphing import NuisanceMorpher
from ..utils.various import mdot, shuffle, sanitize_array
from ..sampling import SampleAugmenter

logger = logging.getLogger(__name__)


[docs]def plot_uncertainty( filename, theta, observable, obs_label, obs_range, n_bins=50, systematics=None, n_events=None, n_toys=100, linecolor="black", bandcolor1="#CC002E", bandcolor2="orange", ratio_range=(0.8, 1.2), ): """ Plots absolute and relative uncertainty bands in a histogram of one observable in a MadMiner file. Parameters ---------- filename : str Filename of a MadMiner HDF5 file. theta : ndarray, optional Which parameter points to use for histogramming the data. observable : str Which observable to plot, given by its name in the MadMiner file. obs_label : str x-axis label naming the observable. obs_range : tuple of two float Range to be plotted for the observable. n_bins : int Number of bins. Default value: 50. systematics : None or list of str, optional This can restrict which nuisance parameters are used to draw the uncertainty bands. Each entry of this list is the name of a systematic uncertainty (see `MadMiner.add_systematics()`). n_events : None or int, optional If not None, sets the number of events from the MadMiner file that will be analyzed and plotted. Default value: None. n_toys : int, optional Number of toy nuisance parameter vectors used to estimate the systematic uncertainties. Default value: 100. linecolor : str, optional Line color for central prediction. Default value: "black". bandcolor1 : str, optional Error band color for 1 sigma uncertainty. Default value: "#CC002E". bandcolor2 : str, optional Error band color for 2 sigma uncertainty. Default value: "orange". ratio_range : tuple of two floar y-axis range for the plots of the ratio to the central prediction. Default value: (0.8, 1.2). Returns ------- figure : Figure Plot as Matplotlib Figure instance. """ # Load data sa = SampleAugmenter(filename, include_nuisance_parameters=True) nuisance_morpher = NuisanceMorpher( sa.nuisance_parameters, list(sa.benchmarks.keys()), reference_benchmark=sa.reference_benchmark ) # Observable index obs_idx = list(sa.observables.keys()).index(observable) # Get event data (observations and weights) x, weights_benchmarks = sa.weighted_events() x = x[:, obs_idx] # Theta matrix theta_matrix = sa._get_theta_benchmark_matrix(theta) weights = mdot(theta_matrix, weights_benchmarks) # Remove negative weights x = x[weights >= 0.0] weights_benchmarks = weights_benchmarks[weights >= 0.0] weights = weights[weights >= 0.0] # Shuffle events x, weights, weights_benchmarks = shuffle(x, weights, weights_benchmarks) # Only analyze n_events if n_events is not None and n_events < x.shape[0]: x = x[:n_events] weights_benchmarks = weights_benchmarks[:n_events] weights = weights[:n_events] # Nuisance parameters n_nuisance_params = sa.n_nuisance_parameters nuisance_toys = np.random.normal(loc=0.0, scale=1.0, size=n_nuisance_params * n_toys) nuisance_toys = nuisance_toys.reshape(n_toys, n_nuisance_params) # Restrict nuisance parameters if systematics is not None: nuisance_parameters = [] for npar, (npar_syst, _, _) in six.iteritems(sa.nuisance_parameters): if npar_syst in systematics: nuisance_parameters.append(npar) for i in range(n_nuisance_params): if i not in nuisance_parameters: nuisance_toys[:, i] = 0.0 nuisance_toy_factors = np.array( [ nuisance_morpher.calculate_nuisance_factors(nuisance_toy, weights_benchmarks) for nuisance_toy in nuisance_toys ] ) # Shape (n_toys, n_events) nuisance_toy_factors = sanitize_array(nuisance_toy_factors, min_value=1.0e-2, max_value=100.0) # Shape (n_toys, n_events) # Calculate histogram for central prediction, not normalized histo, bin_edges = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=False) # Calculate toy histograms, not normalized histos_toys_this_theta = [] for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors): toy_histo, _ = np.histogram( x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=False ) histos_toys_this_theta.append(toy_histo) histo_plus2sigma = np.percentile(histos_toys_this_theta, 97.5, axis=0) histo_plus1sigma = np.percentile(histos_toys_this_theta, 84.0, axis=0) histo_minus1sigma = np.percentile(histos_toys_this_theta, 16.0, axis=0) histo_minus2sigma = np.percentile(histos_toys_this_theta, 2.5, axis=0) # Calculate histogram for central prediction, normalized histo_norm, bin_edges_norm = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=True) # Calculate toy histograms, normalized histos_toys_this_theta = [] for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors): toy_histo, _ = np.histogram( x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=True ) histos_toys_this_theta.append(toy_histo) histo_plus2sigma_norm = np.percentile(histos_toys_this_theta, 97.5, axis=0) histo_plus1sigma_norm = np.percentile(histos_toys_this_theta, 84.0, axis=0) histo_minus1sigma_norm = np.percentile(histos_toys_this_theta, 16.0, axis=0) histo_minus2sigma_norm = np.percentile(histos_toys_this_theta, 2.5, axis=0) # Prepare plotting def plot_mc(edges, histo_central, histo_m2, histo_m1, histo_p1, histo_p2, relative=False): bin_edges_ = np.repeat(edges, 2)[1:-1] histo_ = np.repeat(histo_central, 2) histo_m2_ = np.repeat(histo_m2, 2) histo_m1_ = np.repeat(histo_m1, 2) histo_p1_ = np.repeat(histo_p1, 2) histo_p2_ = np.repeat(histo_p2, 2) if relative: histo_m2_ /= histo_ histo_m1_ /= histo_ histo_p1_ /= histo_ histo_p2_ /= histo_ histo_ /= histo_ plt.fill_between(bin_edges_, histo_m2_, histo_p2_, facecolor=bandcolor2, edgecolor="none") plt.fill_between(bin_edges_, histo_m1_, histo_p1_, facecolor=bandcolor1, edgecolor="none") plt.plot(bin_edges_, histo_, color=linecolor, lw=1.5, ls="-") # Make plot fig = plt.figure(figsize=(10, 7)) gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1]) # MC, absolute residuals ax = plt.subplot(gs[2]) plot_mc(bin_edges, histo, histo_minus2sigma, histo_minus1sigma, histo_plus1sigma, histo_plus2sigma, relative=True) plt.xlabel(obs_label) plt.ylabel(r"Relative to central pred.") plt.xlim(obs_range[0], obs_range[1]) plt.ylim(ratio_range[0], ratio_range[1]) # MC, absolute ax = plt.subplot(gs[0], sharex=ax) plot_mc(bin_edges, histo, histo_minus2sigma, histo_minus1sigma, histo_plus1sigma, histo_plus2sigma) plt.ylabel(r"Differential cross section [pb/bin]") plt.ylim(0.0, None) plt.setp(ax.get_xticklabels(), visible=False) # MC, relative residuals ax = plt.subplot(gs[3]) plot_mc( bin_edges_norm, histo_norm, histo_minus2sigma_norm, histo_minus1sigma_norm, histo_plus1sigma_norm, histo_plus2sigma_norm, relative=True, ) plt.xlabel(r"$p_{T,\gamma}$ [GeV]") plt.ylabel(r"Relative to central pred.") plt.xlim(obs_range[0], obs_range[1]) plt.ylim(ratio_range[0], ratio_range[1]) # MC, relative ax = plt.subplot(gs[1], sharex=ax) plot_mc( bin_edges_norm, histo_norm, histo_minus2sigma_norm, histo_minus1sigma_norm, histo_plus1sigma_norm, histo_plus2sigma_norm, ) plt.ylabel(r"Normalized distribution") plt.ylim(0.0, None) plt.setp(ax.get_xticklabels(), visible=False) # Return plt.tight_layout() return fig
[docs]def plot_systematics( filename, theta, observable, obs_label, obs_range, n_bins=50, n_events=None, n_toys=100, linecolor="black", bandcolors=None, band_alpha=0.2, ratio_range=(0.8, 1.2), ): """ Plots absolute and relative uncertainty bands for all systematic uncertainties in a histogram of one observable in a MadMiner file. Parameters ---------- filename : str Filename of a MadMiner HDF5 file. theta : ndarray, optional Which parameter points to use for histogramming the data. observable : str Which observable to plot, given by its name in the MadMiner file. obs_label : str x-axis label naming the observable. obs_range : tuple of two float Range to be plotted for the observable. n_bins : int Number of bins. Default value: 50. n_events : None or int, optional If not None, sets the number of events from the MadMiner file that will be analyzed and plotted. Default value: None. n_toys : int, optional Number of toy nuisance parameter vectors used to estimate the systematic uncertainties. Default value: 100. linecolor : str, optional Line color for central prediction. Default value: "black". bandcolors : None or list of str, optional Error band colors. Default value: None. ratio_range : tuple of two floar y-axis range for the plots of the ratio to the central prediction. Default value: (0.8, 1.2). Returns ------- figure : Figure Plot as Matplotlib Figure instance. """ # Colors if bandcolors is None: bandcolors = ["C{}".format(i) for i in range(10)] # Load data sa = SampleAugmenter(filename, include_nuisance_parameters=True) nuisance_morpher = NuisanceMorpher( sa.nuisance_parameters, list(sa.benchmarks.keys()), reference_benchmark=sa.reference_benchmark ) # Observable index obs_idx = list(sa.observables.keys()).index(observable) # Get event data (observations and weights) x, weights_benchmarks = sa.weighted_events() x = x[:, obs_idx] # Theta matrix theta_matrix = sa._get_theta_benchmark_matrix(theta) weights = mdot(theta_matrix, weights_benchmarks) # Remove negative weights x = x[weights >= 0.0] weights_benchmarks = weights_benchmarks[weights >= 0.0] weights = weights[weights >= 0.0] # Shuffle events x, weights, weights_benchmarks = shuffle(x, weights, weights_benchmarks) # Only analyze n_events if n_events is not None and n_events < x.shape[0]: x = x[:n_events] weights_benchmarks = weights_benchmarks[:n_events] weights = weights[:n_events] # Systematics n_systematics = len(sa.systematics) + 1 labels = list(six.iterkeys(sa.systematics)) + ["combined"] # Nuisance parameters n_nuisance_params = sa.n_nuisance_parameters nuisance_toys = np.random.normal(loc=0.0, scale=1.0, size=n_systematics * n_nuisance_params * n_toys) nuisance_toys = nuisance_toys.reshape(n_systematics, n_toys, n_nuisance_params) # Restrict nuisance parameters all_nuisance_parameters = list(six.iterkeys(sa.nuisance_parameters)) for i_syst, syst_name in enumerate(six.iterkeys(sa.systematics)): n_used = n_nuisance_params used_nuisance_parameters = [] for npar, (npar_syst, _, _) in six.iteritems(sa.nuisance_parameters): if npar_syst == syst_name: used_nuisance_parameters.append(npar) for i in range(n_nuisance_params): if all_nuisance_parameters[i] not in used_nuisance_parameters: nuisance_toys[i_syst, :, i] = 0.0 n_used -= 1 logger.debug("Systematics %s based on %s nuisance parameters", syst_name, n_used) nuisance_toys = nuisance_toys.reshape(n_systematics * n_toys, n_nuisance_params) nuisance_toy_factors = np.array( [ nuisance_morpher.calculate_nuisance_factors(nuisance_toy, weights_benchmarks) for nuisance_toy in nuisance_toys ] ) # Shape (n_systematics*n_toys, n_events) nuisance_toy_factors = sanitize_array(nuisance_toy_factors, min_value=1.0e-2, max_value=100.0) # Shape (n_systematics*n_toys, n_events) # Calculate histogram for central prediction, not normalized histo, bin_edges = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=False) # Calculate toy histograms, not normalized histos_toys_this_theta = [] for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors): toy_histo, _ = np.histogram( x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=False ) histos_toys_this_theta.append(toy_histo) histos_toys_this_theta = np.array(histos_toys_this_theta) histos_toys_this_theta = histos_toys_this_theta.reshape(n_systematics, n_toys, -1) # Shape (n_systematics, n_toys, v) histo_plus1sigma = np.percentile(histos_toys_this_theta, 84.0, axis=1) histo_minus1sigma = np.percentile(histos_toys_this_theta, 16.0, axis=1) # Calculate histogram for central prediction, normalized histo_norm, bin_edges_norm = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=True) # Calculate toy histograms, normalized histos_toys_this_theta = [] for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors): toy_histo, _ = np.histogram( x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=True ) histos_toys_this_theta.append(toy_histo) histos_toys_this_theta = np.array(histos_toys_this_theta) histos_toys_this_theta = histos_toys_this_theta.reshape(n_systematics, n_toys, -1) # Shape (n_systematics, n_toys, n_bins) histo_plus1sigma_norm = np.percentile(histos_toys_this_theta, 84.0, axis=1) histo_minus1sigma_norm = np.percentile(histos_toys_this_theta, 16.0, axis=1) # Prepare plotting def plot_mc(edges, histo_central, histos_minus, histos_plus, relative=False): bin_edges_ = np.repeat(edges, 2)[1:-1] histo_central_ = np.repeat(histo_central, 2) histos_minus_ = [np.repeat(h, 2) for h in histos_minus] histos_plus_ = [np.repeat(h, 2) for h in histos_plus] if relative: histos_minus_ = [h / histo_central_ for h in histos_minus_] histos_plus_ = [h / histo_central_ for h in histos_plus_] histo_central_ /= histo_central_ for i, (histo_minus_, histo_plus_) in enumerate(zip(histos_minus_, histos_plus_)): plt.fill_between( bin_edges_, histo_minus_, histo_plus_, facecolor=bandcolors[i % len(bandcolors)], alpha=band_alpha, edgecolor="none", label=labels[i], ) plt.plot(bin_edges_, histo_minus_, color=bandcolors[i % len(bandcolors)], lw=1.0, ls="-") plt.plot(bin_edges_, histo_plus_, color=bandcolors[i % len(bandcolors)], lw=1.0, ls="-") plt.plot(bin_edges_, histo_central_, color=linecolor, lw=1.5, ls="-") # Make plot fig = plt.figure(figsize=(10, 7)) gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1]) # MC, absolute residuals ax = plt.subplot(gs[2]) plot_mc(bin_edges, histo, histo_minus1sigma, histo_plus1sigma, relative=True) plt.xlabel(obs_label) plt.ylabel(r"Relative to central pred.") plt.xlim(obs_range[0], obs_range[1]) plt.ylim(ratio_range[0], ratio_range[1]) # MC, absolute ax = plt.subplot(gs[0], sharex=ax) plot_mc(bin_edges, histo, histo_minus1sigma, histo_plus1sigma) plt.legend() plt.ylabel(r"Differential cross section [pb/bin]") plt.ylim(0.0, None) plt.setp(ax.get_xticklabels(), visible=False) # MC, relative residuals ax = plt.subplot(gs[3]) plot_mc(bin_edges_norm, histo_norm, histo_minus1sigma_norm, histo_plus1sigma_norm, relative=True) plt.xlabel(obs_label) plt.ylabel(r"Relative to central pred.") plt.xlim(obs_range[0], obs_range[1]) plt.ylim(ratio_range[0], ratio_range[1]) # MC, relative ax = plt.subplot(gs[1], sharex=ax) plot_mc(bin_edges_norm, histo_norm, histo_minus1sigma_norm, histo_plus1sigma_norm) plt.legend() plt.ylabel(r"Normalized distribution") plt.ylim(0.0, None) plt.setp(ax.get_xticklabels(), visible=False) # Return plt.tight_layout() return fig