import logging
import numpy as np
from matplotlib import pyplot as plt, gridspec
from ..sampling import SampleAugmenter
from ..utils.morphing import NuisanceMorpher
from ..utils.various import mdot, shuffle, sanitize_array
logger = logging.getLogger(__name__)
[docs]def plot_uncertainty(
filename,
theta,
observable,
obs_label,
obs_range,
n_bins=50,
systematics=None,
n_events=None,
n_toys=100,
linecolor="black",
bandcolor1="#CC002E",
bandcolor2="orange",
ratio_range=(0.8, 1.2),
):
"""
Plots absolute and relative uncertainty bands in a histogram of one observable in a MadMiner file.
Parameters
----------
filename : str
Filename of a MadMiner HDF5 file.
theta : ndarray, optional
Which parameter points to use for histogramming the data.
observable : str
Which observable to plot, given by its name in the MadMiner file.
obs_label : str
x-axis label naming the observable.
obs_range : tuple of two float
Range to be plotted for the observable.
n_bins : int
Number of bins. Default value: 50.
systematics : None or list of str, optional
This can restrict which nuisance parameters are used to draw the uncertainty
bands. Each entry of this list is the name of a systematic uncertainty (see `MadMiner.add_systematics()`).
n_events : None or int, optional
If not None, sets the number of events from the MadMiner file that will be analyzed and plotted. Default value:
None.
n_toys : int, optional
Number of toy nuisance parameter vectors used to estimate the systematic uncertainties. Default value: 100.
linecolor : str, optional
Line color for central prediction. Default value: "black".
bandcolor1 : str, optional
Error band color for 1 sigma uncertainty. Default value: "#CC002E".
bandcolor2 : str, optional
Error band color for 2 sigma uncertainty. Default value: "orange".
ratio_range : tuple of two floar
y-axis range for the plots of the ratio to the central prediction. Default value: (0.8, 1.2).
Returns
-------
figure : Figure
Plot as Matplotlib Figure instance.
"""
# Load data
sa = SampleAugmenter(filename, include_nuisance_parameters=True)
nuisance_morpher = NuisanceMorpher(
sa.nuisance_parameters, list(sa.benchmarks.keys()), reference_benchmark=sa.reference_benchmark
)
# Observable index
obs_idx = list(sa.observables.keys()).index(observable)
# Get event data (observations and weights)
x, weights_benchmarks = sa.weighted_events()
x = x[:, obs_idx]
# Theta matrix
theta_matrix = sa._get_theta_benchmark_matrix(theta)
weights = mdot(theta_matrix, weights_benchmarks)
# Remove negative weights
x = x[weights >= 0.0]
weights_benchmarks = weights_benchmarks[weights >= 0.0]
weights = weights[weights >= 0.0]
# Shuffle events
x, weights, weights_benchmarks = shuffle(x, weights, weights_benchmarks)
# Only analyze n_events
if n_events is not None and n_events < x.shape[0]:
x = x[:n_events]
weights_benchmarks = weights_benchmarks[:n_events]
weights = weights[:n_events]
# Nuisance parameters
n_nuisance_params = sa.n_nuisance_parameters
nuisance_toys = np.random.normal(loc=0.0, scale=1.0, size=n_nuisance_params * n_toys)
nuisance_toys = nuisance_toys.reshape(n_toys, n_nuisance_params)
# Restrict nuisance parameters
if systematics is not None:
nuisance_parameters = []
for npar, (npar_syst, _, _) in sa.nuisance_parameters.items():
if npar_syst in systematics:
nuisance_parameters.append(npar)
for i in range(n_nuisance_params):
if i not in nuisance_parameters:
nuisance_toys[:, i] = 0.0
nuisance_toy_factors = np.array(
[
nuisance_morpher.calculate_nuisance_factors(nuisance_toy, weights_benchmarks)
for nuisance_toy in nuisance_toys
]
) # Shape (n_toys, n_events)
nuisance_toy_factors = sanitize_array(nuisance_toy_factors, min_value=1.0e-2, max_value=100.0)
# Shape (n_toys, n_events)
# Calculate histogram for central prediction, not normalized
histo, bin_edges = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=False)
# Calculate toy histograms, not normalized
histos_toys_this_theta = []
for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors):
toy_histo, _ = np.histogram(
x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=False
)
histos_toys_this_theta.append(toy_histo)
histo_plus2sigma = np.percentile(histos_toys_this_theta, 97.5, axis=0)
histo_plus1sigma = np.percentile(histos_toys_this_theta, 84.0, axis=0)
histo_minus1sigma = np.percentile(histos_toys_this_theta, 16.0, axis=0)
histo_minus2sigma = np.percentile(histos_toys_this_theta, 2.5, axis=0)
# Calculate histogram for central prediction, normalized
histo_norm, bin_edges_norm = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=True)
# Calculate toy histograms, normalized
histos_toys_this_theta = []
for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors):
toy_histo, _ = np.histogram(
x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=True
)
histos_toys_this_theta.append(toy_histo)
histo_plus2sigma_norm = np.percentile(histos_toys_this_theta, 97.5, axis=0)
histo_plus1sigma_norm = np.percentile(histos_toys_this_theta, 84.0, axis=0)
histo_minus1sigma_norm = np.percentile(histos_toys_this_theta, 16.0, axis=0)
histo_minus2sigma_norm = np.percentile(histos_toys_this_theta, 2.5, axis=0)
# Prepare plotting
def plot_mc(edges, histo_central, histo_m2, histo_m1, histo_p1, histo_p2, relative=False):
bin_edges_ = np.repeat(edges, 2)[1:-1]
histo_ = np.repeat(histo_central, 2)
histo_m2_ = np.repeat(histo_m2, 2)
histo_m1_ = np.repeat(histo_m1, 2)
histo_p1_ = np.repeat(histo_p1, 2)
histo_p2_ = np.repeat(histo_p2, 2)
if relative:
histo_m2_ /= histo_
histo_m1_ /= histo_
histo_p1_ /= histo_
histo_p2_ /= histo_
histo_ /= histo_
plt.fill_between(bin_edges_, histo_m2_, histo_p2_, facecolor=bandcolor2, edgecolor="none")
plt.fill_between(bin_edges_, histo_m1_, histo_p1_, facecolor=bandcolor1, edgecolor="none")
plt.plot(bin_edges_, histo_, color=linecolor, lw=1.5, ls="-")
# Make plot
fig = plt.figure(figsize=(10, 7))
gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1])
# MC, absolute residuals
ax = plt.subplot(gs[2])
plot_mc(bin_edges, histo, histo_minus2sigma, histo_minus1sigma, histo_plus1sigma, histo_plus2sigma, relative=True)
plt.xlabel(obs_label)
plt.ylabel(r"Relative to central pred.")
plt.xlim(obs_range[0], obs_range[1])
plt.ylim(ratio_range[0], ratio_range[1])
# MC, absolute
ax = plt.subplot(gs[0], sharex=ax)
plot_mc(bin_edges, histo, histo_minus2sigma, histo_minus1sigma, histo_plus1sigma, histo_plus2sigma)
plt.ylabel(r"Differential cross section [pb/bin]")
plt.ylim(0.0, None)
plt.setp(ax.get_xticklabels(), visible=False)
# MC, relative residuals
ax = plt.subplot(gs[3])
plot_mc(
bin_edges_norm,
histo_norm,
histo_minus2sigma_norm,
histo_minus1sigma_norm,
histo_plus1sigma_norm,
histo_plus2sigma_norm,
relative=True,
)
plt.xlabel(r"$p_{T,\gamma}$ [GeV]")
plt.ylabel(r"Relative to central pred.")
plt.xlim(obs_range[0], obs_range[1])
plt.ylim(ratio_range[0], ratio_range[1])
# MC, relative
ax = plt.subplot(gs[1], sharex=ax)
plot_mc(
bin_edges_norm,
histo_norm,
histo_minus2sigma_norm,
histo_minus1sigma_norm,
histo_plus1sigma_norm,
histo_plus2sigma_norm,
)
plt.ylabel(r"Normalized distribution")
plt.ylim(0.0, None)
plt.setp(ax.get_xticklabels(), visible=False)
# Return
plt.tight_layout()
return fig
[docs]def plot_systematics(
filename,
theta,
observable,
obs_label,
obs_range,
n_bins=50,
n_events=None,
n_toys=100,
linecolor="black",
bandcolors=None,
band_alpha=0.2,
ratio_range=(0.8, 1.2),
):
"""
Plots absolute and relative uncertainty bands for all systematic uncertainties in a histogram of one observable in
a MadMiner file.
Parameters
----------
filename : str
Filename of a MadMiner HDF5 file.
theta : ndarray, optional
Which parameter points to use for histogramming the data.
observable : str
Which observable to plot, given by its name in the MadMiner file.
obs_label : str
x-axis label naming the observable.
obs_range : tuple of two float
Range to be plotted for the observable.
n_bins : int
Number of bins. Default value: 50.
n_events : None or int, optional
If not None, sets the number of events from the MadMiner file that will be analyzed and plotted. Default value:
None.
n_toys : int, optional
Number of toy nuisance parameter vectors used to estimate the systematic uncertainties. Default value: 100.
linecolor : str, optional
Line color for central prediction. Default value: "black".
bandcolors : None or list of str, optional
Error band colors. Default value: None.
ratio_range : tuple of two float
y-axis range for the plots of the ratio to the central prediction. Default value: (0.8, 1.2).
Returns
-------
figure : Figure
Plot as Matplotlib Figure instance.
"""
# Colors
if bandcolors is None:
bandcolors = [f"C{i}" for i in range(10)]
# Load data
sa = SampleAugmenter(filename, include_nuisance_parameters=True)
nuisance_morpher = NuisanceMorpher(
sa.nuisance_parameters, list(sa.benchmarks.keys()), reference_benchmark=sa.reference_benchmark
)
# Observable index
obs_idx = list(sa.observables.keys()).index(observable)
# Get event data (observations and weights)
x, weights_benchmarks = sa.weighted_events()
x = x[:, obs_idx]
# Theta matrix
theta_matrix = sa._get_theta_benchmark_matrix(theta)
weights = mdot(theta_matrix, weights_benchmarks)
# Remove negative weights
x = x[weights >= 0.0]
weights_benchmarks = weights_benchmarks[weights >= 0.0]
weights = weights[weights >= 0.0]
# Shuffle events
x, weights, weights_benchmarks = shuffle(x, weights, weights_benchmarks)
# Only analyze n_events
if n_events is not None and n_events < x.shape[0]:
x = x[:n_events]
weights_benchmarks = weights_benchmarks[:n_events]
weights = weights[:n_events]
# Systematics
n_systematics = len(sa.systematics) + 1
labels = list(sa.systematics.keys()) + ["combined"]
# Nuisance parameters
n_nuisance_params = sa.n_nuisance_parameters
nuisance_toys = np.random.normal(loc=0.0, scale=1.0, size=n_systematics * n_nuisance_params * n_toys)
nuisance_toys = nuisance_toys.reshape(n_systematics, n_toys, n_nuisance_params)
# Restrict nuisance parameters
all_nuisance_parameters = list(sa.nuisance_parameters.keys())
for i_syst, syst_name in enumerate(sa.systematics.keys()):
n_used = n_nuisance_params
used_nuisance_parameters = []
for npar, (npar_syst, _, _) in sa.nuisance_parameters.items():
if npar_syst == syst_name:
used_nuisance_parameters.append(npar)
for i in range(n_nuisance_params):
if all_nuisance_parameters[i] not in used_nuisance_parameters:
nuisance_toys[i_syst, :, i] = 0.0
n_used -= 1
logger.debug("Systematics %s based on %s nuisance parameters", syst_name, n_used)
nuisance_toys = nuisance_toys.reshape(n_systematics * n_toys, n_nuisance_params)
nuisance_toy_factors = np.array(
[
nuisance_morpher.calculate_nuisance_factors(nuisance_toy, weights_benchmarks)
for nuisance_toy in nuisance_toys
]
) # Shape (n_systematics*n_toys, n_events)
nuisance_toy_factors = sanitize_array(nuisance_toy_factors, min_value=1.0e-2, max_value=100.0)
# Shape (n_systematics*n_toys, n_events)
# Calculate histogram for central prediction, not normalized
histo, bin_edges = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=False)
# Calculate toy histograms, not normalized
histos_toys_this_theta = []
for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors):
toy_histo, _ = np.histogram(
x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=False
)
histos_toys_this_theta.append(toy_histo)
histos_toys_this_theta = np.array(histos_toys_this_theta)
histos_toys_this_theta = histos_toys_this_theta.reshape(n_systematics, n_toys, -1)
# Shape (n_systematics, n_toys, v)
histo_plus1sigma = np.percentile(histos_toys_this_theta, 84.0, axis=1)
histo_minus1sigma = np.percentile(histos_toys_this_theta, 16.0, axis=1)
# Calculate histogram for central prediction, normalized
histo_norm, bin_edges_norm = np.histogram(x, bins=n_bins, range=obs_range, weights=weights, density=True)
# Calculate toy histograms, normalized
histos_toys_this_theta = []
for i_toy, nuisance_toy_factors_this_toy in enumerate(nuisance_toy_factors):
toy_histo, _ = np.histogram(
x, bins=n_bins, range=obs_range, weights=weights * nuisance_toy_factors_this_toy, density=True
)
histos_toys_this_theta.append(toy_histo)
histos_toys_this_theta = np.array(histos_toys_this_theta)
histos_toys_this_theta = histos_toys_this_theta.reshape(n_systematics, n_toys, -1)
# Shape (n_systematics, n_toys, n_bins)
histo_plus1sigma_norm = np.percentile(histos_toys_this_theta, 84.0, axis=1)
histo_minus1sigma_norm = np.percentile(histos_toys_this_theta, 16.0, axis=1)
# Prepare plotting
def plot_mc(edges, histo_central, histos_minus, histos_plus, relative=False):
bin_edges_ = np.repeat(edges, 2)[1:-1]
histo_central_ = np.repeat(histo_central, 2)
histos_minus_ = [np.repeat(h, 2) for h in histos_minus]
histos_plus_ = [np.repeat(h, 2) for h in histos_plus]
if relative:
histos_minus_ = [h / histo_central_ for h in histos_minus_]
histos_plus_ = [h / histo_central_ for h in histos_plus_]
histo_central_ /= histo_central_
for i, (histo_minus_, histo_plus_) in enumerate(zip(histos_minus_, histos_plus_)):
plt.fill_between(
bin_edges_,
histo_minus_,
histo_plus_,
facecolor=bandcolors[i % len(bandcolors)],
alpha=band_alpha,
edgecolor="none",
label=labels[i],
)
plt.plot(bin_edges_, histo_minus_, color=bandcolors[i % len(bandcolors)], lw=1.0, ls="-")
plt.plot(bin_edges_, histo_plus_, color=bandcolors[i % len(bandcolors)], lw=1.0, ls="-")
plt.plot(bin_edges_, histo_central_, color=linecolor, lw=1.5, ls="-")
# Make plot
fig = plt.figure(figsize=(10, 7))
gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1])
# MC, absolute residuals
ax = plt.subplot(gs[2])
plot_mc(bin_edges, histo, histo_minus1sigma, histo_plus1sigma, relative=True)
plt.xlabel(obs_label)
plt.ylabel(r"Relative to central pred.")
plt.xlim(obs_range[0], obs_range[1])
plt.ylim(ratio_range[0], ratio_range[1])
# MC, absolute
ax = plt.subplot(gs[0], sharex=ax)
plot_mc(bin_edges, histo, histo_minus1sigma, histo_plus1sigma)
plt.legend()
plt.ylabel(r"Differential cross section [pb/bin]")
plt.ylim(0.0, None)
plt.setp(ax.get_xticklabels(), visible=False)
# MC, relative residuals
ax = plt.subplot(gs[3])
plot_mc(bin_edges_norm, histo_norm, histo_minus1sigma_norm, histo_plus1sigma_norm, relative=True)
plt.xlabel(obs_label)
plt.ylabel(r"Relative to central pred.")
plt.xlim(obs_range[0], obs_range[1])
plt.ylim(ratio_range[0], ratio_range[1])
# MC, relative
ax = plt.subplot(gs[1], sharex=ax)
plot_mc(bin_edges_norm, histo_norm, histo_minus1sigma_norm, histo_plus1sigma_norm)
plt.legend()
plt.ylabel(r"Normalized distribution")
plt.ylim(0.0, None)
plt.setp(ax.get_xticklabels(), visible=False)
# Return
plt.tight_layout()
return fig