# Source code for madminer.core.madminer

import os
import logging
import tempfile

from collections import OrderedDict
from pathlib import Path
from typing import Dict
from typing import List
from typing import Union

from madminer.models import AnalysisParameter
from madminer.models import Benchmark
from madminer.models import Systematic
from madminer.models import SystematicScale
from madminer.models import SystematicType
from madminer.utils.morphing import PhysicsMorpher
from madminer.utils.interfaces.hdf5 import load_madminer_settings
from madminer.utils.interfaces.hdf5 import save_madminer_settings
from madminer.utils.interfaces.mg_cards import export_param_card
from madminer.utils.interfaces.mg_cards import export_reweight_card
from madminer.utils.interfaces.mg_cards import export_run_card
from madminer.utils.interfaces.mg import generate_mg_process
from madminer.utils.interfaces.mg import setup_mg_with_scripts
from madminer.utils.interfaces.mg import run_mg
from madminer.utils.interfaces.mg import create_master_script
from madminer.utils.interfaces.mg import setup_mg_reweighting_with_scripts
from madminer.utils.interfaces.mg import run_mg_reweighting
from madminer.utils.various import copy_file

logger = logging.getLogger(__name__)


class MadMiner:
    """
    The central class to manage parameter spaces, benchmarks, and the generation of events through MadGraph and
    Pythia.

    An instance of this class is the starting point of most MadMiner applications. It is typically used in four steps:

    * Defining the parameter space through `MadMiner.add_parameter`
    * Defining the benchmarks, i.e. the points at which the squared matrix elements will be evaluated in MadGraph, with
      `MadMiner.add_benchmark()` or, if operator morphing is used, with `MadMiner.set_benchmarks_from_morphing()`
    * Saving this setup with `MadMiner.save()` (it can be loaded in a new instance with `MadMiner.load()`)
    * Running MadGraph and Pythia with the appropriate settings with `MadMiner.run()` or `MadMiner.run_multiple()` (the
      latter allows the user to combine runs from multiple run cards and sampling points)

    Please see the tutorial for a hands-on introduction to its methods.

    """

    def __init__(self):
        self.parameters = OrderedDict()
        self.benchmarks = OrderedDict()
        self.default_benchmark = None
        self.morpher = None
        self.export_morphing = False
        self.systematics = OrderedDict()
        self.finite_difference_benchmarks = OrderedDict()
        self.finite_difference_epsilon = 0.0

    def _reset_systematics(self):
        self.systematics = OrderedDict()

    def _reset_benchmarks(self):
        self.benchmarks = OrderedDict()
        self.default_benchmark = None

    def _reset_morpher(self):
        self.morpher = None
        self.export_morphing = False

[docs]    def add_parameter(
        self,
        lha_block,
        lha_id,
        parameter_name=None,
        param_card_transform=None,
        morphing_max_power=2,
        parameter_range=(0.0, 1.0),
    ):
        """
        Adds an individual parameter.

        Parameters
        ----------
        lha_block :  str
            The name of the LHA block as used in the param_card. Case-sensitive.

        lha_id : int
            The LHA id as used in the param_card.

        parameter_name : str or None
            An internal name for the parameter. If None, a the default 'benchmark_i' is used.

        morphing_max_power : int
            The maximal power with which this parameter contributes to the
            squared matrix element of the process of interest. Typically at tree level,
            this maximal number is 2 for parameters that affect one vertex (e.g. only production
            or only decay of a particle), and 4 for parameters that affect two vertices (e.g.
            production and decay). Default value: 2.

        param_card_transform : None or str
            Represents a one-parameter function mapping the parameter
            (`"theta"`) to the value that should be written in the parameter cards. This
            str is parsed by Python's `eval()` function, and `"theta"` is parsed as the
            parameter value. Default value: None.

        parameter_range : tuple of float
            The range of parameter values of primary interest. Only affects the
            basis optimization. Default value: (0., 1.).

        Returns
        -------
            None
        """

        # Default names
        if parameter_name is None:
            parameter_name = f"parameter_{len(self.parameters)}"
        if param_card_transform is None:
            param_card_transform = "_"

        # Check and sanitize input
        assert isinstance(lha_block, str), f"LHA block is not a string: {lha_block}"
        assert isinstance(lha_id, int), f"LHA id is not an integer: {lha_id}"
        assert isinstance(parameter_name, str), f"Parameter name is not a string: {parameter_name}"
        assert isinstance(morphing_max_power, int), f"Morphing max power is not an integer: {morphing_max_power}"

        parameter_name = parameter_name.replace(" ", "_")
        parameter_name = parameter_name.replace("-", "_")

        assert parameter_name not in self.parameters, f"Parameter already exists: {parameter_name}"

        parameter = AnalysisParameter(
            parameter_name,
            lha_block,
            lha_id,
            morphing_max_power,
            parameter_range,
            param_card_transform,
        )

        # Add parameter
        logger.info("Adding parameter: %s", parameter)
        self.parameters[parameter_name] = parameter

        # The morphing information is not accurate anymore
        logger.warning("Resetting benchmarks and morphing")
        self._reset_benchmarks()
        self._reset_morpher()

[docs]    def set_parameters(self, parameters: Union[Dict[str, AnalysisParameter], List[tuple]]):
        """
        Manually sets all parameters, overwriting previously added parameters.

        Parameters
        ----------
        parameters : dict or list
             If parameters is an dict, the keys should be str and give the parameter names, and the values are
             AnalysisParameter model instances. If parameters is a list, the items should be tuples of the
             form (LHA_block, LHA_ID).

        Returns
        -------
            None
        """

        self.parameters = OrderedDict()

        if isinstance(parameters, dict):
            for param in parameters.values():
                self.add_parameter(
                    lha_block=param.lha_block,
                    lha_id=param.lha_id,
                    parameter_name=param.name,
                    morphing_max_power=param.max_power,
                    parameter_range=param.val_range,
                )
        elif isinstance(parameters, list):
            for values in parameters:
                self.add_parameter(values[0], values[1])
        else:
            raise RuntimeError(f"Invalid set of parameters: {parameters}")

        # The morphing information is not accurate anymore
        logger.warning("Resetting benchmarks and morphing")
        self._reset_benchmarks()
        self._reset_morpher()

[docs]    def add_benchmark(self, parameter_values: Dict[str, float], benchmark_name: str = None, verbose: float = True):
        """
        Manually adds an individual benchmark, that is, a parameter point that will be evaluated by MadGraph.

        Parameters
        ----------
        parameter_values : dict
            The keys of this dict should be the parameter names and the values the corresponding parameter values.
        benchmark_name : str or None, optional
            Name of benchmark. If None, a default name is used. Default value: None.
        verbose : bool, optional
            If True, prints output about each benchmark. Default value: True.

        Returns
        -------
            None

        Raises
        ------
        RuntimeError
            If a benchmark with the same name already exists, if parameter_values is not a dict, or if a key of
            parameter_values does not correspond to a defined parameter.
        """

        # Default names
        if benchmark_name is None:
            benchmark_name = f"benchmark_{len(self.benchmarks)}"

        # Check input
        if not isinstance(parameter_values, dict):
            raise RuntimeError(f"Parameter values are not a dict: {parameter_values}")

        for p_name in parameter_values.keys():
            if p_name not in self.parameters.keys():
                raise RuntimeError(f"Unknown parameter: {p_name}")

        if benchmark_name in self.benchmarks.keys():
            raise RuntimeError(f"Benchmark {benchmark_name} exists already")

        # Add benchmark
        self.benchmarks[benchmark_name] = Benchmark(
            name=benchmark_name,
            values=parameter_values,
        )

        # If first benchmark, this will be the default for sampling
        if len(self.benchmarks) == 1:
            self.default_benchmark = benchmark_name

        if verbose:
            logger.info("Added benchmark %s", self.benchmarks[benchmark_name])
        else:
            logger.debug("Added benchmark %s", self.benchmarks[benchmark_name])

[docs]    def set_benchmarks(self, benchmarks: Union[Dict[str, dict], List[dict]], verbose: bool = True):
        """
        Manually sets all benchmarks, that is, parameter points that will be evaluated by MadGraph. Calling this
        function overwrites all previously defined benchmarks.

        Parameters
        ----------
        benchmarks : dict or list
            Specifies all benchmarks. If None, all benchmarks are reset. If dict, the keys are the benchmark names and
            the values the Benchmark instances. If list, the entries are dicts {parameter_name:value}
            (and the benchmark names are chosen automatically). Default value: None.

        verbose : bool, optional
            If True, prints output about each benchmark. Default value: True.

        Returns
        -------
            None
        """

        self.benchmarks = OrderedDict()
        self.default_benchmark = None

        if isinstance(benchmarks, dict):
            for name, values in benchmarks.items():
                self.add_benchmark(values, name, verbose=verbose)
        elif isinstance(benchmarks, list):
            for values in benchmarks:
                self.add_benchmark(values)
        else:
            raise RuntimeError(f"Invalid set of benchmarks: {benchmarks}")

        # After manually adding benchmarks, the morphing information is not accurate anymore
        if self.morpher is not None:
            logger.warning("Reset morphing")
            self.morpher = None
            self.export_morphing = False

[docs]    def set_morphing(
        self,
        max_overall_power=4,
        n_bases=1,
        include_existing_benchmarks=True,
        n_trials=100,
        n_test_thetas=100,
    ):
        """
        Sets up the morphing environment.

        Sets benchmarks, i.e. parameter points that will be evaluated by MadGraph, for a morphing algorithm, and
        calculates all information required for morphing. Morphing is a technique that allows MadMax to infer the full
        probability distribution `p(x_i | theta)` for each simulated event `x_i` and any `theta`, not just the
        benchmarks.

        The morphing basis is optimized with respect to the expected mean squared morphing weights over the parameter
        region of interest. If keep_existing_benchmarks=True, benchmarks defined previously will be incorporated in the
        morphing basis and only the remaining basis points will be optimized.

        Note that any subsequent call to `set_benchmarks` or `add_benchmark` will overwrite the morphing setup. The
        correct order is therefore to manually define benchmarks first, using `set_benchmarks` or `add_benchmark`, and
        then to create the morphing setup and complete the basis by calling
        `set_benchmarks_from_morphing(keep_existing_benchmarks=True)`.

        Parameters
        ----------
        max_overall_power : int, optional
            The maximal sum of powers of all parameters contributing to the squared matrix element.
            Typically, if parameters can affect the couplings at n vertices, this number is 2n. Default value: 4.

        n_bases : int, optional
            The number of morphing bases generated. If n_bases > 1, multiple bases are combined, and the
            weights for each basis are reduced by a factor 1 / n_bases. Currently only the default choice of 1 is
            fully implemented. Do not use any other value for now. Default value: 1.

        include_existing_benchmarks : bool, optional
            If True, the previously defined benchmarks are included in the morphing basis. In that case, the number of
            free parameters in the optimization routine is reduced. If False, the existing benchmarks will still be
            simulated, but are not part of the morphing routine. Default value: True.

        n_trials : int, optional
            Number of random basis configurations tested in the optimization procedure. A larger number will increase
            the run time of the optimization, but lead to better results. Default value: 100.

        n_test_thetas : int, optional
            Number of random parameter points used to evaluate the expected mean squared morphing weights. A larger
            number will increase the run time of the optimization, but lead to better results. Default value: 100.

        Returns
        -------
            None

        """

        logger.info("Optimizing basis for morphing")

        morpher = PhysicsMorpher(parameters_from_madminer=self.parameters)
        morpher.find_components(max_overall_power)

        if include_existing_benchmarks:
            n_predefined_benchmarks = len(self.benchmarks)
            basis = morpher.optimize_basis(
                n_bases=n_bases,
                benchmarks_from_madminer=self.benchmarks,
                n_trials=n_trials,
                n_test_thetas=n_test_thetas,
            )
        else:
            n_predefined_benchmarks = 0
            basis = morpher.optimize_basis(
                n_bases=n_bases,
                benchmarks_from_madminer=None,
                n_trials=n_trials,
                n_test_thetas=n_test_thetas,
            )

            basis.update(self.benchmarks)

        self.set_benchmarks(basis, verbose=False)
        self.morpher = morpher
        self.export_morphing = True

        logger.info(
            "Set up morphing with %s parameters, %s morphing components, %s predefined basis points, and %s "
            "new basis points",
            morpher.n_parameters,
            morpher.n_components,
            n_predefined_benchmarks,
            morpher.n_components - n_predefined_benchmarks,
        )

[docs]    def finite_differences(self, epsilon=0.01):
        """
        Adds benchmarks so that the score can be computed from finite differences

        Don't add any more benchmarks or parameters after calling this!
        """

        logger.info("Adding finite-differences benchmarks with epsilon = %s", epsilon)

        self.finite_difference_epsilon = epsilon

        # Copy is necessary to avoid endless loop :/
        for b_name, benchmark in self.benchmarks.copy().items():
            fd_keys = {}

            for param_name, param_value in benchmark.values.items():
                fd_key = f"{b_name}_plus_{param_name}"
                fd_obj = benchmark.copy()
                fd_obj.values[param_name] += epsilon

                self.add_benchmark(fd_obj, fd_key)
                fd_keys[param_name] = fd_key

            self.finite_difference_benchmarks[b_name].shift_names = fd_keys

[docs]    def add_systematics(
        self,
        effect,
        systematic_name=None,
        norm_variation=1.1,
        scale="mu",
        scale_variations=(0.5, 1.0, 2.0),
        pdf_variation="CT10",
    ):
        """

        Parameters
        ----------
        effect : {"norm", "scale", "pdf"}
            Type of the nuisance parameter. If "norm", it will affect the overall normalization of one or multiple
            samples in the process. If "scale", the nuisance parameter effect will be determined by varying
            factorization or regularization scales (depending on scale_variation and scales). If "pdf", the effect
            of the nuisance parameters will be determined by varying the PDF used.

        systematic_name : None or str, optional

        scale : {"mu", "mur", "muf"}, optional
            If type is "scale", this sets whether only the regularization scale ("mur"), only the factorization scale
            ("muf"), or both simultaneously ("mu") are varied. Default value: "mu".

        norm_variation : float, optional
            If type is "norm", this sets the relative effect of the nuisance parameter on the cross section at the
            "plus 1 sigma" variation. 1.1 corresponds to a 10% increase, 0.9 to a 10% decrease relative to the nominal
            cross section. Default value: 1.1.

        scale_variations : tuple of float, optional
            If type is "scale", this sets how the regularization and / or factorization scales are varied. A tuple
            like (0.5, 1.0, 2.0) specifies the factors with which they are varied. Default value: (0.5, 1.0, 2.0).

        pdf_variation : str, optional
            If type is "pdf", defines the PDF set for the variation. The option is passed along to the `--pdf` option
            of MadGraph's systematics module. See https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics for a
            list. The option "CT10" would, as an example, run over all the eigenvectors of the CTEQ10 set.
            Default value: "CT10".

        Returns
        -------
            None
        """

        assert scale in ["mu", "mur", "muf"]

        # Default name
        if systematic_name is None:
            i = 0
            while f"{effect}_{i}" in list(self.systematics.keys()):
                i += 1
            systematic_name = f"{type}_{i}"

        systematic_name = systematic_name.replace(" ", "_")
        systematic_name = systematic_name.replace("-", "_")

        scale = SystematicScale.from_str(scale)
        effect = SystematicType.from_str(effect)

        if effect is SystematicType.PDF:
            self.systematics[systematic_name] = Systematic(
                systematic_name,
                SystematicType.PDF,
                pdf_variation,
            )
        elif effect is SystematicType.SCALE:
            scale_variation_string = ",".join((str(factor) for factor in scale_variations))
            self.systematics[systematic_name] = Systematic(
                systematic_name,
                SystematicType.SCALE,
                scale_variation_string,
                scale,
            )
        elif effect is SystematicType.NORM:
            self.systematics[systematic_name] = Systematic(
                systematic_name,
                SystematicType.NORM,
                norm_variation,
            )

[docs]    def load(self, filename, disable_morphing=False):
        """
        Loads MadMiner setup from a file. All parameters, benchmarks, and morphing settings are overwritten.
        See `save` for more details.

        Parameters
        ----------
        filename : str
            Path to the MadMiner file.

        disable_morphing : bool, optional
            If True, the morphing setup is not loaded from the file. Default value: False.

        Returns
        -------
            None
        """

        # Load data
        (
            self.parameters,
            self.benchmarks,
            _,
            morphing_components,
            morphing_matrix,
            _,
            _,
            self.systematics,
            _,
            _,
            _,
            _,
            self.finite_difference_benchmarks,
            self.finite_difference_epsilon,
        ) = load_madminer_settings(filename, include_nuisance_benchmarks=False)

        logger.info("Found %s parameters:", len(self.parameters))
        for param in self.parameters.values():
            logger.info("   %s", param)

        logger.info("Found %s benchmarks:", len(self.benchmarks))
        for benchmark in self.benchmarks.values():
            logger.info("   %s", benchmark)

            if self.default_benchmark is None:
                self.default_benchmark = benchmark.name

        # Morphing
        self.morpher = None
        self.export_morphing = False

        if morphing_matrix is not None and morphing_components is not None and not disable_morphing:
            self.morpher = PhysicsMorpher(self.parameters)
            self.morpher.set_components(morphing_components)
            self.morpher.set_basis(self.benchmarks, morphing_matrix=morphing_matrix)
            self.export_morphing = True

            logger.info("Found morphing setup with %s components", len(morphing_components))
        else:
            logger.info("Did not find morphing setup.")

        # Systematics setup
        if len(self.systematics) == 0:
            logger.info("Did not find systematics setup.")
        else:
            logger.info("Found systematics setup with %s groups", len(self.systematics))
            for name, systematic in self.systematics.items():
                logger.debug("  %s: %s", name, systematic)

[docs]    def save(self, filename):
        """
        Saves MadMiner setup into a file.

        The file format follows the HDF5 standard. The saved information includes:

        * the parameter definitions,
        * the benchmark points,
        * the systematics setup (if defined), and
        * the morphing setup (if defined).

        This file is an important input to later stages in the analysis chain, including the processing of generated
        events, extraction of training samples, and calculation of Fisher information matrices. In these downstream
        tasks, additional information will be written to the MadMiner file, including the observations and event
        weights.

        Parameters
        ----------
        filename : str
            Path to the MadMiner file.

        Returns
        -------
            None

        """

        Path(filename).parent.mkdir(parents=True, exist_ok=True)

        if self.morpher is not None:
            logger.info("Saving setup (including morphing) to %s", filename)

            save_madminer_settings(
                file_name=filename,
                file_override=True,
                parameters=self.parameters,
                benchmarks=self.benchmarks,
                morphing_components=self.morpher.components,
                morphing_matrix=self.morpher.morphing_matrix,
                systematics=self.systematics,
                finite_differences=self.finite_difference_benchmarks,
                finite_differences_epsilon=self.finite_difference_epsilon,
            )
        else:
            logger.info("Saving setup (without morphing) to %s", filename)

            save_madminer_settings(
                file_name=filename,
                file_override=True,
                parameters=self.parameters,
                benchmarks=self.benchmarks,
                systematics=self.systematics,
                finite_differences=self.finite_difference_benchmarks,
                finite_differences_epsilon=self.finite_difference_epsilon,
            )

    def _export_cards(
        self,
        param_card_template_file,
        mg_process_directory,
        sample_benchmark=None,
        param_card_filename=None,
        reweight_card_filename=None,
        include_param_card=True,
        benchmarks=None,
    ):
        """
        Exports a param_card (optional) and a reweight_card for MadGraph. This is a low-level function; it is
        recommended to use `run` or `run_multiple` instead.

        Parameters
        ----------
        param_card_template_file : str
            Path to a param_card.dat of the used model.

        mg_process_directory : str
            Path to the directory of the MG process.

        sample_benchmark : str or None, optional
            Name of the benchmark used for sampling. If None, the default benchmark of this instance is used.
            Default value: None.

        param_card_filename : str or None, optional
            Output filename for the generated param card. If None, a default filename in the MG process folder is
            used. Default value: None.

        reweight_card_filename : str or None, optional
            Output filename for the generated reweight card. If None, a default filename in the MG process folder
            is used. Default value: None.

        include_param_card : bool, optional
            If False, no param card is exported, only a reweight card. Default value: True.

        benchmarks : None or OrderedDict, optional
            If None, uses all benchmarks. Otherwise uses these benchmarks. Default value: None.

        Returns
        -------
            None

        Raises
        ------
        AssertionError
            If this instance has no default benchmark or no benchmarks at all.

        """

        # Report the explicit card locations only when both of them are given
        if param_card_filename is not None and reweight_card_filename is not None:
            logger.info("Creating param and reweight cards in %s, %s", param_card_filename, reweight_card_filename)
        else:
            logger.info("Creating param and reweight cards in %s", mg_process_directory)

        benchmarks = self.benchmarks if benchmarks is None else benchmarks

        # The instance itself has to hold at least one benchmark, even if `benchmarks` was passed explicitly
        assert self.default_benchmark is not None
        assert len(self.benchmarks) > 0

        sample_benchmark = self.default_benchmark if sample_benchmark is None else sample_benchmark

        if include_param_card:
            export_param_card(
                benchmark=benchmarks[sample_benchmark],
                parameters=self.parameters,
                param_card_template_file=param_card_template_file,
                mg_process_directory=mg_process_directory,
                param_card_filename=param_card_filename,
            )

        export_reweight_card(
            sample_benchmark=sample_benchmark,
            benchmarks=benchmarks,
            parameters=self.parameters,
            mg_process_directory=mg_process_directory,
            reweight_card_filename=reweight_card_filename,
        )

[docs]    def run(
        self,
        mg_directory,
        proc_card_file,
        param_card_template_file,
        run_card_file=None,
        mg_process_directory=None,
        pythia8_card_file=None,
        configuration_file=None,
        sample_benchmark=None,
        is_background=False,
        only_prepare_script=False,
        ufo_model_directory=None,
        log_directory=None,
        temp_directory=None,
        initial_command=None,
        systematics=None,
        order="LO",
        python_executable=None,
    ):

        """
        High-level function that creates the the MadGraph process, all required cards, and prepares or runs the event
        generation for one combination of cards.

        If `only_prepare_scripts=True`, the event generation is not run
        directly, but a bash script is created in `<process_folder>/madminer/run.sh` that will start the event
        generation with the correct settings.

        High-level function that creates the the MadGraph process, all required cards, and prepares or runs the event
        generation for multiple combinations of run_cards or importance samplings (`sample_benchmarks`).

        If `only_prepare_scripts=True`, the event generation is not run
        directly, but a bash script is created in `<process_folder>/madminer/run.sh` that will start the event
        generation with the correct settings.

        Parameters
        ----------
        mg_directory : str
            Path to the MadGraph 5 base directory.

        proc_card_file : str
            Path to the process card that tells MadGraph how to generate the process.

        param_card_template_file : str
            Path to a param card that will be used as template to create the
            appropriate param cards for these runs.

        run_card_file : str
            Paths to the MadGraph run card. If None, the default run_card is used.

        mg_process_directory : str or None, optional
            Path to the MG process directory. If None, MadMiner uses ./MG_process. Default value: None.

        pythia8_card_file : str or None, optional
            Path to the MadGraph Pythia8 card. If None, the card present in the process folder is used.
            Default value: None.

        configuration_file : str, optional
            Path to the MadGraph me5_configuration card. If None, the card present in the process folder
            is used. Default value: None.

        sample_benchmark : list of str or None, optional
            Lists the names of benchmarks that should be used to sample events. A different sampling does not change
            the expected differential cross sections, but will change which regions of phase space have many events
            (small variance) or few events (high variance). If None, the benchmark added first is used. Default value:
            None.

        is_background : bool, optional
            Should be True for background processes, i.e. process in which the differential cross section does not
            depend on the parameters (i.e. is the same for all benchmarks). In this case, no reweighting is run, which
            can substantially speed up the event generation. Default value: False.

        only_prepare_script : bool, optional
            If True, the event generation is not started, but instead a run.sh script is created in the process
            directory. Default value: False.

        ufo_model_directory : str or None, optional
            Path to an UFO model directory that should be used, but is not yet installed in mg_directory/models. The
            model will be copied to the MadGraph model directory before the process directory is generated. (Default
            value = None.

        log_directory : str or None, optional
            Directory for log files with the MadGraph output. If None, ./logs is used. Default value: None.

        temp_directory : str or None, optional
            Path to a temporary directory. If None, a system default is used. Default value: None.

        initial_command : str or None, optional
            Initial shell commands that have to be executed before MG is run (e.g. to load a virtual environment).
            Default value: None.

        systematics : None or list of str, optional
            If list of str, defines which systematics are used for this run.

        order : 'LO' or 'NLO', optional
            Differentiates between LO and NLO order runs. Minor changes to writing, reading and naming cards.
            Default value: 'LO'

        python_executable : None or str, optional
            Provides a path to the Python executable that should be used to call MadMiner. Default: None.

        Returns
        -------
            None

        """

        if sample_benchmark is None:
            sample_benchmark = self.default_benchmark

        self.run_multiple(
            mg_directory=mg_directory,
            proc_card_file=proc_card_file,
            param_card_template_file=param_card_template_file,
            run_card_files=[run_card_file],
            mg_process_directory=mg_process_directory,
            pythia8_card_file=pythia8_card_file,
            configuration_file=configuration_file,
            sample_benchmarks=[sample_benchmark],
            is_background=is_background,
            only_prepare_script=only_prepare_script,
            ufo_model_directory=ufo_model_directory,
            log_directory=log_directory,
            temp_directory=temp_directory,
            initial_command=initial_command,
            systematics=systematics,
            order=order,
            python_executable=python_executable,
        )

[docs]    def run_multiple(
        self,
        mg_directory,
        proc_card_file,
        param_card_template_file,
        run_card_files,
        mg_process_directory=None,
        pythia8_card_file=None,
        configuration_file=None,
        sample_benchmarks=None,
        is_background=False,
        only_prepare_script=False,
        ufo_model_directory=None,
        log_directory=None,
        temp_directory=None,
        initial_command=None,
        systematics=None,
        order="LO",
        python_executable=None,
    ):

        """
        High-level function that creates the the MadGraph process, all required cards, and prepares or runs the event
        generation for multiple combinations of run_cards or importance samplings (`sample_benchmarks`).

        If `only_prepare_scripts=True`, the event generation is not run
        directly, but a bash script is created in `<process_folder>/madminer/run.sh` that will start the event
        generation with the correct settings.

        Parameters
        ----------
        mg_directory : str
            Path to the MadGraph 5 base directory.

        proc_card_file : str
            Path to the process card that tells MadGraph how to generate the process.

        param_card_template_file : str
            Path to a param card that will be used as template to create the appropriate param cards for these runs.

        run_card_files : list of str
            Paths to the MadGraph run card.

        mg_process_directory : str or None, optional
            Path to the MG process directory. If None, MadMiner uses ./MG_process. Default value: None.

        pythia8_card_file : str, optional
            Path to the MadGraph Pythia8 card. If None, the card present in the process folder
            is used. Default value: None.

        configuration_file : str, optional
            Path to the MadGraph me5_configuration card. If None, the card present in the process folder
            is used. Default value: None.

        sample_benchmarks : list of str or None, optional
            Lists the names of benchmarks that should be used to sample events. A different sampling does not change
            the expected differential cross sections, but will change which regions of phase space have many events
            (small variance) or few events (high variance). If None, a run is started for each of the benchmarks, which
            should map out all regions of phase space well. Default value: None.

        is_background : bool, optional
            Should be True for background processes, i.e. process in which the differential cross section does not
            depend on the parameters (i.e. is the same for all benchmarks). In this case, no reweighting is run, which
            can substantially speed up the event generation. Default value: False.

        only_prepare_script : bool, optional
            If True, the event generation is not started, but instead a run.sh script is created in the process
            directory. Default value: False.

        ufo_model_directory : str or None, optional
            Path to an UFO model directory that should be used, but is not yet installed in mg_directory/models. The
            model will be copied to the MadGraph model directory before the process directory is generated. (Default
            value = None)

        log_directory : str or None, optional
            Directory for log files with the MadGraph output. If None, ./logs is used. Default value: None.

        temp_directory : str or None, optional
            Path to a temporary directory. If None, a system default is used. Default value: None.

        initial_command : str or None, optional
            Initial shell commands that have to be executed before MG is run (e.g. to load a virtual environment).
            If not specified and `python2_override` is True, it adds the user-installed Python2 binaries to the PATH.
            Default value: None.

        systematics : None or list of str, optional
            If list of str, defines which systematics are used for these runs.

        order : 'LO' or 'NLO', optional
            Differentiates between LO and NLO order runs. Minor changes to writing, reading and naming cards.
            Default value: 'LO'

        python_executable : None or str, optional
            Provides a path to the Python executable that should be used to call MadMiner. Default: None.

        Returns
        -------
            None

        """

        # Defaults
        if mg_process_directory is None:
            mg_process_directory = "./MG_process"

        if temp_directory is None:
            temp_directory = tempfile.gettempdir()

        if log_directory is None:
            log_directory = "./logs"

        if sample_benchmarks is None:
            sample_benchmarks = [benchmark for benchmark in self.benchmarks.keys()]

        # This snippet is useful when using virtual envs.
        # (Derives from a Python2 - Python3 issue).
        # Ref: https://github.com/madminer-tool/madminer/issues/422
        if python_executable and initial_command is None:
            logger.info(f"Adding {python_executable} bin folder to PATH")
            binary_path = os.popen(f"command -v {python_executable}").read().strip()
            binary_folder = Path(binary_path).parent

            initial_command = f"export PATH={binary_folder}:$PATH"
            logger.info(f"Using Python executable {binary_path}")

        # Generate process folder
        log_file_generate = f"{log_directory}/generate.log"

        generate_mg_process(
            mg_directory,
            temp_directory,
            proc_card_file,
            mg_process_directory,
            ufo_model_directory=ufo_model_directory,
            initial_command=initial_command,
            log_file=log_file_generate,
            python_executable=python_executable,
        )

        # Make MadMiner folders
        Path(mg_process_directory, "madminer", "cards").mkdir(parents=True, exist_ok=True)
        Path(mg_process_directory, "madminer", "scripts").mkdir(parents=True, exist_ok=True)

        # Systematics
        if systematics is None:
            systematics_used = self.systematics
        else:
            systematics_used = OrderedDict()
            for key in systematics:
                systematics_used[key] = self.systematics[key]

        # Loop over settings
        i = 0
        mg_scripts = []

        for run_card_file in run_card_files:
            for sample_benchmark in sample_benchmarks:

                # Files
                script_file = f"madminer/scripts/run_{i}.sh"
                log_file_run = f"run_{i}.log"
                mg_commands_filename = f"madminer/cards/mg_commands_{i}.dat"
                param_card_file = f"madminer/cards/param_card_{i}.dat"
                reweight_card_file = f"madminer/cards/reweight_card_{i}.dat"
                new_pythia8_card_file = None
                if pythia8_card_file is not None:
                    new_pythia8_card_file = f"madminer/cards/pythia8_card_{i}.dat"
                new_run_card_file = None
                if run_card_file is not None:
                    new_run_card_file = f"madminer/cards/run_card_{i}.dat"
                new_configuration_file = None
                if configuration_file is not None:
                    new_configuration_file = f"madminer/cards/me5_configuration_{i}.txt"

                logger.info("Run %s", i)
                logger.info("  Sampling from benchmark: %s", sample_benchmark)
                logger.info("  Original run card:       %s", run_card_file)
                logger.info("  Original Pythia8 card:   %s", pythia8_card_file)
                logger.info("  Original config card:    %s", configuration_file)
                logger.info("  Copied run card:         %s", new_run_card_file)
                logger.info("  Copied Pythia8 card:     %s", new_pythia8_card_file)
                logger.info("  Copied config card:      %s", new_configuration_file)
                logger.info("  Param card:              %s", param_card_file)
                logger.info("  Reweight card:           %s", reweight_card_file)
                logger.info("  Log file:                %s", log_file_run)

                # Check input
                if run_card_file is None and any(
                    syst.type in {SystematicType.PDF, SystematicType.SCALE} for syst in systematics_used.values()
                ):
                    logger.warning(
                        "Warning: No run card given, but PDF or scale variation set up. The correct systematics"
                        " settings are not set automatically. Make sure to set them correctly!"
                    )

                # Create param and reweight cards
                self._export_cards(
                    param_card_template_file,
                    mg_process_directory,
                    sample_benchmark=sample_benchmark,
                    param_card_filename=f"{mg_process_directory}/{param_card_file}",
                    reweight_card_filename=f"{mg_process_directory}/{reweight_card_file}",
                )

                # Create run card
                if run_card_file is not None:
                    export_run_card(
                        template_filename=run_card_file,
                        run_card_filename=f"{mg_process_directory}/{new_run_card_file}",
                        systematics=systematics_used,
                        order=order,
                    )

                # Copy Pythia card
                if pythia8_card_file is not None:
                    copy_file(pythia8_card_file, f"{mg_process_directory}/{new_pythia8_card_file}")

                # Copy Configuration card
                if configuration_file is not None:
                    copy_file(configuration_file, f"{mg_process_directory}/{new_configuration_file}")

                # Run MG and Pythia
                if only_prepare_script:
                    mg_script = setup_mg_with_scripts(
                        mg_process_directory,
                        proc_card_filename_from_mgprocdir=mg_commands_filename,
                        run_card_file_from_mgprocdir=new_run_card_file,
                        param_card_file_from_mgprocdir=param_card_file,
                        reweight_card_file_from_mgprocdir=reweight_card_file,
                        pythia8_card_file_from_mgprocdir=new_pythia8_card_file,
                        configuration_file_from_mgprocdir=new_configuration_file,
                        is_background=is_background,
                        script_file_from_mgprocdir=script_file,
                        initial_command=initial_command,
                        log_dir=log_directory,
                        log_file_from_logdir=log_file_run,
                        python_executable=python_executable,
                        order=order,
                    )
                    mg_scripts.append(mg_script)
                else:
                    run_mg(
                        mg_directory,
                        mg_process_directory,
                        f"{mg_process_directory}/{mg_commands_filename}",
                        f"{mg_process_directory}/{new_run_card_file}",
                        f"{mg_process_directory}/{param_card_file}",
                        f"{mg_process_directory}/{reweight_card_file}",
                        None if new_pythia8_card_file is None else f"{mg_process_directory}/{new_pythia8_card_file}",
                        None if new_configuration_file is None else f"{mg_process_directory}/{new_configuration_file}",
                        is_background=is_background,
                        initial_command=initial_command,
                        log_file=f"{log_directory}/{log_file_run}",
                        python_executable=python_executable,
                        order=order,
                    )

                i += 1

        n_runs_total = i

        # Master shell script
        if only_prepare_script:
            master_script_filename = f"{mg_process_directory}/madminer/run.sh"
            create_master_script(
                log_directory,
                master_script_filename,
                mg_directory,
                mg_process_directory,
                mg_scripts,
            )
            logger.info(
                "To generate events, please run:\n\n %s [MG_directory] [MG_process_directory] [log_dir]\n\n",
                master_script_filename,
            )

        else:
            expected_event_files = [f"{mg_process_directory}/Events/run_{(i+1):02d}" for i in range(n_runs_total)]
            expected_event_files = "\n".join(expected_event_files)
            logger.info(
                "Finished running MadGraph! Please check that events were successfully generated in the following "
                "folders:\n\n%s\n\n",
                expected_event_files,
            )

[docs]    def reweight_existing_sample(
        self,
        mg_process_directory,
        run_name,
        param_card_template_file,
        sample_benchmark,
        reweight_benchmarks=None,
        only_prepare_script=False,
        log_directory=None,
        initial_command=None,
    ):
        """
        High-level function that adds the weights required for MadMiner to an existing sample.

        If `only_prepare_scripts=True`, the event generation is not run
        directly, but a bash script is created in `<process_folder>/madminer/run.sh` that will start the event
        generation with the correct settings.

        Currently does not support adding systematics.

        Parameters
        ----------
        mg_process_directory : str
            Path to the MG process directory. If None, MadMiner uses ./MG_process.

        run_name : str
            Run name.

        param_card_template_file : str
            Path to a param card that will be used as template to create the appropriate param cards for these runs.

        sample_benchmark : str
            The name of the benchmark used to generate this sample.

        reweight_benchmarks : list of str or None
            Lists the names of benchmarks to which the sample should be reweighted. If None, all benchmarks (except
            sample_benchmarks) are used.

        only_prepare_script : bool, optional
            If True, the event generation is not started, but instead a run.sh script is created in the process
            directory. Default value: False.

        log_directory : str or None, optional
            Directory for log files with the MadGraph output. If None, ./logs is used. Default value: None.

        initial_command : str or None, optional
            Initial shell commands that have to be executed before MG is run (e.g. to load a virtual environment).
            Default value: None.

        Returns
        -------
            None

        """

        # TODO: check that we don't reweight to benchmarks that already have weights in the LHE file
        # TODO: add systematics

        # Defaults
        if log_directory is None:
            log_directory = "./logs"

        # Make MadMiner folders
        Path(mg_process_directory, "madminer", "cards").mkdir(parents=True, exist_ok=True)
        Path(mg_process_directory, "madminer", "scripts").mkdir(parents=True, exist_ok=True)

        # Files
        script_file = "madminer/scripts/run_reweight.sh"
        log_file_run = "reweight.log"
        reweight_card_file = "/madminer/cards/reweight_card_reweight.dat"

        # Missing benchmarks
        missing_benchmarks = OrderedDict()
        for benchmark_name in reweight_benchmarks:
            missing_benchmarks[benchmark_name] = self.benchmarks[benchmark_name]

        # Inform user
        logger.info("Reweighting setup")
        logger.info("  Originally sampled from benchmark: %s", sample_benchmark)
        logger.info("  Now reweighting to benchmarks:     %s", reweight_benchmarks)
        logger.info("  Reweight card:                     %s", reweight_card_file)
        logger.info("  Log file:                          %s", log_file_run)

        # Create param and reweight cards
        self._export_cards(
            param_card_template_file,
            mg_process_directory,
            sample_benchmark=sample_benchmark,
            reweight_card_filename=f"{mg_process_directory}/{reweight_card_file}",
            include_param_card=False,
            benchmarks=missing_benchmarks,
        )

        # Run reweighting
        if only_prepare_script:
            call_instruction = setup_mg_reweighting_with_scripts(
                mg_process_directory,
                run_name=run_name,
                reweight_card_file_from_mgprocdir=reweight_card_file,
                script_file_from_mgprocdir=script_file,
                initial_command=initial_command,
                log_dir=log_directory,
                log_file_from_logdir=log_file_run,
            )

            logger.info("To generate events, please run:\n\n %s \n\n", call_instruction)

        else:
            run_mg_reweighting(
                mg_process_directory,
                run_name=run_name,
                reweight_card_file=f"{mg_process_directory}/{reweight_card_file}",
                initial_command=initial_command,
                log_file=f"{log_directory}/{log_file_run}",
            )
            logger.info(
                "Finished running reweighting! Please check that events were successfully reweighted in the following "
                "folder:\n\n %s/Events/%s \n\n",
                mg_process_directory,
                run_name,
            )