Source code for tsunami_ip_utils.perturbations

"""This module is used for generating cross section perturbations and combining them with the sensitivity profiles for a given application
experiment pair to generate a similarity scatter plot"""

from tsunami_ip_utils.readers import RegionIntegratedSdfReader, read_region_integrated_h5_sdf
from tsunami_ip_utils.utils import _convert_paths
from pathlib import Path
from tsunami_ip_utils.xs import read_multigroup_xs
import pickle
import os
import tempfile
from string import Template
import subprocess
import time
from tsunami_ip_utils.utils import _filter_by_nuclie_reaction_dict
import multiprocessing
from multiprocessing import Pool
from tqdm import tqdm
import numpy as np
from tqdm.contrib.concurrent import process_map
from typing import List, Tuple, Dict, Union
from tsunami_ip_utils import config
from numpy.typing import ArrayLike
from tsunami_ip_utils.config import cache_dir

# Number of xs perturbation samples available in SCALE
NUM_SAMPLES = config.NUM_SAMPLES


[docs]
def _generate_and_read_perturbed_library(base_library: Union[str, Path], perturbation_factors: Union[str, Path], sample_number: int, 
                                         all_nuclide_reactions: dict) -> dict:
    """Generates and reads perturbed multigroup cross section libraries.
    
    Parameters
    ----------
    base_library
        Path to the base cross section library.
    perturbation_factors
        Path to the perturbation factors directory (corresponding to the base library).
    sample_number
        The sample number to use for generating the perturbed library. Must be from 1 - ``NUM_SAMPLES``, where 
        ``NUM_SAMPLES`` = :globalparam:`NUM_SAMPLES` is the number of perturbation factor samples provided in 
        the user's current version of SCALE. (``0`` :math:`\\leq` ``sample_number`` :math:`\\leq` :globalparam:`NUM_SAMPLES`)
    all_nuclide_reactions
        A dictionary containing the nuclide reactions that are read from the perturbed library.
    
    Returns
    -------
        A dictionary containing the perturbed cross section libraries for each nuclide reaction."""
    # Read the SCALE input template
    current_dir = Path(__file__).parent
    template_filename = current_dir / 'input_files' / 'generate_perturbed_library.inp'
    with open(template_filename, 'r') as f:
        template = Template(f.read())

    # Open a temporary file to store the output file
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as output_file:
        output_file_path = output_file.name

    # -----------------------------------------
    # Substitute the input file template values
    # -----------------------------------------
    # Convert the paths to strings if not already strings
    if isinstance(perturbation_factors, str):
        perturbation_factors = Path(perturbation_factors)
    if isinstance(base_library, str):
        base_library = Path(base_library)
    
    input_file = template.substitute(
        base_library=str(base_library),
        perturbation_factors=str( perturbation_factors / f'Sample{sample_number}'),
        sample_number=sample_number,
        output=output_file_path
    )

    # Open a temporary file to store the input file, then one to store the output file
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as input_temp_file:
        input_temp_file.write(input_file)
        input_temp_file_path = input_temp_file.name

    # Run the input file with scalerte
    command = ['scalerte', input_temp_file_path]

    proc = subprocess.Popen(command)
    proc.wait()

    # Now delete the input file
    os.remove(input_temp_file_path)

    # Read the perturbed library
    perturbed_xs = read_multigroup_xs(Path(output_file_path), all_nuclide_reactions)

    # Now delete the output file
    os.remove(output_file_path)

    return perturbed_xs



[docs]
@_convert_paths
def generate_points(application_path: Union[Path, List[Path]], experiment_path: Union[Path, List[Path]], 
                    base_library: Union[str, Path], perturbation_factors: Union[str, Path], num_perturbations: int
                    ) -> Union[ List[ Tuple[ float, float ] ], 
                               np.ndarray[ List[ Tuple[ float, float ] ] ]]:
    """Generates points for a similarity scatter plot using the nuclear data sampling method.

    Parameters
    ----------
    application_path
        Path(s) to the application sensitivity profile.
    experiment_path
        Path(s) to the experiment sensitivity profile.
    base_library
        Path to the base cross section library.
    perturbation_factors
        Path to the perturbation factors directory.
    num_perturbations
        Number of perturbation points to generate.
    
    Returns
    -------
        A list of points for the similarity scatter plot.

    Notes
    -----    
    * This function will automatically cache the base cross section library and the perturbed cross section libraries in the 
      user's home directory under the ``.tsunami_ip_utils_cache`` directory if not already cached. Caching is recommended if
      perturbation points are to be generated multiple times, because the I/O overhead of dumping and reading the base and
      perturbed cross section libraries can be significant.

    * This function can also generate a matrix of points for a given set of experiment and applications for making a matrix plot
      done by passing a list of paths for the application and experiment sensitivity profiles.
        
    Theory
    ======
    The nuclear data sampling method
    involves randomly sampling cross section libraries using the AMPX tool ``clarolplus`` to calculate a perturbed cross section
    library :math:`\\Delta \\boldsymbol{\\sigma}_n = \\overline{\\boldsymbol{\\sigma}} - \\boldsymbol{\\sigma}_n`, where
    :math:`\\boldsymbol{\\sigma}_n` is the :math:`n` th randomly sampled cross section library, and 
    :math:`\\overline{\\boldsymbol{\\sigma}}` is the base cross section library, consisting of the mean values of all of the 
    cross sections (e.g. the SCALE 252-group ENDF-V7.1 library). This perturbed cross section library (a vector consisting 
    of the nuclide-reaction-group-wise perturbations to the cross sections) is dotted with the sensitivity vector for the 
    application: :math:`x_n = \\boldsymbol{S}_A \\cdot \\Delta \\boldsymbol{\\sigma}`, and the experiment
    :math:`y_n = \\boldsymbol{S}_A \\cdot \\Delta \\boldsymbol{\\sigma}`, and the resulting points :math:`(x_n, y_n)` are
    plotted on a scatter plot whose Pearson correlation coefficient is meant to correspond to the :math:`c_k` value computed
    by TSUNAMI-IP.
    """
    # Convert the paths to strings if not already strings
    if isinstance(application_path, str):
        application_path = Path(application_path)
    if isinstance(experiment_path, str):
        experiment_path = Path(experiment_path)
    if isinstance(base_library, str):
        base_library = Path(base_library)
    if isinstance(perturbation_factors, str):
        perturbation_factors = Path(perturbation_factors)
    
    # Check if the application and experiment paths are lists (vectorization)
    if isinstance(application_path, list) and isinstance(experiment_path, list):
        points_array = np.empty( ( len(application_path), len(experiment_path), num_perturbations, 2), dtype=object )
        for i, application in enumerate(application_path):
            for j, experiment in enumerate(experiment_path):
                if i < j: # Skip upper right triangle of the matrix (since it's symmetric)
                    continue

                points_array[i, j] = generate_points(
                    application, 
                    experiment, 
                    base_library, 
                    perturbation_factors, 
                    num_perturbations
                )
        # Now popupate the upper right triangle of the matrix with the transpose of the lower left triangle
        for i, application in enumerate(application_path):
            for j, experiment in enumerate(experiment_path):
                if i < j:
                    points_array[i, j] = points_array[j, i]

        return points_array
    elif isinstance(application_path, list) or isinstance(experiment_path, list):
        raise ValueError("Both application and experiment paths must be lists or neither.")

    # Read the sdfs for the application and experiment
    if application_path.suffix == '.sdf':
        application = RegionIntegratedSdfReader(application_path).convert_to_dict('numbers').sdf_data
        application = { nuclide_name: { reaction_name: reaction['sensitivities'] } \
                       for nuclide_name, nuclide in application.items() \
                       for reaction_name, reaction in nuclide.items() }
    elif application_path.suffix == '.h5':
        application = read_region_integrated_h5_sdf(application_path)
    else:
        raise ValueError("The application path must be either an sdf or h5 file.")
    
    if experiment_path.suffix == '.sdf':
        experiment  = RegionIntegratedSdfReader(experiment_path).convert_to_dict('numbers').sdf_data
        experiment = { nuclide_name: { reaction_name: reaction['sensitivities'] } \
                      for nuclide_name, nuclide in experiment.items() \
                      for reaction_name, reaction in nuclide.items() }
    elif experiment_path.suffix == '.h5':
        experiment = read_region_integrated_h5_sdf(experiment_path)
    else:
        raise ValueError("The experiment path must be either an sdf or h5 file.")
    

    # Filter out redundant reactions, which will introduce bias into the similarity scatter plot
    # Absorption, or "capture" as it's referred to in SCALE and total
    # redundant_reactions = ['101', '1'] 
    # application = filter_redundant_reactions(application, redundant_reactions=redundant_reactions)
    # experiment  = filter_redundant_reactions(experiment,  redundant_reactions=redundant_reactions)

    # Create a nuclide reaction dict for the application and experiment
    application_nuclide_reactions = {nuclide: list(reactions.keys()) for nuclide, reactions in application.items()}
    experiment_nuclide_reactions  = {nuclide: list(reactions.keys()) for nuclide, reactions in experiment.items()}

    # Take the union of the nuclide reactions for the application and experiment
    all_nuclide_reactions = application_nuclide_reactions.copy()
    all_nuclide_reactions.update(experiment_nuclide_reactions)

    # Make a directory to store all cached cross section libraries if it doesn't already exist
    if not cache_dir.exists():
        os.mkdir(cache_dir)

    # Make a directory to store the cached perturbed multigroup libraries if it doesn't already exist
    library_name = base_library.name

    # Get the base multigroup cross sections for each nuclide reaction and the list of all available nuclide reactions for
    # caching the sampled perturbed cross sections

    if not (cache_dir / f'cached_{library_name}.pkl').exists():
        base_xs, available_nuclide_reactions = read_multigroup_xs(base_library, all_nuclide_reactions, \
                                                                return_available_nuclide_reactions=True)
        # Now read all cross sections and cache the base cross section library
        base_xs = read_multigroup_xs(base_library, available_nuclide_reactions)
        with open(cache_dir / f'cached_{library_name}.pkl', 'wb') as f:
            pickle.dump(base_xs, f)
    else:
        with open(cache_dir / f'cached_{library_name}.pkl', 'rb') as f:
            base_xs = pickle.load(f)

        # Get available nuclide reactions
        available_nuclide_reactions = { nuclide: list( reactions.keys() ) for nuclide, reactions in base_xs.items() }

        # Filter out the desired nuclide reactions
        base_xs = _filter_by_nuclie_reaction_dict(base_xs, all_nuclide_reactions)

    perturbed_cache = cache_dir / f'cached_{library_name}_perturbations'
    if not perturbed_cache.exists():
        os.mkdir(perturbed_cache)

    # --------------------------------
    # Main loop for generating points
    # --------------------------------
    points = []
    for i in tqdm(range(1, num_perturbations + 1), desc="Generating perturbation points"):
        # Cache the perturbed cross section libraries if not already cached
        perturbed_xs_cache = perturbed_cache / f'perturbed_xs_{i}.pkl'
        if not perturbed_xs_cache.exists():
            perturbed_xs = _generate_and_read_perturbed_library(base_library, perturbation_factors, i, \
                                                               available_nuclide_reactions)
            with open(perturbed_xs_cache, 'wb') as f:
                pickle.dump(perturbed_xs, f)
        else:
            with open(perturbed_xs_cache, 'rb') as f:
                perturbed_xs = pickle.load(f)

        # Now filter out the desired nuclide reactions
        perturbed_xs = _filter_by_nuclie_reaction_dict(perturbed_xs, all_nuclide_reactions)

        # ----------------------------------------------
        # Compute S ⋅ Δσ for application and experiment
        # ----------------------------------------------
        running_total_application = 0
        running_total_experiment = 0
        for isotope, reactions in all_nuclide_reactions.items():
            for reaction in reactions:
                # First compute the cross section delta, then multiply by the sensitivity profile
                delta_xs = perturbed_xs[isotope][reaction] - base_xs[isotope][reaction]
                if isotope in application and reaction in application[isotope]:
                    running_total_application += np.dot(application[isotope][reaction], delta_xs)
                if isotope in experiment and reaction in experiment[isotope]:
                    running_total_experiment  += np.dot(experiment[isotope][reaction], delta_xs)

        points.append((running_total_application, running_total_experiment))

    return points

        


[docs]
def _cache_perturbed_library(args: Tuple[int, Path, Path, int, Dict[str, List[str]], Path]) -> float:
    """Caches a single perturbed cross section library.

    Parameters
    ----------
    args
        A tuple containing all necessary components to perform the caching of a 
        perturbed library.
        
        - i (int):
            The sample number to use for generating the perturbed library.
        - base_library (str | Path):
            Path to the base cross section library.
        - perturbation_factors (str | Path):
            Path to the cross section perturbation factors (used to generate the perturbed libraries).
        - sample_number (int):
            The sample number to use for generating the perturbed library. Must be from 1 - ``NUM_SAMPLES``, where ``NUM_SAMPLES``
            is the number of perturbation factor samples provided in the user's current version of SCALE. 
            (0 :math:`\\leq` ``sample_number`` :math:`\\leq` ``NUM_SAMPLES``)
        - available_nuclide_reactions (Dict[str, List[str]]):
            A dictionary containing the nuclide reactions that are read from the perturbed library.

    Returns
    -------
        The time taken to cache the perturbed library.
    """    
    i, base_library, perturbation_factors, sample_number, available_nuclide_reactions, perturbed_cache = args
    perturbed_xs_cache = perturbed_cache / f'perturbed_xs_{i}.pkl'
    if not perturbed_xs_cache.exists():
        start = time.time()
        perturbed_xs = _generate_and_read_perturbed_library(base_library, perturbation_factors, sample_number, available_nuclide_reactions)
        with open(perturbed_xs_cache, 'wb') as f:
            pickle.dump(perturbed_xs, f)
        
        end = time.time()
        return end - start
    else:
        return 0



[docs]
def cache_all_libraries(base_library: Path, perturbation_factors: Path, reset_cache: bool=False,
                        num_cores: int=multiprocessing.cpu_count()//2) -> None:
    """Caches the base and perturbed cross section libraries for a given base library and perturbed library paths.

    Parameters
    ----------
    base_library
        Path to the base cross section library.
    perturbation_factors
        Path to the cross section perturbation factors (used to generate the perturbed libraries).
    reset_cache
        Whether to reset the cache or not (default is ``False``).
    num_cores
        The number of cores to use for caching the perturbed libraries in parallel (default is half the number of cores 
        available).
        
    Returns
    -------
        This function does not return a value and has no return type.
        
    Notes
    -----
    * This function will cache the base cross section library and the perturbed cross section libraries in the user's home
      directory under the ``.tsunami_ip_utils_cache`` directory. If the user wishes to reset the cache, they can do so by
      setting the ``reset_cache`` parameter to ``True`` in the :func:`cache_all_libraries` function.
    * The caches can be `very` large, so make sure that sufficient space is available. For example, caching SCALE's 252-group
      ENDF-v7.1 library and all of the perturbed libraries currently available in SCALE (1000 samples) requires 48 GB of space,
      and for ENDF-v8.0, it requires 76 GB of space.
    * The time taken to cache the libraries can be significant (~5 hours on 6 cores, but this is hardware dependent), but when 
      caching the libraries a progress bar will be displayed with a time estimate.
    * Note, if using ``num_cores`` greater than half the number of cores available on your system, you may experience excessive
      memory usage, so proceed with caution.
    """
    # Read the base library, use an arbitrary nuclide reaction dict just to get the available reactions
    all_nuclide_reactions = { '92235': ['18'] } # u-235 fission

    # Make a directory to store all cached cross section libraries if it doesn't already exist
    if not cache_dir.exists():
        os.mkdir(cache_dir)

    if reset_cache:
        # Remove all cached cross section libraries
        for f in os.listdir(cache_dir):
            if f.endswith('_perturbations'): # A perturbed library directory
                for p in os.listdir(cache_dir / f):
                    os.remove(cache_dir / f / p)
            else:
                os.remove(cache_dir / f)

    # Cache base library if not already cached (requires reading twice)
    library_name = base_library.name
    base_library_cache = cache_dir / f'cached_{library_name}.pkl'
    print("Reading base library... ")
    if not base_library_cache.exists():
        _, available_nuclide_reactions = read_multigroup_xs(base_library, all_nuclide_reactions, \
                                                                return_available_nuclide_reactions=True)

        start = time.time()
        print("Caching base library... ", end='')
        base_xs = read_multigroup_xs(base_library, available_nuclide_reactions)
        with open( base_library_cache, 'wb') as f:
            pickle.dump(base_xs, f)
        end = time.time()
        print(f"Done in {end - start} seconds")
    else:
        with open(cache_dir / f'cached_{library_name}.pkl', 'rb') as f:
            base_xs = pickle.load(f)

        # Get available nuclide reactions
        available_nuclide_reactions = { nuclide: list( reactions.keys() ) for nuclide, reactions in base_xs.items() }
        
    
    # Make a directory to store the cached perturbed multigroup libraries if it doesn't already exist
    perturbed_cache = cache_dir / f'cached_{library_name}_perturbations'
    if not perturbed_cache.exists():
        os.mkdir(perturbed_cache)

    # ------------------------------------------
    # Main loop for caching perturbed libraries
    # ------------------------------------------

    # Create a pool of worker processes
    pool = Pool(processes=num_cores)

    # Create a list of arguments for each perturbed library
    args_list = [(i, base_library, perturbation_factors, i, available_nuclide_reactions, perturbed_cache)
                 for i in range(1, NUM_SAMPLES + 1)]

    # Use the pool to cache the perturbed libraries in parallel with a progress bar
    process_map(_cache_perturbed_library, args_list, max_workers=num_cores // 2, chunksize=1, desc='Caching perturbed libraries')

    # Close the pool
    pool.close()
    pool.join()