"""Tools for generating comparisons between TSUNAMI-IP calculated integral parameters (e.g. :math:`c_k`, :math:`E`) and calculated
values using correlation methods with either cross section sampling or uncertainty contributions."""
from tsunami_ip_utils.readers import read_integral_indices
from tsunami_ip_utils.integral_indices import calculate_E
import numpy as np
from uncertainties import unumpy
import pandas as pd
from pandas import DataFrame
from pathlib import Path
from tsunami_ip_utils.perturbations import generate_points
from typing import List, Dict, Tuple, Any, Optional, Union
from tsunami_ip_utils.viz.scatter_plot import EnhancedPlotlyFigure, InteractiveScatterLegend
from tsunami_ip_utils.viz.plot_utils import generate_plot_objects_array_from_perturbations, generate_plot_objects_array_from_contributions
from tsunami_ip_utils.integral_indices import get_uncertainty_contributions, calculate_E_contributions
from tsunami_ip_utils.viz import matrix_plot
import multiprocessing
from tsunami_ip_utils.utils import _run_and_read_TSUNAMI_IP, _convert_paths
def E_calculation_comparison(application_filenames: Union[List[str], List[Path]],
experiment_filenames: Union[List[str], List[Path]], coverx_library: str="252groupcov7.1",
                              tsunami_ip_output_filename: Optional[Union[str, Path]]=None) -> Dict[str, DataFrame]:
"""Function that compares the calculated similarity parameter E with the TSUNAMI-IP output for each application with each
experiment. The comparison is done for the nominal values and the uncertainties of the E values. In addition, the
difference between manually calculated uncertainties and automatically calculated uncertainties (i.e. via the uncertainties
package) is also calculated. The results are returned as a pandas DataFrame.
Parameters
----------
application_filenames
Paths to the application sdf files.
experiment_filenames
Paths to the experiment sdf files.
    coverx_library
The coverx library to use for TSUNAMI-IP. Default is ``'252groupcov7.1'``.
    tsunami_ip_output_filename
        Optional path to the TSUNAMI-IP output file. If not specified, the function will run TSUNAMI-IP to calculate the
        E values using the template file :ref:`MG_reader.inp`.
Returns
-------
Dictionary of pandas DataFrames for each type of E index. The keys are ``'total'``, ``'fission'``, ``'capture'``,
and ``'scatter'``. The DataFrames contain the calculated E values, the manual uncertainties, the TSUNAMI-IP values,
the relative difference in the mean, and the relative difference in the manually computed uncertainty. The DataFrames
are indexed by the experiment number and the columns are a MultiIndex with the application number as the main index
and the attributes as the subindex.
    Notes
    -----
    Each of the results DataFrames in the dictionary can easily be written to Excel using the pandas ``to_excel`` method.
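
    Examples
    --------
    A minimal sketch of the intended usage (the SDF paths below are hypothetical placeholders, and running this
    requires a working SCALE/TSUNAMI-IP installation):

    >>> results = E_calculation_comparison(
    ...     application_filenames=['application1.sdf', 'application2.sdf'],
    ...     experiment_filenames=['experiment1.sdf', 'experiment2.sdf'],
    ... )
    >>> results['total'].to_excel('E_total_comparison.xlsx')
    """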
    # First calculate each type of E index, with both automatic and manual uncertainty propagation
    E_types = ['total', 'fission', 'capture', 'scatter']
    # Map each E index type to the corresponding reaction_type argument of calculate_E
    reaction_types = {'total': 'all', 'fission': 'fission', 'capture': 'capture', 'scatter': 'elastic'}
    E = {}
    for E_type in E_types:
        # Automatic uncertainty propagation (via the uncertainties package)
        E[E_type] = calculate_E(application_filenames, experiment_filenames, reaction_type=reaction_types[E_type])
        # Manual uncertainty propagation
        E[E_type + '_manual'] = calculate_E(application_filenames, experiment_filenames,
                                            reaction_type=reaction_types[E_type], uncertainties='manual')
print("Done with calculations")
# Now read the tsunami_ip output if given, and calculate it if not
if tsunami_ip_output_filename is not None:
tsunami_ip_output = read_integral_indices(tsunami_ip_output_filename)
else:
tsunami_ip_output = _run_and_read_TSUNAMI_IP(application_filenames, experiment_filenames, coverx_library)
# Compare the nominal values
E_diff = {}
for E_type in E_types:
E_diff[E_type] = np.abs(unumpy.nominal_values(E[E_type]) - unumpy.nominal_values(tsunami_ip_output[f"E_{E_type}"])) \
/ unumpy.nominal_values(tsunami_ip_output[f"E_{E_type}"])
# Compare the calculated (manual) uncertainty with the TSUNAMI-IP uncertainty
E_diff_unc = {}
for E_type in E_types:
E_diff_unc[E_type] = np.abs( unumpy.std_devs(E[E_type + '_manual']) - unumpy.std_devs(tsunami_ip_output[f"E_{E_type}"]) ) \
/ unumpy.std_devs(tsunami_ip_output[f"E_{E_type}"])
# -----------------------------------------
# Format the results as a pandas DataFrame
# -----------------------------------------
# Create a MultiIndex for columns
num_experiments, num_applications = np.shape(E['total'])
columns = pd.MultiIndex.from_product([
np.arange(1, num_applications + 1), # Main column indices
['Calculated', 'Manual Uncertainty', 'TSUNAMI-IP', 'Relative Difference in Mean', 'Relative Difference in Manual Uncertainty'] # Subcolumns
], names=['Application Number', 'Attribute'])
# Initialize DataFrame
data = {}
print("Creating pandas dataframes")
# Create a pandas DataFrame for each type of E index
for E_type in E_types:
data[E_type] = pd.DataFrame(index=pd.Index(np.arange(1, num_experiments + 1), name='Experiment Number'), columns=columns)
# Populate DataFrame
for application_index in range(num_applications):
for experiment_index in range(num_experiments):
# Now write the data to the DataFrame
data[E_type].loc[experiment_index + 1, (application_index + 1, 'Calculated')] = \
f"{E[E_type][experiment_index, application_index].n:1.3E}+/-{E[E_type][experiment_index, application_index].s:1.2E}"
data[E_type].loc[experiment_index + 1, (application_index + 1, 'Manual Uncertainty')] = \
f"{E[E_type + '_manual'][experiment_index, application_index].s:1.2E}"
data[E_type].loc[experiment_index + 1, (application_index + 1, 'TSUNAMI-IP')] = \
f"{tsunami_ip_output[f'E_{E_type}'][experiment_index, application_index].n:1.3E}+/-{tsunami_ip_output[f'E_{E_type}'][experiment_index, application_index].s:1.2E}"
data[E_type].loc[experiment_index + 1, (application_index + 1, 'Relative Difference in Mean')] = f"{E_diff[E_type][experiment_index, application_index]:1.4E}"
data[E_type].loc[experiment_index + 1, (application_index + 1, 'Relative Difference in Manual Uncertainty')] = f"{E_diff_unc[E_type][experiment_index, application_index]:1.4E}"
return data
def _update_annotation(fig: EnhancedPlotlyFigure, integral_index: float, index_name: str
) -> Tuple[EnhancedPlotlyFigure, float, float]:
"""Update the annotation on the plot to include the TSUNAMI-IP c_k value and the percent difference from the
Pearson correlation coefficient. If the percent difference is greater than 5%, the annotation will be colored red.
Parameters
----------
fig
The plotly figure object.
integral_index
The TSUNAMI-IP integral_index value.
index_name
The name of the integral index.
Returns
-------
* fig
The plotly figure object with the updated annotation.
* calculated_value
The calculated value of the integral index.
* percent_difference
The percent difference between the TSUNAMI-IP integral_index value and the Pearson correlation coefficient.
"""
    fig_has_annotations = isinstance(fig, (EnhancedPlotlyFigure, InteractiveScatterLegend))
    if not fig_has_annotations:
        # On the diagonal are usually the contributions in a pie plot, and the calculated c_k is 1.0
        calculated_value = 1.0
        percent_difference = (integral_index - calculated_value) / integral_index * 100
        return fig, calculated_value, percent_difference
    # For an InteractiveScatterLegend, the plotly figure is stored under the 'fig' attribute
    plotly_fig = fig.fig if isinstance(fig, InteractiveScatterLegend) else fig
    summary_stats_annotation = plotly_fig.layout.annotations[0]
    calculated_value = plotly_fig.statistics['pearson']
    percent_difference = (integral_index - calculated_value) / integral_index * 100
    summary_stats_annotation.text += f"<br>TSUNAMI-IP {index_name}: <b>{integral_index}</b><br>Percent Difference: <b>{percent_difference}</b>%"
    if abs(percent_difference) > 5:
        summary_stats_annotation.update(bordercolor='red')
    return fig, calculated_value, percent_difference
def _process_pair(args):
    """Multiprocessing worker that generates the perturbation points for a single (application, experiment) pair
    and returns the Pearson correlation coefficient along with its percent difference from the TSUNAMI-IP value."""
application_file, experiment_file, base_library, perturbation_factors, num_perturbations, integral_value = args
points_array = generate_points(
application_file,
experiment_file,
base_library=base_library,
perturbation_factors=perturbation_factors,
num_perturbations=num_perturbations
)
x_points = unumpy.nominal_values([ pair[0] for pair in points_array ])
y_points = unumpy.nominal_values([ pair[1] for pair in points_array ])
# Calculate the Pearson correlation coefficient
calculated_value = np.corrcoef(x_points, y_points)[0, 1]
percent_difference = (integral_value - calculated_value) / integral_value * 100
return calculated_value, percent_difference
@_convert_paths
def correlation_comparison(integral_index_matrix: unumpy.uarray, integral_index_name: str,
application_files: Union[List[str], List[Path]],
experiment_files: Union[List[str], List[Path]], method: str,
base_library: Optional[Union[str, Path]]=None,
perturbation_factors: Optional[Union[str, Path]]=None,
num_perturbations: Optional[int]=None, make_plot: bool=True,
num_cores: int=multiprocessing.cpu_count() - 2,
plot_objects_kwargs: dict={},
matrix_plot_kwargs: dict={}) -> Tuple[pd.DataFrame, Any]:
"""Function that compares the calculated similarity parameter :math:`c_k` (calculated using the cross section sampling method)
with the TSUNAMI-IP output for each application and each experiment. NOTE: that the experiment sdfs and application sdfs
must correspond with those in hte TSUNAMI-IP input file.
Notes
-----
* If the chosen method is 'perturbation', the matrix plot can become extremely memory intensive, so it is recommended
to set ``make_plot=False`` (if only the matrix of comparisons is desired) and/or to use ``num_cores=1`` or a small
number of perturbations to avoid memory issues.
Parameters
----------
    integral_index_matrix
        The matrix representing the given integral index. Expected shape is ``(num_applications, num_experiments)``.
    integral_index_name
        The name of the integral index (used for selecting the method for the plot). Allowed values are ``'c_k'`` and ``'E'``.
application_files
Paths to the input files for the application (required by the chosen method, either TSUNAMI ``.out`` files or
TSUNAMI ``.sdf`` files).
experiment_files
Paths to the input files for the experiment (required by the chosen method, either TSUNAMI ``.out`` files or
TSUNAMI ``.sdf`` files).
    method
        The method for visualizing the given integral index. Allowed values are ``'perturbation'``,
        ``'uncertainty_contributions_nuclide'``, ``'uncertainty_contributions_nuclide_reaction'``, ``'variance_contributions_nuclide'``,
        ``'variance_contributions_nuclide_reaction'``, ``'E_contributions_nuclide'``, ``'E_contributions_nuclide_reaction'``,
        and ``'c_k_contributions'``.
    base_library
        Path to the base cross section library. Required when ``method='perturbation'``.
    perturbation_factors
        Path to the perturbation factors. Required when ``method='perturbation'``.
    num_perturbations
        Number of perturbations to generate. Required when ``method='perturbation'``.
make_plot
Whether to generate the matrix plot. Default is ``True``.
    num_cores
        The number of cores to use for multiprocessing; only used when ``make_plot=False`` with ``method='perturbation'``.
        Default is two less than the number of available cores.
plot_objects_kwargs
Optional keyword arguments to pass when generating the plot objects.
matrix_plot_kwargs
Optional keyword arguments to pass when generating the matrix plot.
Returns
-------
* comparisons
A pandas DataFrame containing the calculated integral index values, the TSUNAMI-IP values, and the percent
difference between the two values.
    * matrix_plot
        The matrix plot object containing the integral index values and the percent differences; only returned
        when ``make_plot=True``, otherwise only ``comparisons`` is returned.
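
    Examples
    --------
    A minimal sketch of the perturbation method (all paths below are hypothetical placeholders, and ``c_k`` is
    assumed to have been read beforehand, e.g. with :func:`tsunami_ip_utils.readers.read_integral_indices`):

    >>> comparisons, fig = correlation_comparison(
    ...     integral_index_matrix=c_k,
    ...     integral_index_name='c_k',
    ...     application_files=['application1.sdf'],
    ...     experiment_files=['experiment1.sdf'],
    ...     method='perturbation',
    ...     base_library='base_library_path',
    ...     perturbation_factors='perturbation_factors_path',
    ...     num_perturbations=50,
    ... )
    >>> comparisons.to_excel('c_k_comparison.xlsx')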
"""
# ===================================
# Perform checks on input parameters
# ===================================
# Check for consistent dimensions of the integral index matrix
num_applications, num_experiments = np.shape(integral_index_matrix)
if num_applications != len(application_files) or num_experiments != len(experiment_files):
raise ValueError("The dimensions of the integral index matrix do not match the number of applications and experiments.")
# Check for missing input parameters or inconsistent method
missing_perturbation_parameters = any([base_library is None, perturbation_factors is None,
num_perturbations is None])
if method == 'perturbation' and missing_perturbation_parameters:
raise ValueError("The method is 'perturbation', and some of the required additional parameters are missing.")
method_for_calculating_c_k = method in ['uncertainty_contributions_nuclide', 'uncertainty_contributions_nuclide_reaction',
'variance_contributions_nuclide', 'variance_contributions_nuclide_reaction',
'perturbation', 'c_k_contributions']
method_for_calculating_E = method in ['E_contributions_nuclide', 'E_contributions_nuclide_reaction']
    if integral_index_name == 'c_k' and not method_for_calculating_c_k:
        raise ValueError("The integral index name is 'c_k', but a method for calculating c_k was not selected; "
                         f"instead, the method selected was {method}. Please select a method for calculating c_k.")
    if integral_index_name == 'E' and not method_for_calculating_E:
        raise ValueError("The integral index name is 'E', but a method for calculating E was not selected; "
                         f"instead, the method selected was {method}. Please select a method for calculating E.")
# ===================================
# Generate plots for the matrix plot
# ===================================
match method:
case 'perturbation':
# This is the most memory intensive method, so instead of storing the entire points array, if make_plot is False,
# we will just calculate the Pearson correlation coefficient from the points themselves, and only store the
# results array.
if make_plot:
points_array = generate_points(application_files, experiment_files, base_library, perturbation_factors,
num_perturbations)
plot_objects_array = generate_plot_objects_array_from_perturbations(points_array, **plot_objects_kwargs)
else:
num_applications = len(application_files)
num_experiments = len(experiment_files)
# Prepare arguments for multiprocessing
tasks = [(application_files[i], experiment_files[j], base_library, perturbation_factors, num_perturbations, integral_index_matrix[i, j])
for i in range(num_applications) for j in range(num_experiments)]
# Use multiprocessing to process data
with multiprocessing.Pool(processes=num_cores) as pool:
results = pool.map(_process_pair, tasks)
# Reshape the results into matrices
calculated_values = np.array([result[0] for result in results]).reshape(num_applications, num_experiments)
percent_differences = np.array([result[1] for result in results]).reshape(num_applications, num_experiments)
case 'uncertainty_contributions_nuclide':
contributions_nuclide, _ = get_uncertainty_contributions(application_files, experiment_files)
plot_objects_array = generate_plot_objects_array_from_contributions(contributions_nuclide, '%Δk/k', **plot_objects_kwargs) \
if make_plot else None
case 'uncertainty_contributions_nuclide_reaction':
_, contributions_nuclide_reaction = get_uncertainty_contributions(application_files, experiment_files)
plot_objects_array = generate_plot_objects_array_from_contributions(contributions_nuclide_reaction, '%Δk/k', **plot_objects_kwargs) \
if make_plot else None
case 'variance_contributions_nuclide':
contributions_nuclide, _ = get_uncertainty_contributions(application_files, experiment_files, variance=True)
plot_objects_array = generate_plot_objects_array_from_contributions(contributions_nuclide, '%(Δk/k)^2', **plot_objects_kwargs) \
if make_plot else None
case 'variance_contributions_nuclide_reaction':
_, contributions_nuclide_reaction = get_uncertainty_contributions(application_files, experiment_files, variance=True)
plot_objects_array = generate_plot_objects_array_from_contributions(contributions_nuclide_reaction, '%(Δk/k)^2', **plot_objects_kwargs) \
if make_plot else None
case 'E_contributions_nuclide':
contributions_nuclide, _ = calculate_E_contributions(application_files, experiment_files)
plot_objects_array = generate_plot_objects_array_from_contributions(contributions_nuclide,
integral_index_name, **plot_objects_kwargs) \
if make_plot else None
case 'E_contributions_nuclide_reaction':
_, contributions_nuclide_reaction = calculate_E_contributions(application_files, experiment_files)
plot_objects_array = generate_plot_objects_array_from_contributions(contributions_nuclide_reaction,
integral_index_name, **plot_objects_kwargs) \
if make_plot else None
case 'c_k_contributions':
raise NotImplementedError("The method 'c_k_contributions' is not yet implemented.")
# ==============================
# Update plots with annotations
# ==============================
if make_plot:
percent_differences = np.empty_like(integral_index_matrix)
calculated_values = np.empty_like(integral_index_matrix)
for i, row in enumerate(plot_objects_array):
for j, plot_object in enumerate(row):
updated_plot_object, calculated_value, percent_difference = \
_update_annotation(plot_object, integral_index_matrix[i, j], integral_index_name)
calculated_values[i, j] = calculated_value
percent_differences[i, j] = percent_difference
plot_objects_array[i, j] = updated_plot_object
    elif method != 'perturbation':
        # The calculated values for contribution-based methods are derived from the plot objects' Pearson
        # statistics, which are not generated when make_plot=False, so there is nothing to compare against
        raise ValueError("make_plot=False is currently only supported with method='perturbation', not "
                         f"method='{method}'.")
# ===================================
# Create dataframes with comparisons
# ===================================
num_applications, num_experiments = np.shape(integral_index_matrix)
columns = pd.MultiIndex.from_product([
np.arange(1, num_applications + 1), # Main column indices
['Calculated', 'TSUNAMI-IP', 'Percent Difference'] # Subcolumns
], names=['Application Number', 'Attribute'])
comparisons = pd.DataFrame(index=pd.Index(np.arange(1, num_experiments + 1), name='Experiment Number'), columns=columns)
# Populate DataFrame
for application_index in range(num_applications):
for experiment_index in range(num_experiments):
# Now write the data to the DataFrame
            comparisons.loc[experiment_index + 1, (application_index + 1, 'Calculated')] = \
                f"{calculated_values[application_index, experiment_index]:1.3E}"
            comparisons.loc[experiment_index + 1, (application_index + 1, 'TSUNAMI-IP')] = \
                f"{integral_index_matrix[application_index, experiment_index].n:1.3E}+/-{integral_index_matrix[application_index, experiment_index].s:1.2E}"
            comparisons.loc[experiment_index + 1, (application_index + 1, 'Percent Difference')] = \
                f"{percent_differences[application_index, experiment_index].n:2.2f}+/-{percent_differences[application_index, experiment_index].s:1.2E}"
# ===================
# Create matrix plot
# ===================
if make_plot:
fig = matrix_plot(plot_objects_array, 'interactive', **matrix_plot_kwargs)
return comparisons, fig
else:
return comparisons