Source code for dynasor.post_processing.average_runs

from copy import deepcopy
from typing import List
import numpy as np
from dynasor.sample import Sample


def _metadata_values_match(reference, other) -> bool:
    """Return True if the metadata value ``other`` agrees with ``reference``.

    The comparison strategy is selected by the type of ``reference``:
    dictionaries are compared entry by entry (only for the keys present in
    ``reference``), arrays and floats are compared within numerical tolerance,
    and everything else is compared with ``==``.
    """
    if isinstance(reference, dict):
        # a non-dict value can never match a dict-valued reference field
        if not isinstance(other, dict):
            return False
        return all(other.get(k, None) == v for k, v in reference.items())
    if isinstance(reference, np.ndarray):
        return bool(np.allclose(other, reference))
    if isinstance(reference, float):
        return bool(np.isclose(other, reference))
    return bool(other == reference)


def get_sample_averaged_over_independent_runs(
    samples: List[Sample],
    live_dangerously: bool = False,
) -> Sample:
    """
    Compute an averaged sample from multiple samples obtained from identical
    independent runs.

    The first sample in the list serves as the reference: its dimensions,
    its metadata (``simulation_data``) and its class are used for the
    returned sample. All correlation functions listed by the reference
    sample are averaged over all samples with :func:`numpy.nanmean`, i.e.,
    NaN entries are ignored in the average.

    Note, the dimensions of all samples must always be identical.
    The metadata must be identical as well unless ``live_dangerously``
    is set to True, in which case the metadata check is skipped.
    Only the metadata fields present in the first sample are compared.

    The history of the returned sample contains copies of the history
    records of all input samples, where ``_sample<index>`` is appended
    to the ``func`` entry of each record, followed by a record for this
    function call.

    Parameters
    ----------
    samples
        List of all sample objects to be averaged over.
    live_dangerously
        Setting True allows for averaging over samples which meta-data
        information is not identical.

    Returns
    -------
    New sample object of the same class as the first sample, holding
    the averaged correlation functions.

    Raises
    ------
    ValueError
        If ``samples`` is empty, if the dimensions of the samples differ, or
        if the metadata differs while ``live_dangerously`` is False.
    """

    if not samples:
        raise ValueError('At least one sample is required for averaging.')

    # get metadata and dimensions from first sample
    sample_ref = samples[0]
    simulation_data = deepcopy(sample_ref.simulation_data)

    # test that all samples have identical dimensions
    ref_dimension_names = sorted(sample_ref.dimensions)
    for m, sample in enumerate(samples):
        if sorted(sample.dimensions) != ref_dimension_names:
            raise ValueError(f'Sample dimensions do not match for sample #{m}.')
        for dim in sample_ref.dimensions:
            values, values_ref = sample[dim], sample_ref[dim]
            # np.allclose broadcasts its arguments, so arrays of different
            # (but compatible) shapes could pass; compare shapes explicitly
            if np.shape(values) != np.shape(values_ref):
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')
            if not np.allclose(values, values_ref):
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')

    data_dict = {dim: sample_ref[dim] for dim in sample_ref.dimensions}

    # test that all samples have identical metadata
    if not live_dangerously:
        for m, sample in enumerate(samples):
            for key, val in simulation_data.items():
                if key not in sample.simulation_data:
                    raise ValueError(
                        f'Sample #{m} is missing "{key}" in the simulation_data field.')
                if not _metadata_values_match(val, sample.simulation_data[key]):
                    raise ValueError(f'Field "{key}" of sample #{m} does not match.')

    # average all correlation functions
    for key in sample_ref.available_correlation_functions:
        data_dict[key] = np.nanmean([sample[key] for sample in samples], axis=0)

    # keep history of original samples
    previous_history = []
    for m, sample in enumerate(samples):
        for entry in sample.history:
            # copy the record such that the history of the input sample is not modified
            record = entry.copy()
            record['func'] += f'_sample{m}'
            previous_history.append(record)

    # compose new sample object
    new_sample = type(sample_ref)(
        data_dict, simulation_data=simulation_data, history=previous_history)
    new_sample._append_history(
        'get_sample_averaged_over_independent_runs',
        dict(
            live_dangerously=live_dangerously,
            n_samples=len(samples),
        ))

    return new_sample