Coverage for dynasor/post_processing/average_runs.py: 100%
47 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-18 09:03 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-18 09:03 +0000
1from copy import deepcopy
2from typing import Optional
3import numpy as np
4from dynasor.sample import Sample
def get_sample_averaged_over_independent_runs(
    samples: list[Sample],
    live_dangerously: Optional[bool] = False,
) -> Sample:
    """
    Compute an averaged sample from multiple samples obtained from identical independent runs.

    The first sample acts as the reference: its dimensions, its metadata
    (``simulation_data``), its set of correlation functions, and its class
    define the returned object.

    The dimensions of all samples must always be the same, otherwise a
    `ValueError` is raised. The metadata of all samples must be the same as
    well, otherwise a `ValueError` is raised (unless `live_dangerously` is
    set to `True`, in which case the metadata comparison is skipped).

    Parameters
    ----------
    samples
        List of all sample objects to be averaged over.
        Must contain at least one sample.
    live_dangerously
        Setting to `True` allows for averaging over samples
        which metadata information is not identical.
        Any falsy value (including `None`) enables the metadata check.

    Returns
    -------
    New sample object of the same class as the first sample. It holds the
    dimensions of the first sample, the NaN-ignoring mean (`np.nanmean`) of
    every correlation function over all samples, a deep copy of the metadata
    of the first sample, and the history records of all input samples (with
    ``_sample<index>`` appended to their ``func`` entry) followed by a record
    for this function call.

    Raises
    ------
    ValueError
        If `samples` is empty, if the dimensions of any sample differ from
        those of the first sample, or (unless `live_dangerously` is `True`)
        if a metadata field is missing or does not match.
    """

    if len(samples) == 0:
        raise ValueError('At least one sample is required for averaging.')

    # get metadata and dimensions from first sample
    sample_ref = samples[0]
    data_dict = dict()
    simulation_data = deepcopy(sample_ref.simulation_data)

    # test that all samples have identical dimensions
    for m, sample in enumerate(samples):
        if sorted(sample.dimensions) != sorted(sample_ref.dimensions):
            raise ValueError(f'Sample dimensions do not match for sample #{m}.')
        for dim in sample_ref.dimensions:
            # np.allclose broadcasts its arguments, so arrays of different
            # shape can compare as close; compare the shapes explicitly first
            if np.shape(sample[dim]) != np.shape(sample_ref[dim]):
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')
            if not np.allclose(sample[dim], sample_ref[dim]):
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')

    for dim in sample_ref.dimensions:
        data_dict[dim] = sample_ref[dim]

    # test that all samples have identical metadata
    if not live_dangerously:
        for m, sample in enumerate(samples):
            for key, val in simulation_data.items():
                if key not in sample.simulation_data:
                    raise ValueError(
                        f'Sample #{m} is missing "{key}" in the simulation_data field.')
                match = True
                if isinstance(val, dict):
                    # only the entries present in the reference are compared
                    for k, v in val.items():
                        match &= sample.simulation_data[key].get(k, None) == v
                elif isinstance(val, np.ndarray):
                    match &= np.allclose(sample.simulation_data[key], val)
                elif isinstance(val, float):
                    match &= np.isclose(sample.simulation_data[key], val)
                else:
                    match &= sample.simulation_data[key] == val
                if not match:
                    raise ValueError(f'Field "{key}" of sample #{m} does not match.')

    # average all correlation functions (as listed by the reference sample)
    for key in sample_ref.available_correlation_functions:
        data = [sample[key] for sample in samples]
        data_dict[key] = np.nanmean(data, axis=0)

    # keep history of original samples
    previous_history = []
    for m, sample in enumerate(samples):
        for h in sample.history:
            # work on a copy so that the records of the input samples stay untouched
            rec = h.copy()
            rec['func'] += f'_sample{m}'
            previous_history.append(rec)

    # compose new sample object
    new_sample = sample_ref.__class__(
        data_dict,
        simulation_data=simulation_data,
        history=previous_history)
    new_sample._append_history(
        'get_sample_averaged_over_independent_runs',
        dict(
            live_dangerously=live_dangerously,
            n_samples=len(samples),
        ))

    return new_sample