Coverage for dynasor / post_processing / average_runs.py: 85%
49 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 12:31 +0000
1from copy import deepcopy
2from typing import Optional
3import numpy as np
4from dynasor.sample import Sample
def get_sample_averaged_over_independent_runs(
    samples: list[Sample],
    live_dangerously: bool = False,
) -> Sample:
    """
    Compute an averaged sample from multiple samples obtained from
    identical independent runs.

    Note all the metadata and dimensions in all samples must be the same.
    Otherwise a `ValueError` is raised (unless `live_dangerously` is set
    to `True`).

    Parameters
    ----------
    samples
        List of all sample objects to be averaged over.
    live_dangerously
        Setting to `True` allows for averaging over samples
        which metadata information is not identical.

    Raises
    ------
    ValueError
        If `samples` is empty, if the sample dimensions differ, or
        (unless `live_dangerously`) if the metadata differ.
    """
    if not samples:
        raise ValueError('At least one sample is required.')

    # get metadata and dimensions from first sample; it serves as the
    # reference against which all other samples are checked
    sample_ref = samples[0]
    data_dict = dict()
    simulation_data = deepcopy(sample_ref.simulation_data)

    # test that all samples have identical dimensions
    for m, sample in enumerate(samples):
        if sorted(sample.dimensions) != sorted(sample_ref.dimensions):
            raise ValueError(f'Sample dimensions do not match for sample #{m}.')
        for dim in sample_ref.dimensions:
            if dim not in sample.dimensions:
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')
            if not np.allclose(sample[dim], sample_ref[dim]):
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')

    for dim in sample_ref.dimensions:
        data_dict[dim] = sample_ref[dim]

    # test that all samples have identical metadata
    if not live_dangerously:
        for m, sample in enumerate(samples):
            for key, val in simulation_data.items():
                if key not in sample.simulation_data:
                    raise ValueError(
                        f'Sample #{m} is missing "{key}" in the simulation_data field.')
                match = True
                if isinstance(val, dict):
                    # compare entry-by-entry; missing keys compare as None
                    for k, v in val.items():
                        match &= sample.simulation_data[key].get(k, None) == v
                elif isinstance(val, np.ndarray):
                    match &= np.allclose(sample.simulation_data[key], val)
                elif isinstance(val, float):
                    # tolerant float comparison to absorb round-off noise
                    match &= np.isclose(sample.simulation_data[key], val)
                else:
                    match &= sample.simulation_data[key] == val
                if not match:
                    raise ValueError(f'Field "{key}" of sample #{m} does not match.')

    # average all correlation functions; use the reference sample (not the
    # leftover loop variable) to define the set of functions to average.
    # nanmean ignores NaN entries so partially filled samples still average.
    for key in sample_ref.available_correlation_functions:
        data_dict[key] = np.nanmean([s[key] for s in samples], axis=0)

    # keep history of original samples, tagging each record with its origin
    previous_history = []
    for m, s in enumerate(samples):
        for h in s.history:
            rec = h.copy()
            rec['func'] += f'_sample{m}'
            previous_history.append(rec)

    # compose new sample object of the same type as the reference sample
    new_sample = sample_ref.__class__(
        data_dict,
        simulation_data=simulation_data,
        history=previous_history)
    new_sample._append_history(
        'get_sample_averaged_over_independent_runs',
        dict(
            live_dangerously=live_dangerously,
            n_samples=len(samples),
        ))

    return new_sample