Coverage for dynasor / post_processing / average_runs.py: 85%

49 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-16 12:31 +0000

1from copy import deepcopy 

2from typing import Optional 

3import numpy as np 

4from dynasor.sample import Sample 

5 

6 

def get_sample_averaged_over_independent_runs(
    samples: list[Sample],
    live_dangerously: bool = False,
) -> Sample:
    """
    Compute an averaged sample from multiple samples obtained from identical independent runs.

    Note all the metadata and dimensions in all samples must be the same.
    Otherwise a `ValueError` is raised (unless `live_dangerously` is set to `True`).

    Parameters
    ----------
    samples
        List of all sample objects to be averaged over.
    live_dangerously
        Setting to `True` allows for averaging over samples
        which metadata information is not identical.

    Raises
    ------
    ValueError
        If `samples` is empty, if the samples' dimensions differ, or
        (unless `live_dangerously`) if their metadata differ.
    """

    if not samples:
        raise ValueError('At least one sample is required.')

    # get metadata and dimensions from first sample; it serves as the reference
    sample_ref = samples[0]
    data_dict = dict()
    simulation_data = deepcopy(sample_ref.simulation_data)

    # test that all samples have identical dimensions
    # (same dimension names and numerically close dimension values)
    for m, sample in enumerate(samples):
        if sorted(sample.dimensions) != sorted(sample_ref.dimensions):
            raise ValueError(f'Sample dimensions do not match for sample #{m}.')
        for dim in sample_ref.dimensions:
            if dim not in sample.dimensions:
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')
            if not np.allclose(sample[dim], sample_ref[dim]):
                raise ValueError(f'Sample dimensions do not match for sample #{m}.')

    for dim in sample_ref.dimensions:
        data_dict[dim] = sample_ref[dim]

    # test that all samples have identical metadata
    if not live_dangerously:
        for m, sample in enumerate(samples):
            for key, val in simulation_data.items():
                if key not in sample.simulation_data:
                    raise ValueError(
                        f'Sample #{m} is missing "{key}" in the simulation_data field.')
                # compare with a type-appropriate notion of equality
                match = True
                if isinstance(val, dict):
                    for k, v in val.items():
                        match &= sample.simulation_data[key].get(k, None) == v
                elif isinstance(val, np.ndarray):
                    match &= np.allclose(sample.simulation_data[key], val)
                elif isinstance(val, float):
                    match &= np.isclose(sample.simulation_data[key], val)
                else:
                    match &= sample.simulation_data[key] == val
                if not match:
                    raise ValueError(f'Field "{key}" of sample #{m} does not match.')

    # average all correlation functions
    # note: iterate the reference sample's functions (previously this used the
    # leaked loop variable `sample`, i.e. the *last* sample in the list)
    for key in sample_ref.available_correlation_functions:
        data = [s[key] for s in samples]
        # nanmean so that a NaN in one run does not poison the average
        data_dict[key] = np.nanmean(data, axis=0)

    # keep history of original samples, tagging each record with its sample index
    previous_history = []
    for m, s in enumerate(samples):
        for h in s.history:
            rec = h.copy()
            rec['func'] += f'_sample{m}'
            previous_history.append(rec)

    # compose new sample object of the same concrete type as the reference sample
    new_sample = sample_ref.__class__(
        data_dict,
        simulation_data=simulation_data,
        history=previous_history)
    new_sample._append_history(
        'get_sample_averaged_over_independent_runs',
        dict(
            live_dangerously=live_dangerously,
            n_samples=len(samples),
        ))

    return new_sample