1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
import numpy as np
import os
import pandas as pd
from .Model import Model
class MetaModel:
    """
    Aggregates the results of several simulation models into one "meta" model.

    For every index shared by all models, the values of the individual models are
    combined with an aggregation function (mean, median, ...) and stored in a
    dedicated Model instance, which is then plotted and exported.

    Attributes:
        multi_model (MultiModel): The container of models whose results are aggregated.
        meta_model (Model): Model instance that stores aggregated results.
        meta_function (function): Function used to calculate aggregated data.
        min_raw_model_len (int): Minimum length of raw data arrays across all models.
        min_processed_model_len (int): Minimum length of processed data arrays across all models.
        number_of_models (int): Number of models being aggregated.
        function_map (dict): Mapping of aggregation function names to function implementations.
    """

    # Identifier passed to the Model that holds the aggregated results.
    META_MODEL_ID = -101

    def __init__(self, multimodel, meta_function=None):
        """
        Initializes the Metamodel with a MultiModel instance, then immediately computes,
        plots and exports the aggregated results.

        :param multimodel: MultiModel instance containing the models to aggregate.
        :param meta_function: Optional callable taking a list of values (one per model) and
            returning the aggregated value. When None, the function is looked up by the
            'meta_function' name in the configuration, falling back to the mean.
        :raise ValueError: If metamodel functionality is not enabled in the configuration,
            or if the MultiModel contains no models.
        """
        if not multimodel.user_input.get('metamodel', False):
            raise ValueError("Metamodel is not enabled in the config file")

        self.function_map = {
            'mean': self.mean,
            'median': self.median,
            'meta_equation1': self.meta_equation1,
        }

        self.multi_model = multimodel
        self.meta_model = Model(
            raw_sim_data=[],
            id=self.META_MODEL_ID,
            path=self.multi_model.output_folder_path
        )

        if meta_function is not None:
            self.meta_function = meta_function
        else:
            # A missing or unknown 'meta_function' entry falls back to the mean
            # instead of raising KeyError.
            configured_name = multimodel.user_input.get('meta_function')
            self.meta_function = self.function_map.get(configured_name, self.mean)

        models = self.multi_model.models
        if len(models) == 0:
            # min() over an empty sequence would raise an opaque ValueError.
            raise ValueError("Cannot build a metamodel: the MultiModel contains no models")

        # Only indices present in every model can be aggregated.
        self.min_raw_model_len = min(len(model.raw_sim_data) for model in models)
        self.min_processed_model_len = min(len(model.processed_sim_data) for model in models)
        self.number_of_models = len(models)

        self.compute()
        self.output()

    def output(self):
        """
        Generates outputs by plotting the aggregated results and exporting the metamodel data to a file.

        :return: None
        :side effect: Calls plot() and output_metamodel().
        """
        self.plot()
        self.output_metamodel()

    def compute(self):
        """
        Computes aggregated data based on the plot type of the MultiModel.

        :raise ValueError: If an unsupported plot type is specified in the configuration.
        """
        plot_type = self.multi_model.plot_type
        if plot_type == 'time_series':
            self.compute_time_series()
        elif plot_type == 'cumulative':
            self.compute_cumulative()
        elif plot_type == 'cumulative_time_series':
            self.compute_cumulative_time_series()
        else:
            raise ValueError("Invalid plot type in config file")

    def plot(self):
        """
        Plots the aggregated data according to the plot type of the MultiModel.

        :raise ValueError: If an unsupported plot type is specified.
        """
        plot_type = self.multi_model.plot_type
        if plot_type == 'time_series':
            self.plot_time_series()
        elif plot_type == 'cumulative':
            self.plot_cumulative()
        elif plot_type == 'cumulative_time_series':
            self.plot_cumulative_time_series()
        else:
            raise ValueError("Invalid plot type in config file")

    def _aggregate_processed_data(self):
        """
        Applies meta_function index-by-index to the processed data of the aggregated models
        and appends each result to the meta_model's processed data.

        :return: None
        """
        # Restrict to the models counted at construction time: the meta model itself may
        # have been appended to multi_model.models by an earlier plot call.
        models = self.multi_model.models[:self.number_of_models]
        for index in range(self.min_processed_model_len):
            data_entries = [model.processed_sim_data[index] for model in models]
            self.meta_model.processed_sim_data.append(self.meta_function(data_entries))

    def _plot_with_meta_model(self):
        """
        Registers the meta_model in the MultiModel (once) and triggers its plot generation.

        :return: None
        """
        models = self.multi_model.models
        # Identity check so that repeated plotting does not add the meta model twice.
        if not any(model is self.meta_model for model in models):
            models.append(self.meta_model)
        self.multi_model.generate_plot()

    def compute_time_series(self):
        """
        Aggregates time series data across models using the specified aggregation function.

        :return: None
        :side effect: Updates the meta_model's processed data with aggregated results and
            makes its raw data refer to the same list.
        """
        self._aggregate_processed_data()
        self.meta_model.raw_sim_data = self.meta_model.processed_sim_data

    def plot_time_series(self):
        """
        Generates a time series plot of the aggregated data.

        :return: None
        :side effect: Adds the meta_model to the multi_model's models and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def compute_cumulative(self):
        """
        Aggregates cumulative data entries across all models.

        For each raw-data index shared by all models, the mean over the models is added to
        the meta_model's cumulated total, which is finally rounded to two decimals.

        :return: None
        :side effect: Updates the meta_model's cumulated value.
        """
        models = self.multi_model.models[:self.number_of_models]
        for index in range(self.min_raw_model_len):
            data_entries = [model.raw_sim_data[index] for model in models]
            # NOTE(review): this always uses the mean, not self.meta_function -- confirm
            # whether the configured aggregation function should apply here as well.
            self.meta_model.cumulated += self.mean(data_entries)
        self.meta_model.cumulated = round(self.meta_model.cumulated, 2)

    def plot_cumulative(self):
        """
        Generates a cumulative plot of the aggregated data.

        :return: None
        :side effect: Adds the meta_model to the multi_model's models and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def compute_cumulative_time_series(self):
        """
        Aggregates cumulative time series data entries across models using the specified aggregation function.

        :return: None
        :side effect: Updates the meta_model's processed data with aggregated results.
        """
        self._aggregate_processed_data()

    def plot_cumulative_time_series(self):
        """
        Generates a cumulative time series plot of the aggregated data.

        :return: None
        :side effect: Adds the meta_model to the multi_model's models and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def output_metamodel(self):
        """
        Exports the processed sim data of the metamodel to a parquet file.

        The file is written to
        <output_folder_path>/raw-output/metamodel/seed=0/<metric>.parquet and contains a
        single column named 'processed_sim_data'.

        :return: None
        :side effect: Creates the target directory if needed and writes the parquet file.
        """
        directory_path = os.path.join(
            self.multi_model.output_folder_path, "raw-output", "metamodel", "seed=0"
        )
        os.makedirs(directory_path, exist_ok=True)
        current_path = os.path.join(directory_path, f"{self.multi_model.metric}.parquet")
        df = pd.DataFrame({'processed_sim_data': self.meta_model.processed_sim_data})
        df.to_parquet(current_path, index=False)

    def mean(self, chunks):
        """
        Calculates the mean of a list of numerical data.

        :param chunks (list): The data over which to calculate the mean.
        :return: float: The mean of the provided data.
        """
        return np.mean(chunks)

    def median(self, chunks):
        """
        Calculates the median of a list of numerical data.

        :param chunks (list): The data over which to calculate the median.
        :return: float: The median of the provided data.
        """
        return np.median(chunks)

    def meta_equation1(self, chunks):
        """
        Calculates a weighted mean where the weights are inversely proportional to the
        absolute difference from the median value.

        Each value x gets the weight 1 / (1 + |x - median|), so values close to the median
        dominate the result while outliers are damped.

        :param chunks (list): Data chunks from which to calculate the weighted mean.
        :return: float: The calculated weighted mean.
        """
        # Convert first: a plain Python list does not support "list - scalar".
        values = np.asarray(chunks, dtype=float)
        median_val = np.median(values)
        # The "+ 1" keeps the weight finite for values equal to the median.
        proximity_weights = 1.0 / (1.0 + np.abs(values - median_val))
        return np.sum(proximity_weights * values) / np.sum(proximity_weights)
|