From 27f5b7dcb05aefdab9b762175d538931face0aba Mon Sep 17 00:00:00 2001 From: Radu Nicolae Date: Fri, 25 Oct 2024 08:21:49 +0200 Subject: M3SA - Multi-Meta-Model Simulation Analyzer (#251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * (feat) demo files are now ignored * integrating m3sa changes with opendc * gitignore ignores demo * m3sa linked, tested, works 🎉🎆 * linting & checks fully pass * m3sa documentation (re...)added * package.json added, a potential solution for Build Docker Images workflow * (fix) opendc-m3sa renamed to opendc-experiments-m3sa * (feat) Model is now a dataclass * (fix) package and package-lock reverted as before the PR, now they mirror the opendc master branch * (fix) Experiments renamed to experiment * branch updated with changes from master branch * trying to fix the build docker image failed workflow * trying to fix the build docker image failed workflow * All simulations are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#37) Co-authored-by: Dante Niewenhuis * All simulations are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#38) Co-authored-by: Dante Niewenhuis * All simulations are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. 
import os

import numpy as np
import pandas as pd


class MetaModel:
    """
    A class that aggregates results from multiple simulation models based on user-defined functions, producing
    consolidated outputs for analysis.

    Attributes:
        multi_model (MultiModel): The container of models whose results are aggregated.
        meta_model (Model): Model instance that stores aggregated results.
        meta_function (function): Function used to calculate aggregated data.
        min_raw_model_len (int): Minimum length of raw data arrays across all models.
        min_processed_model_len (int): Minimum length of processed data arrays across all models.
        number_of_models (int): Number of models being aggregated.
        function_map (dict): Mapping of aggregation function names to function implementations.
    """

    META_MODEL_ID = -101

    def __init__(self, multimodel, meta_function=None):
        """
        Initializes the Metamodel with a MultiModel instance, then immediately computes the aggregated data
        and produces the outputs (plot + parquet file).

        :param multimodel: MultiModel instance containing the models to aggregate.
        :param meta_function: Optional callable taking a list of per-model values and returning the aggregate.
            When omitted, the function named by ``user_input['meta_function']`` is used; a missing or unknown
            name falls back to the mean.
        :raise ValueError: If metamodel functionality is not enabled in the configuration, or if the
            multimodel contains no models.
        """
        if not multimodel.user_input.get('metamodel', False):
            raise ValueError("Metamodel is not enabled in the config file")

        # Imported locally so that the class itself can be imported without the surrounding package
        # being resolvable (Model is only needed when an instance is actually constructed).
        from .Model import Model

        self.function_map = {
            'mean': self.mean,
            'median': self.median,
            'meta_equation1': self.meta_equation1,
        }

        self.multi_model = multimodel
        self.meta_model = Model(
            raw_sim_data=[],
            id=self.META_MODEL_ID,
            path=self.multi_model.output_folder_path
        )

        if meta_function is not None:
            self.meta_function = meta_function
        else:
            # .get() on user_input: a config without 'meta_function' uses the mean instead of raising KeyError.
            configured_name = multimodel.user_input.get('meta_function')
            self.meta_function = self.function_map.get(configured_name, self.mean)

        models = self.multi_model.models
        if len(models) == 0:
            # min() over an empty sequence would raise an opaque ValueError; fail with a clear message instead.
            raise ValueError("Cannot build a metamodel: the multimodel contains no models")

        self.min_raw_model_len = min(len(model.raw_sim_data) for model in models)
        self.min_processed_model_len = min(len(model.processed_sim_data) for model in models)
        self.number_of_models = len(models)
        self.compute()
        self.output()

    def output(self):
        """
        Generates outputs by plotting the aggregated results and exporting the metamodel data to a file.
        :return: None
        :side effect: Outputs data to files and generates plots.
        """
        self.plot()
        self.output_metamodel()

    def compute(self):
        """
        Computes aggregated data based on the specified plot type from the configuration.
        :raise ValueError: If an unsupported plot type is specified in the configuration.
        """
        handlers = {
            'time_series': self.compute_time_series,
            'cumulative': self.compute_cumulative,
            'cumulative_time_series': self.compute_cumulative_time_series,
        }
        handler = handlers.get(self.multi_model.plot_type)
        if handler is None:
            raise ValueError("Invalid plot type in config file")
        handler()

    def plot(self):
        """
        Plots the aggregated data according to the specified plot type from the configuration.
        :raise ValueError: If an unsupported plot type is specified.
        """
        handlers = {
            'time_series': self.plot_time_series,
            'cumulative': self.plot_cumulative,
            'cumulative_time_series': self.plot_cumulative_time_series,
        }
        handler = handlers.get(self.multi_model.plot_type)
        if handler is None:
            raise ValueError("Invalid plot type in config file")
        handler()

    def _aggregate_processed(self):
        """
        Applies meta_function position-by-position over the processed data of the aggregated models and
        appends each result to the meta_model's processed data.
        :return: None
        """
        models = self.multi_model.models
        for i in range(self.min_processed_model_len):
            # Only the first number_of_models entries are read, so a meta_model that was already appended
            # to multi_model.models by a plot call is never aggregated into itself.
            data_entries = [models[j].processed_sim_data[i] for j in range(self.number_of_models)]
            self.meta_model.processed_sim_data.append(self.meta_function(data_entries))

    def _plot_with_meta_model(self):
        """
        Registers the meta_model with the multi_model (once) and delegates plotting to the multi_model.
        :return: None
        """
        models = self.multi_model.models
        # Identity check (not ==): avoids element-wise comparisons of model data and keeps repeated
        # plot()/output() calls from adding the metamodel more than once.
        if not any(model is self.meta_model for model in models):
            models.append(self.meta_model)
        self.multi_model.generate_plot()

    def compute_time_series(self):
        """
        Aggregates time series data across models using the specified aggregation function.
        :return: None
        :side effect: Updates the meta_model's processed data with aggregated results; raw_sim_data is set
            to reference the same list.
        """
        self._aggregate_processed()
        self.meta_model.raw_sim_data = self.meta_model.processed_sim_data

    def plot_time_series(self):
        """
        Generates a time series plot of the aggregated data.
        :return: None
        :side effect: Adds the meta_model to the multi_model's models and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def compute_cumulative(self):
        """
        Aggregates cumulative data entries across all models.
        :return: None
        :side effect: Adds the per-position mean of the models' raw data to the meta_model's cumulated
            value and rounds the total to two decimals.
        """
        # NOTE(review): this always uses the mean, not self.meta_function - confirm that is intended.
        models = self.multi_model.models
        for i in range(self.min_raw_model_len):
            data_entries = [models[j].raw_sim_data[i] for j in range(self.number_of_models)]
            self.meta_model.cumulated += self.mean(data_entries)
        self.meta_model.cumulated = round(self.meta_model.cumulated, 2)

    def plot_cumulative(self):
        """
        Generates a cumulative plot of the aggregated data.
        :return: None
        :side effect: Adds the meta_model to the multi_model's models and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def compute_cumulative_time_series(self):
        """
        Aggregates cumulative time series data entries across models using the specified aggregation function.
        :return: None
        :side effect: Updates the meta_model's processed data with cumulative aggregated results.
        """
        self._aggregate_processed()

    def plot_cumulative_time_series(self):
        """
        Generates a cumulative time series plot of the aggregated data.
        :return: None
        :side effect: Adds the meta_model to the multi_model's models and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def output_metamodel(self):
        """
        Exports the processed sim data of the metamodel to a parquet file for further analysis or record keeping.
        :return: None
        :side effect: Creates ``<output_folder_path>/raw-output/metamodel/seed=0`` if needed and writes
            ``<metric>.parquet`` into it.
        """
        directory_path = os.path.join(self.multi_model.output_folder_path, "raw-output", "metamodel", "seed=0")
        os.makedirs(directory_path, exist_ok=True)
        current_path = os.path.join(directory_path, f"{self.multi_model.metric}.parquet")
        df = pd.DataFrame({'processed_sim_data': self.meta_model.processed_sim_data})
        df.to_parquet(current_path, index=False)

    def mean(self, chunks):
        """
        Calculates the mean of a list of numerical data.

        :param chunks (list): The data over which to calculate the mean.
        :return: float: The mean of the provided data.
        """
        return np.mean(chunks)

    def median(self, chunks):
        """
        Calculates the median of a list of numerical data.

        :param chunks (list): The data over which to calculate the median.
        :return: float: The median of the provided data.
        """
        return np.median(chunks)

    def meta_equation1(self, chunks):
        """
        Calculates a weighted mean where the weights are inversely proportional to the absolute difference from the
        median value.

        :param chunks (list): Data chunks from which to calculate the weighted mean.
        :return: float: The calculated weighted mean.
        """
        # Convert first: plain lists do not support the element-wise arithmetic used below.
        values = np.asarray(chunks, dtype=float)
        median_val = np.median(values)
        # The +1 keeps the weight finite for values equal to the median (avoids division by zero).
        proximity_weights = 1.0 / (1.0 + np.abs(values - median_val))
        return np.sum(proximity_weights * values) / np.sum(proximity_weights)