summary | refs | log | tree | commit | diff
path: root/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py
diff options
context:
space:
mode:
author: Radu Nicolae <rnicolae04@gmail.com> 2024-10-25 08:21:49 +0200
committer: GitHub <noreply@github.com> 2024-10-25 08:21:49 +0200
commit: 27f5b7dcb05aefdab9b762175d538931face0aba (patch)
tree: aed9b6cd324f73d4db9af5fc70000a62b4422fc1 /opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py
parent: 4a010c6b9e033314a2624a0756dcdc7f17010d9d (diff)
M3SA - Multi-Meta-Model Simulation Analyzer (#251)
* (feat) demo files are now ignored * integrating m3sa changes with opendc * gitignore ignores demo * m3sa linked, tested, works 🎉🎆 * linting & checks fully pass * m3sa documentation (re...)added * package.json added, a potential solution for Build Docker Images workflow * (fix) opendc-m3sa renamed to opendc-experiments-m3sa * (feat) Model is now a dataclass * (fix) package and package-lock reverted as before the PR, now they mirror the opendc master branch * (fix) Experiments renamed to experiment * branch updated with changes from master branch * trying to fix the build docker image failed workflow * trying to fix the build docker image failed workflow * All simulations are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#37) Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com> * All simulations are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#38) Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com> * All simulations are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#39) Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com> * [TEMP](feat) m3saCli decoupled from experimentCli * spotless and minor refactoring * (feat)[TEMP] decoupling m3sa from experiment * spotless applied * documentation resolved * requirements.txt added * path to M3SA is now provided as a parameter to M3SACLI * spotless applied * (fix) python environment variables solved, output analysis folder solved * documentation changed and matching the master branch doc * package-lock reverted * package-lock reverted --------- Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com>
Diffstat (limited to 'opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py')
-rw-r--r--opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py214
1 files changed, 214 insertions, 0 deletions
diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py
new file mode 100644
index 00000000..49930d25
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py
@@ -0,0 +1,214 @@
+import numpy as np
+import os
+import pandas as pd
+
+from .Model import Model
+
+
class MetaModel:
    """
    Aggregates results from multiple simulation models into one "meta" model.

    For every data point, the values reported by the individual models are combined
    with a user-selected aggregation function. The aggregated series is stored in a
    dedicated Model instance, plotted together with the other models, and exported
    to a parquet file.

    Attributes:
        multi_model (MultiModel): The container of models whose results are aggregated.
        meta_model (Model): Model instance that stores aggregated results.
        meta_function (function): Function used to calculate aggregated data.
        min_raw_model_len (int): Minimum length of raw data arrays across all models.
        min_processed_model_len (int): Minimum length of processed data arrays across all models.
        number_of_models (int): Number of models being aggregated.
        function_map (dict): Mapping of aggregation function names to function implementations.
    """

    META_MODEL_ID = -101

    def __init__(self, multimodel, meta_function=None):
        """
        Initializes the Metamodel with a MultiModel instance, then immediately computes,
        plots and exports the aggregated results.

        :param multimodel: MultiModel instance containing the models to aggregate.
        :param meta_function: Optional callable that maps a list of per-model values to one
            aggregated value. When omitted, the function named by the 'meta_function' entry of
            the user input is used; a missing or unknown name falls back to the mean.
        :raise ValueError: If metamodel functionality is not enabled in the configuration, or
            if the MultiModel holds no models.
        """
        if not multimodel.user_input.get('metamodel', False):
            raise ValueError("Metamodel is not enabled in the config file")
        # Fail early with a clear message instead of the opaque "min() arg is an empty sequence".
        if not multimodel.models:
            raise ValueError("Metamodel requires at least one model to aggregate")

        self.function_map = {
            'mean': self.mean,
            'median': self.median,
            'meta_equation1': self.meta_equation1,
        }

        self.multi_model = multimodel
        self.meta_model = Model(
            raw_sim_data=[],
            id=self.META_MODEL_ID,
            path=self.multi_model.output_folder_path,
        )

        if meta_function is not None:
            self.meta_function = meta_function
        else:
            # .get() keeps a missing 'meta_function' entry on the documented mean fallback
            # rather than raising KeyError.
            configured_name = multimodel.user_input.get('meta_function')
            self.meta_function = self.function_map.get(configured_name, self.mean)

        models = self.multi_model.models
        self.min_raw_model_len = min(len(model.raw_sim_data) for model in models)
        self.min_processed_model_len = min(len(model.processed_sim_data) for model in models)
        self.number_of_models = len(models)
        self.compute()
        self.output()

    def output(self) -> None:
        """
        Generates outputs by plotting the aggregated results and exporting the metamodel data to a file.

        :return: None
        :side effect: Outputs data to files and generates plots.
        """
        self.plot()
        self.output_metamodel()

    def compute(self) -> None:
        """
        Computes aggregated data based on the plot type of the multi model.

        :raise ValueError: If an unsupported plot type is specified in the configuration.
        """
        handlers = {
            'time_series': self.compute_time_series,
            'cumulative': self.compute_cumulative,
            'cumulative_time_series': self.compute_cumulative_time_series,
        }
        handler = handlers.get(self.multi_model.plot_type)
        if handler is None:
            raise ValueError("Invalid plot type in config file")
        handler()

    def plot(self) -> None:
        """
        Plots the aggregated data according to the plot type of the multi model.

        :raise ValueError: If an unsupported plot type is specified.
        """
        handlers = {
            'time_series': self.plot_time_series,
            'cumulative': self.plot_cumulative,
            'cumulative_time_series': self.plot_cumulative_time_series,
        }
        handler = handlers.get(self.multi_model.plot_type)
        if handler is None:
            raise ValueError("Invalid plot type in config file")
        handler()

    def _models_to_aggregate(self):
        """
        Returns the first `number_of_models` models of the multi model.

        The meta model itself is appended to the same list when plotting, so the slice keeps
        aggregation restricted to the models that were present at construction time.
        """
        return self.multi_model.models[:self.number_of_models]

    def _aggregate_processed_data(self) -> None:
        """
        Appends one aggregated value per index of the processed data to the meta model.

        Only the first `min_processed_model_len` indices are used, so every model contributes
        a value to every aggregated entry.
        """
        models = self._models_to_aggregate()
        aggregated = self.meta_model.processed_sim_data
        for index in range(self.min_processed_model_len):
            entries = [model.processed_sim_data[index] for model in models]
            aggregated.append(self.meta_function(entries))

    def _plot_with_meta_model(self) -> None:
        """
        Adds the meta model to the multi model's list of models (once) and triggers plotting.

        An identity check is used so that calling a plot method repeatedly does not register
        the meta model more than once.
        """
        models = self.multi_model.models
        if not any(model is self.meta_model for model in models):
            models.append(self.meta_model)
        self.multi_model.generate_plot()

    def compute_time_series(self) -> None:
        """
        Aggregates time series data across models using the specified aggregation function.

        :return: None
        :side effect: Updates the meta_model's processed data with aggregated results and makes
            the meta_model's raw data refer to that same list.
        """
        self._aggregate_processed_data()
        self.meta_model.raw_sim_data = self.meta_model.processed_sim_data

    def plot_time_series(self) -> None:
        """
        Generates a time series plot of the aggregated data.

        :return: None
        :side effect: Adds the meta model to the multi model and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def compute_cumulative(self) -> None:
        """
        Aggregates cumulative data entries across all models.

        For each of the first `min_raw_model_len` raw data points, the mean over all models is
        added to the meta model's cumulated value, which is finally rounded to two decimals.

        :return: None
        :side effect: Updates the meta_model's cumulative data with aggregated results.
        """
        models = self._models_to_aggregate()
        for index in range(self.min_raw_model_len):
            entries = [model.raw_sim_data[index] for model in models]
            # NOTE(review): this always uses the mean, not self.meta_function, unlike the
            # time-series variants. Kept as-is; confirm whether that is intended.
            self.meta_model.cumulated += self.mean(entries)
        self.meta_model.cumulated = round(self.meta_model.cumulated, 2)

    def plot_cumulative(self) -> None:
        """
        Generates a cumulative plot of the aggregated data.

        :return: None
        :side effect: Adds the meta model to the multi model and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def compute_cumulative_time_series(self) -> None:
        """
        Aggregates cumulative time series data entries across models using the specified aggregation function.

        :return: None
        :side effect: Updates the meta_model's processed data with aggregated results.
        """
        self._aggregate_processed_data()

    def plot_cumulative_time_series(self) -> None:
        """
        Generates a cumulative time series plot of the aggregated data.

        :return: None
        :side effect: Adds the meta model to the multi model and calls its generate_plot().
        """
        self._plot_with_meta_model()

    def output_metamodel(self) -> None:
        """
        Exports the processed sim data of the metamodel to a parquet file for further analysis or record keeping.

        The file is named after the multi model's metric and written below
        `<output_folder_path>/raw-output/metamodel/seed=0`, which is created if needed.

        :return: None
        :side effect: Writes data to a parquet file at the specified directory path.
        """
        directory_path = os.path.join(self.multi_model.output_folder_path, "raw-output/metamodel/seed=0")
        os.makedirs(directory_path, exist_ok=True)
        current_path = os.path.join(directory_path, f"{self.multi_model.metric}.parquet")
        df = pd.DataFrame({'processed_sim_data': self.meta_model.processed_sim_data})
        df.to_parquet(current_path, index=False)

    def mean(self, chunks):
        """
        Calculates the mean of a list of numerical data.

        :param chunks (list): The data over which to calculate the mean.
        :return: float: The mean of the provided data.
        """
        return np.mean(chunks)

    def median(self, chunks):
        """
        Calculates the median of a list of numerical data.

        :param chunks (list): The data over which to calculate the median.
        :return: float: The median of the provided data.
        """
        return np.median(chunks)

    def meta_equation1(self, chunks):
        """
        Calculates a weighted mean where the weights are inversely proportional to the absolute difference from the median value.

        Each value x receives the weight 1 / (1 + |x - median|), so values close to the median
        dominate the result and outliers are damped.

        :param chunks (list): Data chunks from which to calculate the weighted mean.
        :return: float: The calculated weighted mean.
        """
        # Convert first: subtracting a scalar from a plain Python list is not supported.
        values = np.asarray(chunks, dtype=float)
        median_val = np.median(values)
        # The "1 +" in the denominator avoids division by zero for values equal to the median.
        proximity_weights = 1.0 / (1.0 + np.abs(values - median_val))
        # An inter-quartile mean was also tried here and noted as giving the same accuracy
        # as the plain mean, so the proximity-weighted mean is the one implemented.
        return np.sum(proximity_weights * values) / np.sum(proximity_weights)