From 27f5b7dcb05aefdab9b762175d538931face0aba Mon Sep 17 00:00:00 2001
From: Radu Nicolae <rnicolae04@gmail.com>
Date: Fri, 25 Oct 2024 08:21:49 +0200
Subject: M3SA - Multi-Meta-Model Simulation Analyzer (#251)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* (feat) demo files are now ignored

* integrating m3sa changes with opendc

* gitignore ignores demo

* m3sa linked, tested, works 🎉🎆

* linting & checks fully pass

* m3sa documentation (re...)added

* package.json added, a potentail solution for Build Docker Images workflow

* (fix) opendc-m3sa renamed to opendc-experiments-m3sa

* (feat) Model is now a dataclass

* (fix) package and package-lock reverted as before the PR, now they mirror the opendc master branch

* (fix) Experiments renamed to experiment

* branch updated with changes from master branch

* trying to fix the build docker image failed workflow

* trying to fix the build docker image failed workflow

* All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#37)

Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com>

* All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#38)

Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com>

* All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#39)

Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com>

* [TEMP](feat) m3saCli decoupled from experimentCli

* spotless and minor refactoring

* (feat)[TEMP] decoupling m3sa from experiment

* spotless applied

* documentation resolved

* requirements.txt added

* path to M3SA is now provided as a parameter to M3SACLI

* spotless applied

* (fix) python environment variables solved, output analysis folder solved

* documentation changed and matching the master branch doc

* package-lock reverted

* package-lock reverted

---------

Co-authored-by: Dante Niewenhuis <d.niewenhuis@hotmail.com>
---
 .../src/main/python/models/MetaModel.py            | 214 +++++++++++++++++++++
 1 file changed, 214 insertions(+)
 create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py

(limited to 'opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py')

diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py
new file mode 100644
index 00000000..49930d25
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py
@@ -0,0 +1,214 @@
+import numpy as np
+import os
+import pandas as pd
+
+from .Model import Model
+
+
+class MetaModel:
+    """
+    A class that aggregates results from multiple simulation models based on user-defined functions, producing
+    consolidated outputs for analysis.
+
+    Attributes:
+        multi_model (MultiModel): The container of models whose results are aggregated.
+        meta_model (Model): Model instance that stores aggregated results.
+        meta_function (function): Function used to calculate aggregated data.
+        min_raw_model_len (int): Minimum length of raw data arrays across all models.
+        min_processed_model_len (int): Minimum length of processed data arrays across all models.
+        number_of_models (int): Number of models being aggregated.
+        function_map (dict): Mapping of aggregation function names to function implementations.
+    """
+
+    META_MODEL_ID = -101
+
+    def __init__(self, multimodel, meta_function=None):
+        """
+        Initializes the Metamodel with a MultiModel instance and prepares aggregation functions based on configuration.
+
+        :param multimodel: MultiModel instance containing the models to aggregate.
+        :raise ValueError: If metamodel functionality is not enabled in the configuration.
+        """
+        if not multimodel.user_input.get('metamodel', False):
+            raise ValueError("Metamodel is not enabled in the config file")
+
+        self.function_map = {
+            'mean': self.mean,
+            'median': self.median,
+            'meta_equation1': self.meta_equation1,
+        }
+
+        self.multi_model = multimodel
+        self.meta_model = Model(
+            raw_sim_data=[],
+            id=self.META_MODEL_ID,
+            path=self.multi_model.output_folder_path
+        )
+
+        if meta_function is not None:
+            self.meta_function = meta_function
+        else:
+            self.meta_function = self.function_map.get(multimodel.user_input['meta_function'], self.mean)
+
+        self.min_raw_model_len = min([len(model.raw_sim_data) for model in self.multi_model.models])
+        self.min_processed_model_len = min([len(model.processed_sim_data) for model in self.multi_model.models])
+        self.number_of_models = len(self.multi_model.models)
+        self.compute()
+        self.output()
+
+    def output(self):
+        """
+        Generates outputs by plotting the aggregated results and exporting the metamodel data to a file.
+        :return: None
+        :side effect: Outputs data to files and generates plots.
+        """
+        self.plot()
+        self.output_metamodel()
+
+    def compute(self):
+        """
+        Computes aggregated data based on the specified plot type from the configuration.
+        :raise ValueError: If an unsupported plot type is specified in the configuration.
+        """
+        if self.multi_model.plot_type == 'time_series':
+            self.compute_time_series()
+        elif self.multi_model.plot_type == 'cumulative':
+            self.compute_cumulative()
+        elif self.multi_model.plot_type == 'cumulative_time_series':
+            self.compute_cumulative_time_series()
+        else:
+            raise ValueError("Invalid plot type in config file")
+
+    def plot(self):
+        """
+        Plots the aggregated data according to the specified plot type from the configuration.
+        :raise ValueError: If an unsupported plot type is specified.
+        """
+        if self.multi_model.plot_type == 'time_series':
+            self.plot_time_series()
+        elif self.multi_model.plot_type == 'cumulative':
+            self.plot_cumulative()
+        elif self.multi_model.plot_type == 'cumulative_time_series':
+            self.plot_cumulative_time_series()
+
+        else:
+            raise ValueError("Invalid plot type in config file")
+
+    def compute_time_series(self):
+        """
+        Aggregates time series data across models using the specified aggregation function.
+        :return: None
+        :side effect: Updates the meta_model's processed data with aggregated results.
+        """
+        for i in range(0, self.min_processed_model_len):
+            data_entries = []
+            for j in range(self.number_of_models):
+                data_entries.append(self.multi_model.models[j].processed_sim_data[i])
+            self.meta_model.processed_sim_data.append(self.meta_function(data_entries))
+        self.meta_model.raw_sim_data = self.meta_model.processed_sim_data
+
+    def plot_time_series(self):
+        """
+        Generates a time series plot of the aggregated data.
+        :return: None
+        :side effect: Displays a time series plot using the multi_model's plotting capabilities.
+        """
+        self.multi_model.models.append(self.meta_model)
+        self.multi_model.generate_plot()
+
+    def compute_cumulative(self):
+        """
+        Aggregates cumulative data entries across all models.
+        :return: None
+        :side effect: Updates the meta_model's cumulative data with aggregated results.
+        """
+
+        for i in range(0, self.min_raw_model_len):
+            data_entries = []
+            for j in range(self.number_of_models):
+                sim_data = self.multi_model.models[j].raw_sim_data
+                ith_element = sim_data[i]
+                data_entries.append(ith_element)
+            self.meta_model.cumulated += self.mean(data_entries)
+        self.meta_model.cumulated = round(self.meta_model.cumulated, 2)
+
+    def plot_cumulative(self):
+        """
+        Generates a cumulative plot of the aggregated data.
+        :return: None
+        :side effect: Displays a cumulative plot using the multi_model's plotting capabilities.
+        """
+        self.multi_model.models.append(self.meta_model)
+        self.multi_model.generate_plot()
+
+    def compute_cumulative_time_series(self):
+        """
+        Aggregates cumulative time series data entries across models using the specified aggregation function.
+        :return: None
+        :side effect: Updates the meta_model's processed data with cumulative aggregated results.
+        """
+        for i in range(0, self.min_processed_model_len):
+            data_entries = []
+            for j in range(self.number_of_models):
+                data_entries.append(self.multi_model.models[j].processed_sim_data[i])
+            self.meta_model.processed_sim_data.append(self.meta_function(data_entries))
+
+    def plot_cumulative_time_series(self):
+        """
+        Generates a cumulative time series plot of the aggregated data.
+        :return: None
+        :side effect: Displays a cumulative time series plot using the multi_model's plotting capabilities.
+        """
+        self.multi_model.models.append(self.meta_model)
+        self.multi_model.generate_plot()
+
+    def output_metamodel(self):
+        """
+        Exports the processed sim data of the metamodel to a parquet file for further analysis or record keeping.
+        :return: None
+        :side effect: Writes data to a parquet file at the specified directory path.
+        """
+        directory_path = os.path.join(self.multi_model.output_folder_path, "raw-output/metamodel/seed=0")
+        os.makedirs(directory_path, exist_ok=True)
+        current_path = os.path.join(directory_path, f"{self.multi_model.metric}.parquet")
+        df = pd.DataFrame({'processed_sim_data': self.meta_model.processed_sim_data})
+        df.to_parquet(current_path, index=False)
+
+    def mean(self, chunks):
+        """
+        Calculates the mean of a list of numerical data.
+
+        :param chunks (list): The data over which to calculate the mean.
+        :return: float: The mean of the provided data.
+        """
+        return np.mean(chunks)
+
+    def median(self, chunks):
+        """
+        Calculates the median of a list of numerical data.
+
+        :param chunks (list): The data over which to calculate the median.
+        :return: float: The median of the provided data.
+        """
+        return np.median(chunks)
+
+    def meta_equation1(self, chunks):
+        """
+        Calculates a weighted mean where the weights are inversely proportional to the absolute difference from the median value.
+        :param chunks (list): Data chunks from which to calculate the weighted mean.
+        :return: float: The calculated weighted mean.
+        """
+
+        """Attempt 1"""
+        # median_val = np.median(chunks)
+        # proximity_weights = 1 / (1 + np.abs(chunks - median_val))  # Avoid division by zero
+        # weighted_mean = np.sum(proximity_weights * chunks) / np.sum(proximity_weights)
+        # return weighted_mean
+
+        """Attempt 2 Inter-Quartile Mean (same accuracy as mean)"""
+        # sorted_preds = np.sort(chunks, axis=0)
+        # Q1 = int(np.floor(0.25 * len(sorted_preds)))
+        # Q3 = int(np.floor(0.75 * len(sorted_preds)))
+        #
+        # iqm = np.mean(sorted_preds[Q1:Q3], axis=0)
+        # return iqm
-- 
cgit v1.2.3