From 27f5b7dcb05aefdab9b762175d538931face0aba Mon Sep 17 00:00:00 2001 From: Radu Nicolae Date: Fri, 25 Oct 2024 08:21:49 +0200 Subject: M3SA - Multi-Meta-Model Simulation Analyzer (#251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * (feat) demo files are now ignored * integrating m3sa changes with opendc * gitignore ignores demo * m3sa linked, tested, works 🎉🎆 * linting & checks fully pass * m3sa documentation (re...)added * package.json added, a potentail solution for Build Docker Images workflow * (fix) opendc-m3sa renamed to opendc-experiments-m3sa * (feat) Model is now a dataclass * (fix) package and package-lock reverted as before the PR, now they mirror the opendc master branch * (fix) Experiments renamed to experiment * branch updated with changes from master branch * trying to fix the build docker image failed workflow * trying to fix the build docker image failed workflow * All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#37) Co-authored-by: Dante Niewenhuis * All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#38) Co-authored-by: Dante Niewenhuis * All simulation are now run with a single CPU and single MemoryUnit. multi CPUs are combined into one. This is for performance and explainability. (#255) (#39) Co-authored-by: Dante Niewenhuis * [TEMP](feat) m3saCli decoupled from experimentCli * spotless and minor refactoring * (feat)[TEMP] decoupling m3sa from experiment * spotless applied * documentation resolved * requirements.txt added * path to M3SA is now provided as a parameter to M3SACLI * spotless applied * (fix) python environment variables solved, output analysis folder solved * documentation changed and matching the master branch doc * package-lock reverted * package-lock reverted --------- Co-authored-by: Dante Niewenhuis --- .../src/main/kotlin/M3saAnalyzer.kt | 55 +++ .../org/opendc/experiments/m3sa/runner/M3SACli.kt | 85 ++++ .../opendc/experiments/m3sa/runner/M3SARunner.kt | 58 +++ .../experiments/m3sa/scenario/M3SAFactories.kt | 38 ++ .../src/main/python/accuracy_evaluator.py | 114 +++++ .../src/main/python/input_parser.py | 135 ++++++ .../src/main/python/main.py | 20 + .../src/main/python/models/MetaModel.py | 214 +++++++++ .../src/main/python/models/Model.py | 70 +++ .../src/main/python/models/MultiModel.py | 501 +++++++++++++++++++++ .../src/main/python/requirements.txt | 4 + .../src/main/python/simulator_specifics.py | 14 + .../src/main/python/utils.py | 25 + 13 files changed, 1333 insertions(+) create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py create mode 100644 
opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/models/Model.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MultiModel.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py create mode 100644 opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py (limited to 'opendc-experiments/opendc-experiments-m3sa/src') diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt new file mode 100644 index 00000000..6b8f8422 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/M3saAnalyzer.kt @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import kotlin.io.path.Path + +/** + * This constant variable should be changed depending on the root folder that is being run. + * PATH_TO_PYTHON_MAIN should point to the main python file, ran when the analysis starts. 
+ */ + +public val ANALYSIS_SCRIPTS_DIRECTORY: String = "./opendc-experiments/opendc-experiments-m3sa/src/main/python" +public val ABSOLUTE_SCRIPT_PATH: String = + Path("$ANALYSIS_SCRIPTS_DIRECTORY/main.py").toAbsolutePath().normalize().toString() +public val SCRIPT_LANGUAGE: String = Path("$ANALYSIS_SCRIPTS_DIRECTORY/venv/bin/python3").toAbsolutePath().normalize().toString() + +public fun m3saAnalyze( + outputFolderPath: String, + m3saSetupPath: String, +) { + val process = + ProcessBuilder( + SCRIPT_LANGUAGE, + ABSOLUTE_SCRIPT_PATH, + outputFolderPath, + m3saSetupPath, + ).directory(Path(ANALYSIS_SCRIPTS_DIRECTORY).toFile()) + .start() + + val exitCode = process.waitFor() + if (exitCode == 0) { + println("[M3SA says] M3SA operation(s) completed successfully.") + } else { + val errors = process.errorStream.bufferedReader().readText() + println("[M3SA says] Exit code $exitCode; Error(s): $errors") + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt new file mode 100644 index 00000000..43597ff5 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SACli.kt @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +@file:JvmName("M3SACli") + +package org.opendc.experiments.base.runner + +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.options.default +import com.github.ajalt.clikt.parameters.options.defaultLazy +import com.github.ajalt.clikt.parameters.options.option +import com.github.ajalt.clikt.parameters.types.file +import com.github.ajalt.clikt.parameters.types.int +import m3saAnalyze +import org.opendc.experiments.base.scenario.getExperiment +import org.opendc.experiments.m3sa.scenario.getOutputFolder +import java.io.File + +/** + * Main entrypoint of the application. + */ +public fun main(args: Array): Unit = M3SACommand().main(args) + +/** + * Represents the command for the Scenario experiments. + */ +internal class M3SACommand : CliktCommand(name = "experiment") { + /** + * The path to the environment directory. 
+ */ + private val scenarioPath by option("--experiment-path", help = "path to experiment file") + .file(canBeDir = false, canBeFile = true) + .defaultLazy { File("resources/experiment.json") } + + /** + * The number of threads to use for parallelism. + */ + private val parallelism by option("-p", "--parallelism", help = "number of worker threads") + .int() + .default(Runtime.getRuntime().availableProcessors() - 1) + + private val m3saPath by option("-m", "--m3sa-setup-path", help = "path to m3sa setup file") + .file(canBeDir = false, canBeFile = true) + .defaultLazy { File("") } + + override fun run() { + println("The provided m3saPath is $m3saPath") + + val experiment = getExperiment(scenarioPath) + runExperiment(experiment, parallelism) + + if (m3saPath.toString().isNotEmpty()) { + m3saAnalyze( + outputFolderPath = getOutputFolder(scenarioPath), + m3saSetupPath = m3saPath.toString(), + ) + } else { + println( + "\n" + + "===================================================\n" + + "|M3SA path is not provided. Skipping M3SA analysis.|\n" + + "===================================================", + ) + } + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt new file mode 100644 index 00000000..9a61ad17 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/runner/M3SARunner.kt @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +@file:JvmName("M3saCliKt") + +package org.opendc.experiments.base.runner + +import org.opendc.experiments.base.scenario.Scenario +import java.util.concurrent.ForkJoinPool + +/** + * Run scenario when no pool is available for parallel execution + * + * @param experiment The scenarios to run + * @param parallelism The number of scenarios that can be run in parallel + */ +public fun runExperiment( + experiment: List, + parallelism: Int, +) { + val ansiReset = "\u001B[0m" + val ansiGreen = "\u001B[32m" + val ansiBlue = "\u001B[34m" + + setupOutputFolderStructure(experiment[0].outputFolder) + + for (scenario in experiment) { + val pool = ForkJoinPool(parallelism) + println( + "\n\n$ansiGreen================================================================================$ansiReset", + ) + println("$ansiBlue Running scenario: ${scenario.name} $ansiReset") + println("$ansiGreen================================================================================$ansiReset") + runScenario( + scenario, + pool, + ) + } +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt new file mode 100644 index 00000000..a4df40e1 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/kotlin/org/opendc/experiments/m3sa/scenario/M3SAFactories.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.m3sa.scenario + +import org.opendc.experiments.base.scenario.ExperimentReader +import java.io.File + +private val experimentReader = ExperimentReader() + +/** + * Returns a list of Scenarios from a given file path (input). + * + * @param filePath The path to the file containing the scenario specifications. + * @return A list of Scenarios. 
+ */ +public fun getOutputFolder(file: File): String { + return experimentReader.read(file).outputFolder + "/outputs" +} diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py new file mode 100644 index 00000000..463f69e6 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/accuracy_evaluator.py @@ -0,0 +1,114 @@ +import numpy as np + +from models.MetaModel import MetaModel + + +def accuracy_evaluator( + real_data, + multi_model, + compute_mape=True, + compute_nad=True, + compute_rmsle=True, + rmsle_hyperparameter=0.5, + only_metamodel=False +): + """ + :param real_data: the real-world data of the simulation + :param multi_model: the Multi-Model, containing individual models (possibly also a Meta-Model, with id=101) + :param MAPE: whether to calculate Mean Absolute Percentage Error (MAPE) + :param NAD: whether to calculate Normalized Absolute Differences (NAD) + :param RMSLE: whether to calculate Root Mean Square Logarithmic Error (RMSLE) + :param rmsle_hyperparameter: the hyperparameter that balances the ration underestimations:overestimations + - default is 0.5 (balanced penalty) + - < 0.5: more penalty for overestimations + - > 0.5: more penalty for underestimations + e.g., RMSLE_hyperparameter=0.3 -> 30% penalty for overestimations, 70% penalty for underestimations (3:7 ratio) + :return: None, but prints the accuracy metrics + """ + + meta_model = MetaModel(multimodel=multi_model) + multi_model.models.append(meta_model.meta_model) # metamodel + # multi_model.models.append(Model(raw_host_data=real_data, id=-1, path=None)) # real-world data + + with open(multi_model.output_folder_path + "/accuracy_report.txt", "a") as f: + f.write("====================================\n") + f.write("Accuracy Report, against ground truth\n") + + for model in multi_model.models: + if only_metamodel and model.id != 101: + continue + + if model.id == -1: + f.write("Real-World data") + elif model.id == 101: + f.write( + f"Meta-Model, meta-function: {multi_model.user_input['meta_function']}, window_size: {meta_model.multi_model.window_size}") + else: + f.write(f"Model {model.id}") + + simulation_data = model.raw_sim_data + min_len = min(len(real_data), len(simulation_data)) + real_data = real_data[:min_len] + simulation_data = simulation_data[:min_len] + if compute_mape: + accuracy_mape = mape( + real_data=real_data, + simulation_data=simulation_data + ) + f.write(f"\nMean Absolute Percentage Error (MAPE): {accuracy_mape}%") + + if compute_nad: + accuracy_nad = nad( + real_data=real_data, + simulation_data=simulation_data + ) + f.write(f"\nNormalized Absolute Differences (NAD): {accuracy_nad}%") + + if compute_rmsle: + accuracy_rmsle = rmsle( + real_data=real_data, + simulation_data=simulation_data, + alpha=rmsle_hyperparameter + ) + f.write( + f"\nRoot Mean Square Logarithmic Error (RMSLE), alpha={rmsle_hyperparameter}:{accuracy_rmsle}\n\n") + + f.write("====================================\n") + + +def mape(real_data, simulation_data): + """ + Calculate Mean Absolute Percentage Error (MAPE) + :param real_data: Array of real values + :param simulation_data: Array of simulated values + :return: MAPE value + """ + real_data = np.array(real_data) + simulation_data = np.array(simulation_data) + return round(np.mean(np.abs((real_data - simulation_data) / real_data)) * 100, 3) + + +def nad(real_data, simulation_data): + """ + Calculate Normalized Absolute Differences 
(NAD) + :param real_data: Array of real values + :param simulation_data: Array of simulated values + :return: NAD value + """ + real_data = np.array(real_data) + simulation_data = np.array(simulation_data) + return round(np.sum(np.abs(real_data - simulation_data)) / np.sum(real_data) * 100, 3) + + +def rmsle(real_data, simulation_data, alpha=0.5): + """ + Calculate Root Mean Square Logarithmic Error (RMSLE) with an adjustable alpha parameter + :param real_data: Array of real values + :param simulation_data: Array of simulated values + :param alpha: Hyperparameter that balances the penalty between underestimations and overestimations + :return: RMSLE value + """ + real_data = np.array(real_data) + simulation_data = np.array(simulation_data) + log_diff = alpha * np.log(real_data) - (1 - alpha) * np.log(simulation_data) + return round(np.sqrt(np.mean(log_diff ** 2)) * 100, 3) diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py new file mode 100644 index 00000000..cb1bc2b9 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/input_parser.py @@ -0,0 +1,135 @@ +import json +import os +import sys +import warnings + + +def read_input(path=""): + """ + Reads and processes the input JSON file from the specified path. Validates the input path, + ensures the file exists, and decodes the JSON content. Switches to the project root directory + before returning the parsed input. + + :param path: The relative path to the input JSON file. + :type path: str + :raises ValueError: If the input path is not provided, file does not exist, or JSON decoding fails. + :return: Parsed JSON content. + :rtype: dict + :side effect: Changes the working directory to the project root. + """ + if not path: + raise ValueError("No input path provided.") + + path = path.strip().strip(',') + + project_root = find_root_dir() + if not project_root: + raise ValueError("Project root not found.") + + full_path = os.path.join(project_root, path) + + if not os.path.exists(full_path): + raise ValueError(f"File does not exist: {full_path}") + + try: + with open(full_path, 'r') as raw_json: + input_json = json.load(raw_json) + except json.JSONDecodeError: + raise ValueError("Failed to decode JSON.") + except IOError: + raise ValueError("MultiModel's parser says: Error opening file.") + + switch_to_root_dir() + + # Validate and apply defaults + input_json = parse_input(input_json) + return input_json + + +def parse_input(input_json): + """ + Validates and applies default values to the input JSON content. Ensures required fields are present + and raises warnings or errors for missing or invalid values. + + :param input_json: The input JSON content. + :type input_json: dict + :raises ValueError: If required fields are missing or invalid values are provided. + :return: Validated and processed JSON content with defaults applied. 
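For orientation, a minimal sketch of what the three metric helpers above report, using made-up arrays rather than real simulator output (importing them assumes accuracy_evaluator.py and its models package are importable):

import numpy as np
from accuracy_evaluator import mape, nad, rmsle

real = np.array([100.0, 120.0, 110.0, 130.0])   # hypothetical ground-truth series
sim = np.array([90.0, 125.0, 115.0, 120.0])     # hypothetical simulator output

print(mape(real, sim))              # mean(|real - sim| / real) * 100, rounded to 3 decimals
print(nad(real, sim))               # sum(|real - sim|) / sum(real) * 100
print(rmsle(real, sim, alpha=0.5))  # RMS of alpha*log(real) - (1-alpha)*log(sim), times 100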
+ :rtype: dict + """ + + DEFAULTS = { + "multimodel": True, + "metamodel": False, + "window_size": 1, + "window_function": "mean", + "meta_function": "mean", + "samples_per_minute": 0, + "current_unit": "", + "unit_scaling_magnitude": 1, + "plot_type": "time_series", + "plot_title": "", + "x_label": "", + "y_label": "", + "seed": 0, + "y_ticks_count": None, + "x_ticks_count": None, + "y_min": None, + "y_max": None, + "x_min": None, + "x_max": None, + } + + # Apply default values where not specified + for key, default_value in DEFAULTS.items(): + if key not in input_json: + input_json[key] = default_value + + # Special handling for required fields without default values + if "metric" not in input_json: + raise ValueError("Required field 'metric' is missing.") + + if ("meta_function" not in input_json) and input_json["metamodel"]: + raise ValueError("Required field 'meta_function' is missing. Please select between 'mean' and 'median'. Alternatively," + "disable metamodel in the config file.") + + if input_json["meta_function"] not in ["mean", "median", "meta_equation1", "equation2", "equation3"]: + raise ValueError("Invalid value for meta_function. Please select between 'mean', 'median', !!!!!!!to be updated in the end!!!!!!!!.") + + # raise a warning + if not input_json["multimodel"] and input_json["metamodel"]: + warnings.warn("Warning: Cannot have a Meta-Model without a Multi-Model. No computation made.") + + return input_json + + +def find_root_dir(): + """ + Searches for the project root directory by looking for a 'README.md' file in the current + and parent directories. + + :return: The path to the project root directory if found, otherwise None. + :rtype: str or None + """ + current_dir = os.path.dirname(os.path.abspath(__file__)) + root = os.path.abspath(os.sep) + while current_dir and current_dir != root: + if os.path.exists(os.path.join(current_dir, 'README.md')): + return current_dir + current_dir = os.path.dirname(current_dir) + return None + + +def switch_to_root_dir(): + """ + Switches the current working directory to the project root directory. Exits the program if the + root directory is not found. + + :side effect: Changes the current working directory or exits the program. 
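Taken together, the defaults and checks above describe the shape of the M3SA setup file. A hypothetical example built from parse_input's DEFAULTS; 'metric' is the only strictly required field and must name a numeric column in the simulator's parquet output, so the concrete values here are illustrative only:

import json

m3sa_setup = {
    "metric": "power_draw",        # required; column name in the simulator output (example value)
    "multimodel": True,
    "metamodel": True,
    "meta_function": "mean",       # "median" is the other documented choice
    "window_size": 10,
    "window_function": "mean",
    "plot_type": "time_series",    # or "cumulative" / "cumulative_time_series"
    "current_unit": "W",
    "unit_scaling_magnitude": 3,   # report in kW (see MultiModel.adjust_unit)
    "seed": 0,
}

with open("m3sa_setup.json", "w") as f:  # file name is illustrative
    json.dump(m3sa_setup, f, indent=2)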
+ """ + root_dir = find_root_dir() + if root_dir: + os.chdir(root_dir) + else: + print("Failed to switch to root directory.") + sys.exit(1) diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py new file mode 100644 index 00000000..11ee836d --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/main.py @@ -0,0 +1,20 @@ +from os import sys + +from input_parser import read_input +from models.MetaModel import MetaModel +from models.MultiModel import MultiModel + + +def main(): + multimodel = MultiModel( + user_input=read_input(sys.argv[2]), + path=sys.argv[1], + ) + + multimodel.generate_plot() + + MetaModel(multimodel) + + +if __name__ == "__main__": + main() diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py new file mode 100644 index 00000000..49930d25 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MetaModel.py @@ -0,0 +1,214 @@ +import numpy as np +import os +import pandas as pd + +from .Model import Model + + +class MetaModel: + """ + A class that aggregates results from multiple simulation models based on user-defined functions, producing + consolidated outputs for analysis. + + Attributes: + multi_model (MultiModel): The container of models whose results are aggregated. + meta_model (Model): Model instance that stores aggregated results. + meta_function (function): Function used to calculate aggregated data. + min_raw_model_len (int): Minimum length of raw data arrays across all models. + min_processed_model_len (int): Minimum length of processed data arrays across all models. + number_of_models (int): Number of models being aggregated. + function_map (dict): Mapping of aggregation function names to function implementations. + """ + + META_MODEL_ID = -101 + + def __init__(self, multimodel, meta_function=None): + """ + Initializes the Metamodel with a MultiModel instance and prepares aggregation functions based on configuration. + + :param multimodel: MultiModel instance containing the models to aggregate. + :raise ValueError: If metamodel functionality is not enabled in the configuration. + """ + if not multimodel.user_input.get('metamodel', False): + raise ValueError("Metamodel is not enabled in the config file") + + self.function_map = { + 'mean': self.mean, + 'median': self.median, + 'meta_equation1': self.meta_equation1, + } + + self.multi_model = multimodel + self.meta_model = Model( + raw_sim_data=[], + id=self.META_MODEL_ID, + path=self.multi_model.output_folder_path + ) + + if meta_function is not None: + self.meta_function = meta_function + else: + self.meta_function = self.function_map.get(multimodel.user_input['meta_function'], self.mean) + + self.min_raw_model_len = min([len(model.raw_sim_data) for model in self.multi_model.models]) + self.min_processed_model_len = min([len(model.processed_sim_data) for model in self.multi_model.models]) + self.number_of_models = len(self.multi_model.models) + self.compute() + self.output() + + def output(self): + """ + Generates outputs by plotting the aggregated results and exporting the metamodel data to a file. + :return: None + :side effect: Outputs data to files and generates plots. + """ + self.plot() + self.output_metamodel() + + def compute(self): + """ + Computes aggregated data based on the specified plot type from the configuration. 
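main() above takes the same two positional arguments that the Kotlin m3saAnalyze wrapper passes: first the experiment output folder, then the M3SA setup file. A rough standalone invocation, with all paths as placeholders (the Kotlin side uses the venv interpreter under src/main/python):

import subprocess

subprocess.run([
    "venv/bin/python3", "main.py",
    "output/my-experiment/outputs",  # argv[1]: folder containing raw-output/<model>/seed=<seed>/host.parquet
    "m3sa_setup.json",               # argv[2]: setup JSON consumed by read_input()
], check=True)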
+ :raise ValueError: If an unsupported plot type is specified in the configuration. + """ + if self.multi_model.plot_type == 'time_series': + self.compute_time_series() + elif self.multi_model.plot_type == 'cumulative': + self.compute_cumulative() + elif self.multi_model.plot_type == 'cumulative_time_series': + self.compute_cumulative_time_series() + else: + raise ValueError("Invalid plot type in config file") + + def plot(self): + """ + Plots the aggregated data according to the specified plot type from the configuration. + :raise ValueError: If an unsupported plot type is specified. + """ + if self.multi_model.plot_type == 'time_series': + self.plot_time_series() + elif self.multi_model.plot_type == 'cumulative': + self.plot_cumulative() + elif self.multi_model.plot_type == 'cumulative_time_series': + self.plot_cumulative_time_series() + + else: + raise ValueError("Invalid plot type in config file") + + def compute_time_series(self): + """ + Aggregates time series data across models using the specified aggregation function. + :return: None + :side effect: Updates the meta_model's processed data with aggregated results. + """ + for i in range(0, self.min_processed_model_len): + data_entries = [] + for j in range(self.number_of_models): + data_entries.append(self.multi_model.models[j].processed_sim_data[i]) + self.meta_model.processed_sim_data.append(self.meta_function(data_entries)) + self.meta_model.raw_sim_data = self.meta_model.processed_sim_data + + def plot_time_series(self): + """ + Generates a time series plot of the aggregated data. + :return: None + :side effect: Displays a time series plot using the multi_model's plotting capabilities. + """ + self.multi_model.models.append(self.meta_model) + self.multi_model.generate_plot() + + def compute_cumulative(self): + """ + Aggregates cumulative data entries across all models. + :return: None + :side effect: Updates the meta_model's cumulative data with aggregated results. + """ + + for i in range(0, self.min_raw_model_len): + data_entries = [] + for j in range(self.number_of_models): + sim_data = self.multi_model.models[j].raw_sim_data + ith_element = sim_data[i] + data_entries.append(ith_element) + self.meta_model.cumulated += self.mean(data_entries) + self.meta_model.cumulated = round(self.meta_model.cumulated, 2) + + def plot_cumulative(self): + """ + Generates a cumulative plot of the aggregated data. + :return: None + :side effect: Displays a cumulative plot using the multi_model's plotting capabilities. + """ + self.multi_model.models.append(self.meta_model) + self.multi_model.generate_plot() + + def compute_cumulative_time_series(self): + """ + Aggregates cumulative time series data entries across models using the specified aggregation function. + :return: None + :side effect: Updates the meta_model's processed data with cumulative aggregated results. + """ + for i in range(0, self.min_processed_model_len): + data_entries = [] + for j in range(self.number_of_models): + data_entries.append(self.multi_model.models[j].processed_sim_data[i]) + self.meta_model.processed_sim_data.append(self.meta_function(data_entries)) + + def plot_cumulative_time_series(self): + """ + Generates a cumulative time series plot of the aggregated data. + :return: None + :side effect: Displays a cumulative time series plot using the multi_model's plotting capabilities. 
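The time-series computation above is an element-wise reduction across models: at each sample index it takes one value from every model's processed series (truncated to the shortest series) and collapses them with the chosen meta_function. A vectorized sketch of the same idea, with made-up data standing in for processed_sim_data:

import numpy as np

processed = [                      # three hypothetical models' processed series
    np.array([1.0, 2.0, 3.0, 4.0]),
    np.array([1.5, 2.5, 2.5, 3.5]),
    np.array([0.5, 3.0, 3.5, 4.5]),
]
n = min(len(p) for p in processed)
stacked = np.vstack([p[:n] for p in processed])

meta_mean = stacked.mean(axis=0)          # meta_function = "mean"
meta_median = np.median(stacked, axis=0)  # meta_function = "median"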
+ """ + self.multi_model.models.append(self.meta_model) + self.multi_model.generate_plot() + + def output_metamodel(self): + """ + Exports the processed sim data of the metamodel to a parquet file for further analysis or record keeping. + :return: None + :side effect: Writes data to a parquet file at the specified directory path. + """ + directory_path = os.path.join(self.multi_model.output_folder_path, "raw-output/metamodel/seed=0") + os.makedirs(directory_path, exist_ok=True) + current_path = os.path.join(directory_path, f"{self.multi_model.metric}.parquet") + df = pd.DataFrame({'processed_sim_data': self.meta_model.processed_sim_data}) + df.to_parquet(current_path, index=False) + + def mean(self, chunks): + """ + Calculates the mean of a list of numerical data. + + :param chunks (list): The data over which to calculate the mean. + :return: float: The mean of the provided data. + """ + return np.mean(chunks) + + def median(self, chunks): + """ + Calculates the median of a list of numerical data. + + :param chunks (list): The data over which to calculate the median. + :return: float: The median of the provided data. + """ + return np.median(chunks) + + def meta_equation1(self, chunks): + """ + Calculates a weighted mean where the weights are inversely proportional to the absolute difference from the median value. + :param chunks (list): Data chunks from which to calculate the weighted mean. + :return: float: The calculated weighted mean. + """ + + """Attempt 1""" + # median_val = np.median(chunks) + # proximity_weights = 1 / (1 + np.abs(chunks - median_val)) # Avoid division by zero + # weighted_mean = np.sum(proximity_weights * chunks) / np.sum(proximity_weights) + # return weighted_mean + + """Attempt 2 Inter-Quartile Mean (same accuracy as mean)""" + # sorted_preds = np.sort(chunks, axis=0) + # Q1 = int(np.floor(0.25 * len(sorted_preds))) + # Q3 = int(np.floor(0.75 * len(sorted_preds))) + # + # iqm = np.mean(sorted_preds[Q1:Q3], axis=0) + # return iqm diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/Model.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/Model.py new file mode 100644 index 00000000..f60f0bb0 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/Model.py @@ -0,0 +1,70 @@ +""" +A model is the output of simulator. It contains the data the simulator output, under a certain topology, seed, +workload, datacenter configuration, etc. A model is further used in the analyzer as part of the MultiModel class, +and further in the MetaModel class. + +:param sim: the simulation data of the model +""" +import json +from dataclasses import dataclass, field + +@dataclass +class Model: + """ + Represents a single simulation output containing various data metrics collected under specific simulation conditions. + A Model object stores raw and processed simulation data and is designed to interact with higher-level structures like + MultiModel and MetaModel for complex data analysis. + + Attributes: + raw_sim_data (list): Initial raw data from the simulator output. + processed_sim_data (list): Data derived from raw_sim_data after applying certain processing operations like aggregation or smoothing. + cumulative_time_series_values (list): Stores cumulative data values useful for time series analysis. + id (int): Unique identifier for the model, typically used for tracking and referencing within analysis tools. + path (str): Base path for storing or accessing related data files. 
+ cumulated (float): Cumulative sum of processed data, useful for quick summaries and statistical analysis. + experiment_name (str): A descriptive name for the experiment associated with this model, potentially extracted from external metadata. + margins_of_error (list): Stores error margins associated with the data, useful for uncertainty analysis. + topologies (list): Describes the network or system topologies used during the simulation. + workloads (list): Lists the types of workloads applied during the simulation, affecting the simulation's applicability and scope. + allocation_policies (list): Details the resource allocation policies used, which influence the simulation outcomes. + carbon_trace_paths (list): Paths to data files containing carbon output or usage data, important for environmental impact studies. + + Methods: + parse_trackr(): Reads additional configuration and metadata from a JSON file named 'trackr.json', enhancing the model with detailed context information. + + Usage: + Model objects are typically instantiated with raw data from simulation outputs and an identifier. After instantiation, + the 'parse_trackr' method can be called to load additional experimental details from a corresponding JSON file. + """ + + path: str + raw_sim_data: list + id: int + processed_sim_data: list = field(default_factory=list) + cumulative_time_series_values: list = field(default_factory=list) + cumulated: float = 0.0 + experiment_name: str = "" + margins_of_error: list = field(default_factory=list) + topologies: list = field(default_factory=list) + workloads: list = field(default_factory=list) + allocation_policies: list = field(default_factory=list) + carbon_trace_paths: list = field(default_factory=list) + + def parse_trackr(self): + """ + Parses the 'trackr.json' file located in the model's base path to extract and store detailed experimental metadata. + This method enhances the model with comprehensive contextual information about the simulation environment. + + :return: None + :side effect: Updates model attributes with data from the 'trackr.json' file, such as experiment names, topologies, and policies. + :raises FileNotFoundError: If the 'trackr.json' file does not exist at the specified path. + :raises json.JSONDecodeError: If there is an error parsing the JSON data. + """ + trackr_path = self.path + "/trackr.json" + with open(trackr_path) as f: + trackr = json.load(f) + self.experiment_name = trackr.get(self.id, {}).get('name', "") + self.topologies = trackr.get(self.id, {}).get('topologies', []) + self.workloads = trackr.get(self.id, {}).get('workloads', []) + self.allocation_policies = trackr.get(self.id, {}).get('allocationPolicies', []) + self.carbon_trace_paths = trackr.get(self.id, {}).get('carbonTracePaths', []) diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MultiModel.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MultiModel.py new file mode 100644 index 00000000..17a92765 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/models/MultiModel.py @@ -0,0 +1,501 @@ +import matplotlib.pyplot as plt +import numpy as np +import os +import pyarrow.parquet as pq +import time +from matplotlib.ticker import MaxNLocator, FuncFormatter + +from simulator_specifics import * +from .MetaModel import MetaModel +from .Model import Model + + +def is_meta_model(model): + """ + Check if the given model is a MetaModel based on its ID. A metamodel will always have an id of -101. 
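Following the usage note above, a minimal sketch of how a Model is instantiated and enriched; the path and data are placeholders, and parse_trackr expects a trackr.json file inside that path:

from models.Model import Model

model = Model(path="output/my-experiment/outputs", raw_sim_data=[1.0, 2.0, 3.0], id=0)
model.parse_trackr()   # reads <path>/trackr.json; raises FileNotFoundError if it is missing
print(model.experiment_name, model.workloads)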
+ + Args: + model (Model): The model to check. + + Returns: + bool: True if model is MetaModel, False otherwise. + """ + return model.id == MetaModel.META_MODEL_ID + + +class MultiModel: + """ + Handles multiple simulation models, aggregates their data based on user-defined parameters, + and generates plots and statistics. + + Attributes: + user_input (dict): Configuration dictionary containing user settings for model processing. + path (str): The base directory path where output files and analysis results are stored. + window_size (int): The size of the window for data aggregation, which affects how data smoothing and granularity are handled. + models (list of Model): A list of Model instances that store the simulation data. + metric (str): The specific metric to be analyzed and plotted, as defined by the user. + measure_unit (str): The unit of measurement for the simulation data, adjusted according to the user's specifications. + output_folder_path (str): Path to the folder where output files are saved. + raw_output_path (str): Directory path where raw simulation data is stored. + analysis_file_path (str): Path to the file where detailed analysis results are recorded. + plot_type (str): The type of plot to generate, which can be 'time_series', 'cumulative', or 'cumulative_time_series'. + plot_title (str): The title of the plot. + x_label (str), y_label (str): Labels for the x and y axes of the plot. + x_min (float), x_max (float), y_min (float), y_max (float): Optional parameters to define axis limits for the plots. + + Methods: + parse_user_input(window_size): Parses and sets the class attributes based on the provided user input. + adjust_unit(): Adjusts the unit of measurement based on user settings, applying appropriate metric prefixes. + set_paths(): Initializes the directory paths for storing outputs and analysis results. + init_models(): Reads simulation data from Parquet files and initializes Model instances. + compute_windowed_aggregation(): Processes the raw data by applying a windowed aggregation function for smoothing. + generate_plot(): Orchestrates the generation of the specified plot type by calling the respective plotting functions. + generate_time_series_plot(): Generates a time series plot of the aggregated data. + generate_cumulative_plot(): Creates a bar chart showing cumulative data for each model. + generate_cumulative_time_series_plot(): Produces a plot that displays cumulative data over time for each model. + save_plot(): Saves the generated plot to a PDF file in the specified directory. + output_stats(): Writes detailed statistics of the simulation to an analysis file for record-keeping. + mean_of_chunks(np_array, window_size): Calculates the mean of data segments for smoothing and processing. + get_cumulative_limits(model_sums): Determines appropriate x-axis limits for cumulative plots based on the model data. + + Usage: + To use this class, instantiate it with a dictionary of user settings, a path for outputs, and optionally a window size. + Call the `generate_plot` method to process the data and generate plots as configured by the user. + """ + + def __init__(self, user_input, path, window_size=-1): + """ + Initializes the MultiModel with provided user settings and prepares the environment. + + :param user_input (dict): Configurations and settings from the user. + :param path (str): Path where output and analysis will be stored. + :param window_size (int): The size of the window to aggregate data; uses user input if -1. 
+ :return: None + """ + + self.starting_time = time.time() + self.end_time = None + self.workload_time = None + + self.user_input = user_input + + self.metric = None + self.measure_unit = None + self.path = path + self.models = [] + + self.folder_path = None + self.output_folder_path = None + self.raw_output_path = None + self.analysis_file_path = None + self.unit_scaling = 1 + self.window_size = -1 + self.window_function = "median" + self.max_model_len = 0 + self.seed = 0 + + self.plot_type = None + self.plot_title = None + self.x_label = None + self.y_label = None + self.x_min = None + self.x_max = None + self.y_min = None + self.y_max = None + self.plot_path = None + + self.parse_user_input(window_size) + self.set_paths() + self.init_models() + + self.compute_windowed_aggregation() + + def parse_user_input(self, window_size): + """ + Parses and sets attributes based on user input. + + :param window_size (int): Specified window size for data aggregation, defaults to user_input if -1. + :return: None + """ + if window_size == -1: + self.window_size = self.user_input["window_size"] + else: + self.window_size = window_size + self.metric = self.user_input["metric"] + self.measure_unit = self.adjust_unit() + self.window_function = self.user_input["window_function"] + self.seed = self.user_input["seed"] + + self.plot_type = self.user_input["plot_type"] + self.plot_title = self.user_input["plot_title"] + if self.user_input["x_label"] == "": + self.x_label = "Samples" + else: + self.x_label = self.user_input["x_label"] + + if self.user_input["y_label"] == "": + self.y_label = self.metric + " [" + self.measure_unit + "]" + else: + self.y_label = self.user_input["y_label"] + + self.y_min = self.user_input["y_min"] + self.y_max = self.user_input["y_max"] + self.x_min = self.user_input["x_min"] + self.x_max = self.user_input["x_max"] + + def adjust_unit(self): + """ + Adjusts the unit of measurement according to the scaling magnitude specified by the user. + This method translates the given measurement scale into a scientifically accepted metric prefix. + + :return str: The metric prefixed by the appropriate scale (e.g., 'kWh' for kilo-watt-hour if the scale is 3). + :raise ValueError: If the unit scaling magnitude provided by the user is not within the accepted range of scaling factors. + """ + prefixes = ['n', 'μ', 'm', '', 'k', 'M', 'G', 'T'] + scaling_factors = [-9, -6, -3, 1, 3, 6, 9] + given_metric = self.user_input["current_unit"] + self.unit_scaling = self.user_input["unit_scaling_magnitude"] + + if self.unit_scaling not in scaling_factors: + raise ValueError( + "Unit scaling factor not found. Please enter a valid unit from [-9, -6, -3, 1, 3, 6, 9].") + + if self.unit_scaling == 1: + return given_metric + + for i in range(len(scaling_factors)): + if self.unit_scaling == scaling_factors[i]: + self.unit_scaling = 10 ** self.unit_scaling + result = prefixes[i] + given_metric + return result + + def set_paths(self): + """ + Configures and initializes the directory paths for output and analysis based on the base directory provided. + This method sets paths for the raw output and detailed analysis results, ensuring directories are created if + they do not already exist, and prepares a base file for capturing analytical summaries. + + :return: None + :side effect: Creates necessary directories and files for output and analysis. 
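adjust_unit above maps unit_scaling_magnitude onto a metric prefix, and init_models later divides the raw series by the same factor. A compact restatement of that mapping (illustrative only; a magnitude of 1 means no scaling):

prefixes = {-9: "n", -6: "μ", -3: "m", 1: "", 3: "k", 6: "M", 9: "G"}

def scaled_label(current_unit: str, magnitude: int) -> str:
    return prefixes[magnitude] + current_unit

print(scaled_label("W", 3))    # "kW"; the raw series is then divided by 10**3
print(scaled_label("W", -3))   # "mW"; dividing by 10**-3 scales the series up by 1000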
+ """ + self.output_folder_path = os.getcwd() + "/" + self.path + self.raw_output_path = os.getcwd() + "/" + self.path + "/raw-output" + self.analysis_file_path = os.getcwd() + "/" + self.path + "/simulation-analysis/" + os.makedirs(self.analysis_file_path, exist_ok=True) + self.analysis_file_path = os.path.join(self.analysis_file_path, "analysis.txt") + if not os.path.exists(self.analysis_file_path): + with open(self.analysis_file_path, "w") as f: + f.write("Analysis file created.\n") + + def init_models(self): + """ + Initializes models from the simulation output stored in Parquet files. This method reads each Parquet file, + processes the relevant data, and initializes Model instances which are stored in the model list. + + :return: None + :raise ValueError: If the unit scaling has not been set prior to model initialization. + """ + model_id = 0 + + for simulation_folder in os.listdir(self.raw_output_path): + if simulation_folder == "metamodel": + continue + path_of_parquet_file = f"{self.raw_output_path}/{simulation_folder}/seed={self.seed}/{SIMULATION_DATA_FILE}.parquet" + parquet_file = pq.read_table(path_of_parquet_file).to_pandas() + raw = parquet_file.select_dtypes(include=[np.number]).groupby("timestamp") + raw = raw[self.metric].sum().values + + if self.unit_scaling is None: + raise ValueError("Unit scaling factor is not set. Please ensure it is set correctly.") + + raw = np.divide(raw, self.unit_scaling) + + if self.user_input["samples_per_minute"] > 0: + MINUTES_IN_DAY = 1440 + self.workload_time = len(raw) * self.user_input["samples_per_minute"] / MINUTES_IN_DAY + + model = Model(raw_sim_data=raw, id=model_id, path=self.output_folder_path) + self.models.append(model) + model_id += 1 + + self.max_model_len = min([len(model.raw_sim_data) for model in self.models]) + + def compute_windowed_aggregation(self): + """ + Applies a windowed aggregation function to each model's dataset. This method is typically used for smoothing + or reducing data granularity. It involves segmenting the dataset into windows of specified size and applying + an aggregation function to each segment. + + :return: None + :side effect: Modifies each model's processed_sim_data attribute to contain aggregated data. + """ + if self.plot_type != "cumulative": + for model in self.models: + numeric_values = model.raw_sim_data + model.processed_sim_data = self.mean_of_chunks(numeric_values, self.window_size) + + def generate_plot(self): + """ + Creates and saves plots based on the processed data from multiple models. This method determines + the type of plot to generate based on user input and invokes the appropriate plotting function. + + The plotting options supported are 'time_series', 'cumulative', and 'cumulative_time_series'. + Depending on the type specified, this method delegates to specific plot-generating functions. + + :return: None + :raises ValueError: If the plot type specified is not recognized or supported by the system. + :side effect: + - Generates and saves a plot to the file system. + - Updates the plot attributes based on the generated plot. + - Displays the plot on the matplotlib figure canvas. 
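The windowed aggregation above reduces to mean_of_chunks: consecutive, non-overlapping windows of window_size samples are replaced by their mean, and a trailing partial window is averaged as-is. For example, with made-up data:

import numpy as np

raw = np.array([2.0, 4.0, 6.0, 8.0, 10.0])   # stand-in for raw_sim_data
window_size = 2

chunks = [raw[i:i + window_size] for i in range(0, len(raw), window_size)]
print([float(np.mean(c)) for c in chunks])   # [3.0, 7.0, 10.0]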
+ """ + plt.figure(figsize=(12, 10)) + plt.xticks(size=22) + plt.yticks(size=22) + plt.ylabel(self.y_label, size=26) + plt.xlabel(self.x_label, size=26) + plt.title(self.plot_title, size=26) + plt.grid() + + formatter = FuncFormatter(lambda x, _: '{:,}'.format(int(x)) if x >= 1000 else int(x)) + ax = plt.gca() + ax.xaxis.set_major_formatter(formatter) + # ax.yaxis.set_major_formatter(formatter) yaxis has formatting issues - to solve in a future iteration + + if self.user_input['x_ticks_count'] is not None: + ax = plt.gca() + ax.xaxis.set_major_locator(MaxNLocator(self.user_input['x_ticks_count'])) + + if self.user_input['y_ticks_count'] is not None: + ax = plt.gca() + ax.yaxis.set_major_locator(MaxNLocator(self.user_input['y_ticks_count'])) + + self.set_x_axis_lim() + self.set_y_axis_lim() + + if self.plot_type == "time_series": + self.generate_time_series_plot() + elif self.plot_type == "cumulative": + self.generate_cumulative_plot() + elif self.plot_type == "cumulative_time_series": + self.generate_cumulative_time_series_plot() + else: + raise ValueError( + "Plot type not recognized. Please enter a valid plot type. The plot can be either " + "'time_series', 'cumulative', or 'cumulative_time_series'." + ) + + plt.tight_layout() + plt.subplots_adjust(right=0.85) + plt.legend(fontsize=12, bbox_to_anchor=(1, 1)) + self.save_plot() + self.output_stats() + + def generate_time_series_plot(self): + """ + Plots time series data for each model. This function iterates over each model, applies the defined + windowing function to smooth the data, and plots the resulting series. + + :return: None + :side effect: Plots are displayed on the matplotlib figure canvas. + """ + for model in self.models: + label = "Meta-Model" if is_meta_model(model) else "Model " + str(model.id) + if is_meta_model(model): + repeated_means = np.repeat(means, self.window_size)[:len(model.processed_sim_data) * self.window_size] + plt.plot( + repeated_means, + drawstyle='steps-mid', + label=label, + color="red", + linestyle="--", + marker="o", + markevery=max(1, len(repeated_means) // 50), + linewidth=2 + ) + else: + means = self.mean_of_chunks(model.raw_sim_data, self.window_size) + repeated_means = np.repeat(means, self.window_size)[:len(model.raw_sim_data)] + plt.plot(repeated_means, drawstyle='steps-mid', label=label) + + def generate_cumulative_plot(self): + """ + Generates a horizontal bar chart showing cumulative data for each model. This function + aggregates total values per model and displays them in a bar chart, providing a visual + comparison of total values across models. + + :return: None + :side effect: Plots are displayed on the matplotlib figure canvas. + """ + plt.xlim(self.get_cumulative_limits(model_sums=self.sum_models_entries())) + plt.ylabel("Model ID", size=20) + plt.xlabel("Total " + self.metric + " [" + self.measure_unit + "]") + plt.yticks(range(len(self.models)), [model.id for model in self.models]) + plt.grid(False) + + cumulated_energies = self.sum_models_entries() + for i, model in enumerate(self.models): + label = "Meta-Model" if is_meta_model(model) else "Model " + str(model.id) + if is_meta_model(model): + plt.barh(label=label, y=i, width=cumulated_energies[i], color="red") + else: + plt.barh(label=label, y=i, width=cumulated_energies[i]) + plt.text(cumulated_energies[i], i, str(cumulated_energies[i]), ha='left', va='center', size=26) + + def generate_cumulative_time_series_plot(self): + """ + Generates a plot showing the cumulative data over time for each model. 
This visual representation is + useful for analyzing trends and the accumulation of values over time. + + :return: None + :side effect: Displays the cumulative data over time on the matplotlib figure canvas. + """ + self.compute_cumulative_time_series() + + for model in self.models: + if is_meta_model(model): + cumulative_repeated = np.repeat(model.cumulative_time_series_values, self.window_size)[ + :len(model.processed_sim_data) * self.window_size] + plt.plot( + cumulative_repeated, + drawstyle='steps-mid', + label=("Meta-Model"), + color="red", + linestyle="--", + marker="o", + markevery=max(1, len(cumulative_repeated) // 10), + linewidth=3 + ) + else: + cumulative_repeated = np.repeat(model.cumulative_time_series_values, self.window_size)[ + :len(model.raw_sim_data)] + plt.plot(cumulative_repeated, drawstyle='steps-mid', label=("Model " + str(model.id))) + + def compute_cumulative_time_series(self): + """ + Computes the cumulative sum of processed data over time for each model, storing the result for use in plotting. + + :return: None + :side effect: Updates each model's 'cumulative_time_series_values' attribute with the cumulative sums. + """ + for model in self.models: + cumulative_array = [] + _sum = 0 + for value in model.processed_sim_data: + _sum += value + cumulative_array.append(_sum * self.window_size) + model.cumulative_time_series_values = cumulative_array + + def save_plot(self): + """ + Saves the current plot to a PDF file in the specified directory, constructing the file path from the + plot attributes and ensuring that the directory exists before saving. + + :return: None + :side effect: Creates or overwrites a PDF file containing the plot in the designated folder. + """ + folder_prefix = self.output_folder_path + "/simulation-analysis/" + self.metric + "/" + self.plot_path = folder_prefix + self.plot_type + "_plot_multimodel_metric=" + self.metric + "_window=" + str( + self.window_size) + ".pdf" + plt.savefig(self.plot_path) + + def set_x_axis_lim(self): + """ + Sets the x-axis limits for the plot based on user-defined minimum and maximum values. If values + are not specified, the axis limits will default to encompassing all data points. + + :return: None + :side effect: Adjusts the x-axis limits of the current matplotlib plot. + """ + if self.x_min is not None: + plt.xlim(left=self.x_min) + + if self.x_max is not None: + plt.xlim(right=self.x_max) + + def set_y_axis_lim(self): + """ + Dynamically sets the y-axis limits to be slightly larger than the range of the data, enhancing + the readability of the plot by ensuring all data points are comfortably within the view. + + :return: None + :side effect: Adjusts the y-axis limits of the current matplotlib plot. + """ + if self.y_min is not None: + plt.ylim(bottom=self.y_min) + if self.y_max is not None: + plt.ylim(top=self.y_max) + + def sum_models_entries(self): + """ + Computes the total values from each model for use in cumulative plotting. This method aggregates + the data across all models and prepares it for cumulative display. + + :return: List of summed values for each model, useful for plotting and analysis. + """ + models_sums = [] + for (i, model) in enumerate(self.models): + if is_meta_model(model): + models_sums.append(model.cumulated) + else: + cumulated_energy = model.raw_sim_data.sum() + cumulated_energy = round(cumulated_energy, 2) + models_sums.append(cumulated_energy) + + return models_sums + + def output_stats(self): + """ + Records and writes detailed simulation statistics to an analysis file. 
This includes time stamps, + performance metrics, and other relevant details. + + :return: None + :side effect: Appends detailed simulation statistics to an existing file for record-keeping and analysis. + """ + self.end_time = time.time() + with open(self.analysis_file_path, "a") as f: + f.write("\n\n========================================\n") + f.write("Simulation made at " + time.strftime("%Y-%m-%d %H:%M:%S") + "\n") + f.write("Metric: " + self.metric + "\n") + f.write("Unit: " + self.measure_unit + "\n") + f.write("Window size: " + str(self.window_size) + "\n") + f.write("Sample count in raw sim data: " + str(self.max_model_len) + "\n") + f.write("Computing time " + str(round(self.end_time - self.starting_time, 1)) + "s\n") + if (self.user_input["samples_per_minute"] > 0): + f.write("Workload time: " + str(round(self.workload_time, 2)) + " days\n") + f.write("Plot path" + self.plot_path + "\n") + f.write("========================================\n") + + def mean_of_chunks(self, np_array, window_size): + """ + Calculates the mean of data within each chunk for a given array. This method helps in smoothing the data by + averaging over specified 'window_size' segments. + + :param np_array (np.array): Array of numerical data to be chunked and averaged. + :param window_size (int): The size of each segment to average over. + :return: np.array: An array of mean values for each chunk. + :side effect: None + """ + if window_size == 1: + return np_array + + chunks = [np_array[i:i + window_size] for i in range(0, len(np_array), window_size)] + means = [np.mean(chunk) for chunk in chunks] + return np.array(means) + + def get_cumulative_limits(self, model_sums): + """ + Calculates the appropriate x-axis limits for cumulative plots based on the summarized data from each model. + + :param model_sums (list of float): The total values for each model. + :return: tuple: A tuple containing the minimum and maximum x-axis limits. + """ + axis_min = min(model_sums) * 0.9 + axis_max = max(model_sums) * 1.1 + + if self.user_input["x_min"] is not None: + axis_min = self.user_input["x_min"] + if self.user_input["x_max"] is not None: + axis_max = self.user_input["x_max"] + + return [axis_min * 0.9, axis_max * 1.1] diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt b/opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt new file mode 100644 index 00000000..cbd22985 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/requirements.txt @@ -0,0 +1,4 @@ +matplotlib==3.8.4 +numpy==2.1.1 +pandas==2.2.2 +pyarrow==16.1.0 diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py new file mode 100644 index 00000000..4e1c36e1 --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/simulator_specifics.py @@ -0,0 +1,14 @@ +""" +This file is the integration layer of the M3SA tool upon any (ICT) simulator. + +The system will use the elements from this file in the analysis / meta-simulation process. +""" + +""" +SIMULATION_DATA_FILE (str): The name of the file containing the simulation data. Enter only the name, not the path, not +the extension. The data file must be parquet format. 
+ +✅ Good: "host", "simulation_data", "cats_predictions" +❌ Wrong: "host.json", "opendc/folder_x/folder_y/data" +""" +SIMULATION_DATA_FILE = "host" # opendc outputs in file host.parquet diff --git a/opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py b/opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py new file mode 100644 index 00000000..fd4fec2e --- /dev/null +++ b/opendc-experiments/opendc-experiments-m3sa/src/main/python/utils.py @@ -0,0 +1,25 @@ +import sys + +""" +Constants for the main.py file +""" + +SIMULATION_ANALYSIS_FOLDER_NAME = 'simulation-analysis' +EMISSIONS_ANALYSIS_FOLDER_NAME = 'carbon_emission' +ENERGY_ANALYSIS_FOLDER_NAME = 'power_draw' + +""" +Utility functions +""" + + +def clean_analysis_file(metric): + analysis_file_path = SIMULATION_ANALYSIS_FOLDER_NAME + "/" + if metric == "power_draw": + analysis_file_path += ENERGY_ANALYSIS_FOLDER_NAME + else: + analysis_file_path += EMISSIONS_ANALYSIS_FOLDER_NAME + analysis_file_path += "/analysis.txt" + + with open(analysis_file_path, "w") as f: + f.write("") -- cgit v1.2.3