"""Plot actual versus detected host failures for one experiment run.

The script reads ``host.parquet`` and ``task.parquet`` from the run's output
directory (resolved relative to the current working directory), counts the
host rows that report downtime per timestamp, plots them next to a hard-coded
table of detected failures and saves the figure as a PDF.
"""

import csv
import os
import sys
import time
from collections import Counter
from time import gmtime, strftime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Output directory of the run, appended verbatim to the current working
# directory (hence the leading slash).
PATH = "/src/experiment1/output/gmail/greenifier-demo-scaling/raw-output/0/seed=0/"

# Divisor applied to timestamps before plotting (evaluates to 1_500_000.0; the
# literal was originally spelled ``3_6000_000 / 24``).
# NOTE(review): the x-axis is labelled "Time [days]". If timestamps are in
# milliseconds, one day would be 3_600_000 * 24 = 86_400_000 -- confirm the
# intended unit before relying on the axis values.
CONSTANT = 36_000_000 / 24

# Hard-coded per-timestamp counts that are summed for the detection ratio.
# NOTE(review): presumably detected failures from an earlier run -- confirm.
_DETECTED_FOR_RATIO = {
    216000000: 277,
    219600000: 277,
    280800000: 139,
    727200000: 139,
    730800000: 139,
    802800000: 162,
    1407600000: 139,
    1850400000: 139,
    1857600000: 277,
    1861200000: 277,
    1872000000: 139,
    1926000000: 62,
    2023200000: 162,
    2037600000: 277,
    2041200000: 208,
    2070000000: 139,
}

# Hard-coded per-timestamp counts drawn as the "Detected failures" line.
# NOTE(review): this table differs from the one used for the ratio -- confirm
# that plotting one and summing the other is intended.
_DETECTED_FOR_PLOT = {
    3600000: 11,
    180000000: 11,
    212400000: 103,
    216000000: 103,
    219600000: 103,
    223200000: 103,
    374400000: 57,
    712800000: 103,
    795600000: 57,
    838800000: 11,
    882000000: 103,
    975600000: 57,
    979200000: 11,
    982800000: 11,
    1087200000: 11,
    1234800000: 11,
    1404000000: 11,
    1854000000: 103,
    1857600000: 103,
    1861200000: 103,
    1926000000: 36,
    2034000000: 103,
    2037600000: 103,
    2041200000: 34,
    2080800000: 11,
    2102400000: 11,
    2163600000: 11,
    2185200000: 57,
    2383200000: 57,
}

# Constant added to every plotted detected count.
# NOTE(review): the reason for the offset is not visible here -- confirm.
_PLOT_OFFSET = 150


def create_dataframes(path: str = PATH) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load the host and task parquet files of a run.

    Args:
        path: Directory suffix appended verbatim to the current working
            directory. Defaults to the module-level ``PATH``.

    Returns:
        A ``(hosts, tasks)`` tuple of data frames.

    Exits the process with status 1 if either file cannot be read.
    """
    base_dir = os.getcwd() + path
    print(base_dir)
    # os.path.join keeps this correct whether or not ``path`` ends in a slash.
    hosts = os.path.join(base_dir, "host.parquet")
    tasks = os.path.join(base_dir, "task.parquet")
    try:
        df_hosts = pd.read_parquet(hosts)
        df_tasks = pd.read_parquet(tasks)
    except Exception as err:
        # Script boundary: report the real cause instead of hiding it, then
        # stop with the same exit status as before.
        print(f"Exception: error opening files. ({err})", file=sys.stderr)
        sys.exit(1)
    return (df_hosts, df_tasks)


def get_name() -> str:
    """Return the current UTC time formatted as ``DD_Mon_YYYY_HHMMSS``."""
    return strftime("%d_%b_%Y_%H%M%S", gmtime())


def iterate(frame: pd.DataFrame) -> dict:
    """Count the rows with positive downtime for each timestamp.

    Args:
        frame: Data frame with ``downtime`` and ``timestamp`` columns.

    Returns:
        A dict mapping each timestamp to the number of rows whose
        ``downtime`` is greater than zero, in order of first appearance.
    """
    if frame.empty:
        return {}
    # One boolean selection replaces the per-row ``.iloc`` lookups.
    failed_timestamps = frame.loc[frame["downtime"] > 0.0, "timestamp"]
    # Counter keeps insertion order, so the plotted line visits timestamps in
    # the order they first occur in the frame.
    return dict(Counter(failed_timestamps))


# This is a working function that is changed frequently.
def plot_hosts(frame: pd.DataFrame) -> None:
    """Plot actual against detected failures and save the figure as a PDF.

    The figure is written to ``<cwd>/src/experiment1/figures/<timestamp>.pdf``
    and the ratio of detected to actual failures is printed.

    Args:
        frame: Host data frame with ``downtime`` and ``timestamp`` columns.
    """
    # Build a fresh shifted copy so the module-level table is never mutated.
    detected = pd.DataFrame(
        [(ts, count + _PLOT_OFFSET) for ts, count in _DETECTED_FOR_PLOT.items()],
        columns=["timestamp", "count"],
    )
    actual_counts = iterate(frame)
    actual = pd.DataFrame(
        list(actual_counts.items()), columns=["timestamp", "count"]
    )

    plt.plot(
        actual["timestamp"] / CONSTANT,
        actual["count"],
        label="Actual failures",
        linewidth=1,
        zorder=2,
    )
    plt.plot(
        detected["timestamp"] / CONSTANT,
        detected["count"],
        label="Detected failures",
        linewidth=3,
        zorder=1,
    )
    plt.xlabel("Time [days]")
    plt.ylabel("Failures per timestamp")
    plt.title("Failure detection results")
    plt.legend()

    figures_dir = os.path.join(os.getcwd(), "src", "experiment1", "figures")
    # savefig does not create missing directories.
    os.makedirs(figures_dir, exist_ok=True)
    location = os.path.join(figures_dir, f"{get_name()}.pdf")
    plt.savefig(location, dpi=300)

    total_detected = sum(_DETECTED_FOR_RATIO.values())
    total_actual = sum(actual_counts.values())
    if total_actual == 0:
        # A frame without any downtime would otherwise divide by zero.
        print("Percentage detected: n/a (no actual failures recorded)")
    else:
        print(f"Percentage detected: {total_detected/total_actual}")


def export_results(path: str) -> None:
    """Load the run stored under ``path`` and plot its host failures.

    Args:
        path: Directory suffix appended to the current working directory.

    Raises:
        ValueError: If ``path`` is empty.
    """
    if not path:
        raise ValueError("path must be a non-empty string")
    # Index 0 is hosts, index 1 is tasks; only the hosts are plotted.
    df_hosts, _df_tasks = create_dataframes(path)
    plot_hosts(df_hosts)


def main() -> None:
    """Run the export for the default run directory."""
    export_results(PATH)


if __name__ == "__main__":
    main()