From ffb1045c8d27b48868ff582551a0b41dd37afe45 Mon Sep 17 00:00:00 2001 From: mjkwiatkowski Date: Tue, 2 Jun 2026 19:09:38 +0200 Subject: feat: added more text to the background, refactor is still needed --- appendix/glossary.tex | 2 + .../iteration1/1-s2.0-S0306261924012236-main.pdf | Bin 0 -> 11383972 bytes ...xandruIosup_ICT_DigitalTwin_ModSim24_SHARED.pdf | Bin 0 -> 45377841 bytes .../iteration1/aaai/00026-AAAI24.SarkarS-DM.pdf | Bin 0 -> 1306442 bytes citations2/iteration1/acm/3408308.3427982.pdf | Bin 0 -> 3289160 bytes citations2/iteration1/acm/3472727.3472802.pdf | Bin 0 -> 770664 bytes citations2/iteration1/acm/3563357.3564050.pdf | Bin 0 -> 950480 bytes citations2/iteration1/acm/3600100.3623719.pdf | Bin 0 -> 3047370 bytes citations2/iteration1/acm/3604283.pdf | Bin 0 -> 3273011 bytes citations2/iteration1/acm/3772078.pdf | Bin 0 -> 18924613 bytes ..._Data_Centers_A_Digital_Twin-Based_Approach.pdf | Bin 0 -> 3302437 bytes ...al_Twins_A_Predictive_and_Adaptive_Approach.pdf | Bin 0 -> 1303380 bytes ...enters__Visualization_and_Anomaly_Detection.pdf | Bin 0 -> 448897 bytes ...Trends_in_Data_Center_Management_Automation.pdf | Bin 0 -> 448918 bytes ...Analytics_for_Digital_Twins_in_Data_Centers.pdf | Bin 0 -> 6566771 bytes ...sed_Energy-Saving_Solution_for_Data_Centers.pdf | Bin 0 -> 1301647 bytes ...Considerations_Challenges_and_Opportunities.pdf | Bin 0 -> 4858789 bytes citations2/iteration1/osti/2376329.pdf | Bin 0 -> 13281765 bytes citations2/iteration1/osti/2480045.pdf | Bin 0 -> 2419927 bytes .../iteration1/springer/978-3-030-44907-0.pdf | Bin 0 -> 15656795 bytes .../iteration1/springer/s00466-022-02152-3.pdf | Bin 0 -> 2186711 bytes ...to modeling power consumption for a hybrid.pdf" | Bin 0 -> 870031 bytes content/background.tex | 87 ++++++++++++++++++++- content/intro.tex | 9 +++ style/style.tex | 2 +- 25 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 citations2/iteration1/1-s2.0-S0306261924012236-main.pdf create mode 100644 
citations2/iteration1/2024-08-15_AlexandruIosup_ICT_DigitalTwin_ModSim24_SHARED.pdf create mode 100644 citations2/iteration1/aaai/00026-AAAI24.SarkarS-DM.pdf create mode 100644 citations2/iteration1/acm/3408308.3427982.pdf create mode 100644 citations2/iteration1/acm/3472727.3472802.pdf create mode 100644 citations2/iteration1/acm/3563357.3564050.pdf create mode 100644 citations2/iteration1/acm/3600100.3623719.pdf create mode 100644 citations2/iteration1/acm/3604283.pdf create mode 100644 citations2/iteration1/acm/3772078.pdf create mode 100644 citations2/iteration1/ieee/Adaptive_Capacity_Provisioning_for_Carbon-Aware_Data_Centers_A_Digital_Twin-Based_Approach.pdf create mode 100644 citations2/iteration1/ieee/Data_Center_Optimization_with_Digital_Twins_A_Predictive_and_Adaptive_Approach.pdf create mode 100644 citations2/iteration1/ieee/DyTwin_Federated_Adaptive_Digital_Twins_for_Data_Centers__Visualization_and_Anomaly_Detection.pdf create mode 100644 citations2/iteration1/ieee/Emerging_Trends_in_Data_Center_Management_Automation.pdf create mode 100644 citations2/iteration1/ieee/Implementing_Immersive_Analytics_for_Digital_Twins_in_Data_Centers.pdf create mode 100644 citations2/iteration1/ieee/Smart_DC_An_AI_and_Digital_Twin-based_Energy-Saving_Solution_for_Data_Centers.pdf create mode 100644 citations2/iteration1/ieee/Visualizing_an_Exascale_Data_Center_Digital_Twin_Considerations_Challenges_and_Opportunities.pdf create mode 100644 citations2/iteration1/osti/2376329.pdf create mode 100644 citations2/iteration1/osti/2480045.pdf create mode 100644 citations2/iteration1/springer/978-3-030-44907-0.pdf create mode 100644 citations2/iteration1/springer/s00466-022-02152-3.pdf create mode 100644 "citations2/iteration2/wiley/Concurrency and Computation - 2018 - S\303\256rbu - A data\342\200\220driven approach to modeling power consumption for a hybrid.pdf" diff --git a/appendix/glossary.tex b/appendix/glossary.tex index 9e78c83..d760000 100644 --- a/appendix/glossary.tex 
+++ b/appendix/glossary.tex @@ -10,3 +10,5 @@ \newacronym{genai}{GenAI}{Generative Artificial Intelligence} \newacronym{llm}{LLMS}{Large Language Models} \newacronym{iot}{IoT}{Internet-of-Things} +\newacronym{ed}{EDT}{ExaDigiT} +\newacronym{rl}{RL}{Reinforcement Learning} diff --git a/citations2/iteration1/1-s2.0-S0306261924012236-main.pdf b/citations2/iteration1/1-s2.0-S0306261924012236-main.pdf new file mode 100644 index 0000000..3ac99c1 Binary files /dev/null and b/citations2/iteration1/1-s2.0-S0306261924012236-main.pdf differ diff --git a/citations2/iteration1/2024-08-15_AlexandruIosup_ICT_DigitalTwin_ModSim24_SHARED.pdf b/citations2/iteration1/2024-08-15_AlexandruIosup_ICT_DigitalTwin_ModSim24_SHARED.pdf new file mode 100644 index 0000000..e4f6f32 Binary files /dev/null and b/citations2/iteration1/2024-08-15_AlexandruIosup_ICT_DigitalTwin_ModSim24_SHARED.pdf differ diff --git a/citations2/iteration1/aaai/00026-AAAI24.SarkarS-DM.pdf b/citations2/iteration1/aaai/00026-AAAI24.SarkarS-DM.pdf new file mode 100644 index 0000000..4ea00dd Binary files /dev/null and b/citations2/iteration1/aaai/00026-AAAI24.SarkarS-DM.pdf differ diff --git a/citations2/iteration1/acm/3408308.3427982.pdf b/citations2/iteration1/acm/3408308.3427982.pdf new file mode 100644 index 0000000..5bf156c Binary files /dev/null and b/citations2/iteration1/acm/3408308.3427982.pdf differ diff --git a/citations2/iteration1/acm/3472727.3472802.pdf b/citations2/iteration1/acm/3472727.3472802.pdf new file mode 100644 index 0000000..f9b1e1d Binary files /dev/null and b/citations2/iteration1/acm/3472727.3472802.pdf differ diff --git a/citations2/iteration1/acm/3563357.3564050.pdf b/citations2/iteration1/acm/3563357.3564050.pdf new file mode 100644 index 0000000..bdbcd8c Binary files /dev/null and b/citations2/iteration1/acm/3563357.3564050.pdf differ diff --git a/citations2/iteration1/acm/3600100.3623719.pdf b/citations2/iteration1/acm/3600100.3623719.pdf new file mode 100644 index 0000000..a442e3d Binary 
files /dev/null and b/citations2/iteration1/acm/3600100.3623719.pdf differ diff --git a/citations2/iteration1/acm/3604283.pdf b/citations2/iteration1/acm/3604283.pdf new file mode 100644 index 0000000..3ee515f Binary files /dev/null and b/citations2/iteration1/acm/3604283.pdf differ diff --git a/citations2/iteration1/acm/3772078.pdf b/citations2/iteration1/acm/3772078.pdf new file mode 100644 index 0000000..64ca5b6 Binary files /dev/null and b/citations2/iteration1/acm/3772078.pdf differ diff --git a/citations2/iteration1/ieee/Adaptive_Capacity_Provisioning_for_Carbon-Aware_Data_Centers_A_Digital_Twin-Based_Approach.pdf b/citations2/iteration1/ieee/Adaptive_Capacity_Provisioning_for_Carbon-Aware_Data_Centers_A_Digital_Twin-Based_Approach.pdf new file mode 100644 index 0000000..713811e Binary files /dev/null and b/citations2/iteration1/ieee/Adaptive_Capacity_Provisioning_for_Carbon-Aware_Data_Centers_A_Digital_Twin-Based_Approach.pdf differ diff --git a/citations2/iteration1/ieee/Data_Center_Optimization_with_Digital_Twins_A_Predictive_and_Adaptive_Approach.pdf b/citations2/iteration1/ieee/Data_Center_Optimization_with_Digital_Twins_A_Predictive_and_Adaptive_Approach.pdf new file mode 100644 index 0000000..f013e0a Binary files /dev/null and b/citations2/iteration1/ieee/Data_Center_Optimization_with_Digital_Twins_A_Predictive_and_Adaptive_Approach.pdf differ diff --git a/citations2/iteration1/ieee/DyTwin_Federated_Adaptive_Digital_Twins_for_Data_Centers__Visualization_and_Anomaly_Detection.pdf b/citations2/iteration1/ieee/DyTwin_Federated_Adaptive_Digital_Twins_for_Data_Centers__Visualization_and_Anomaly_Detection.pdf new file mode 100644 index 0000000..191a213 Binary files /dev/null and b/citations2/iteration1/ieee/DyTwin_Federated_Adaptive_Digital_Twins_for_Data_Centers__Visualization_and_Anomaly_Detection.pdf differ diff --git a/citations2/iteration1/ieee/Emerging_Trends_in_Data_Center_Management_Automation.pdf 
b/citations2/iteration1/ieee/Emerging_Trends_in_Data_Center_Management_Automation.pdf new file mode 100644 index 0000000..4f78634 Binary files /dev/null and b/citations2/iteration1/ieee/Emerging_Trends_in_Data_Center_Management_Automation.pdf differ diff --git a/citations2/iteration1/ieee/Implementing_Immersive_Analytics_for_Digital_Twins_in_Data_Centers.pdf b/citations2/iteration1/ieee/Implementing_Immersive_Analytics_for_Digital_Twins_in_Data_Centers.pdf new file mode 100644 index 0000000..02ecad8 Binary files /dev/null and b/citations2/iteration1/ieee/Implementing_Immersive_Analytics_for_Digital_Twins_in_Data_Centers.pdf differ diff --git a/citations2/iteration1/ieee/Smart_DC_An_AI_and_Digital_Twin-based_Energy-Saving_Solution_for_Data_Centers.pdf b/citations2/iteration1/ieee/Smart_DC_An_AI_and_Digital_Twin-based_Energy-Saving_Solution_for_Data_Centers.pdf new file mode 100644 index 0000000..5b01fc8 Binary files /dev/null and b/citations2/iteration1/ieee/Smart_DC_An_AI_and_Digital_Twin-based_Energy-Saving_Solution_for_Data_Centers.pdf differ diff --git a/citations2/iteration1/ieee/Visualizing_an_Exascale_Data_Center_Digital_Twin_Considerations_Challenges_and_Opportunities.pdf b/citations2/iteration1/ieee/Visualizing_an_Exascale_Data_Center_Digital_Twin_Considerations_Challenges_and_Opportunities.pdf new file mode 100644 index 0000000..af4a8f1 Binary files /dev/null and b/citations2/iteration1/ieee/Visualizing_an_Exascale_Data_Center_Digital_Twin_Considerations_Challenges_and_Opportunities.pdf differ diff --git a/citations2/iteration1/osti/2376329.pdf b/citations2/iteration1/osti/2376329.pdf new file mode 100644 index 0000000..c451c59 Binary files /dev/null and b/citations2/iteration1/osti/2376329.pdf differ diff --git a/citations2/iteration1/osti/2480045.pdf b/citations2/iteration1/osti/2480045.pdf new file mode 100644 index 0000000..3f29d70 Binary files /dev/null and b/citations2/iteration1/osti/2480045.pdf differ diff --git 
a/citations2/iteration1/springer/978-3-030-44907-0.pdf b/citations2/iteration1/springer/978-3-030-44907-0.pdf new file mode 100644 index 0000000..ab5f0e6 Binary files /dev/null and b/citations2/iteration1/springer/978-3-030-44907-0.pdf differ diff --git a/citations2/iteration1/springer/s00466-022-02152-3.pdf b/citations2/iteration1/springer/s00466-022-02152-3.pdf new file mode 100644 index 0000000..7b2a0ab Binary files /dev/null and b/citations2/iteration1/springer/s00466-022-02152-3.pdf differ diff --git "a/citations2/iteration2/wiley/Concurrency and Computation - 2018 - S\303\256rbu - A data\342\200\220driven approach to modeling power consumption for a hybrid.pdf" "b/citations2/iteration2/wiley/Concurrency and Computation - 2018 - S\303\256rbu - A data\342\200\220driven approach to modeling power consumption for a hybrid.pdf" new file mode 100644 index 0000000..c407da7 Binary files /dev/null and "b/citations2/iteration2/wiley/Concurrency and Computation - 2018 - S\303\256rbu - A data\342\200\220driven approach to modeling power consumption for a hybrid.pdf" differ diff --git a/content/background.tex b/content/background.tex index 85f3c57..c88367f 100644 --- a/content/background.tex +++ b/content/background.tex @@ -2,10 +2,38 @@ \section{Datacenters}\label{ss:datacenters} Explain the high risk phenomena that occur in datacenters, which includes failures. -\subsection{Failures} +\subsection{Failures}\label{sss:failures} + \section{Digital Twinning}\label{ss:digital-twinning} + +\gls{ed} is an open-source framework for developing digital twins of supercomputers. +It consists of 3 modules: +\begin{enumerate*}[label=(\arabic*)] + \item resource allocator and power simulator + \item thermal cooling model + \item augmented reality 3D model +\end{enumerate*} +of the supercomputer. +\gls{ed} has been used at the Frontier supercomputer at the Oak Ridge National Laboratory in the USA, successfully predicting potential energy losses at the supercomputer. 
+Brewer \etal include alongside the framework architecture an open-source artifact and a set of extensive verification and validation experiments. +The authors differentiate between different digital twins within \gls{ed}, such as \begin{enumerate*}[label=(\arabic*)] + \item descriptive twin + \item informative twin + \item predictive twin + \item comprehensive twin + \item autonomous twin +\end{enumerate*} +that together form the \gls{ed}. +The \emph{predictive twin} leverages data-driven operational analytics to create \gls{ml} models. The authors argue that alongside simulation, \gls{ml} models should also have a significant role for modeling system workloads in \eg application fingerprinting. +Within the \emph{autonomous twin} the authors use \gls{rl} to train agents that can be used to make control decisions in order to optimize different processes. +In order to model the cooling system the authors use the Modelica software, and to predict energy power draw they coded a Python script. +The authors provide an intuitive way to interact with the system using a visual dashboard, and an advanced augmented reality model. +The authors posit that the best way to address the 3V's of data (velocity, volume and variety) is to use augmented reality coupled with dashboards. + + + Predictive modelling uses statistics to predict outcomes. When deployed commercially, for example in datacenters, predictive modelling is often referred to as predictive analytics~\cite{Wikipedia:PredictiveModelling}. Almost any statistical model can be used for prediction purposes, but nowadays predictive analysis is synonymous with machine learning. @@ -23,6 +51,62 @@ The process of inference from data to provide the best explanation is called abd %Include something about data-preprocessing in the pipeline. 
%See the article by Fei Tao +\subsection{Datacenter simulation}\label{sss:simulation} + +\begin{table}[h] + \centering + \renewcommand{\arraystretch}{1.4} + \begin{tabular}{m{0.7\linewidth}cc} + \toprule + Feature & \gls{ed} & \\ + \midrule + Virtual Prototyping & & \\ + Scenario Exploration & & \\ + 3D Facility Modelling & & \\ + Predictive maintenance & & \\ + Predictive energy modelling & & \\ + Reliability and availability modelling & & \\ + Cooling modelling & & \\ + Network modelling & & \\ + Predictive modelling & & \\ + Power consumption modelling & & \\ + Visual analytics dashboard & & \\ + Forensic analysis and diagnostics & & \\ + Failure detection & & \\ + Operational optimization & & \\ + Resource allocation & & \\ + \bottomrule + \end{tabular} + \caption{Comparison of selected features of existing datacenter digital twins.} +\end{table} + + +\begin{table}[h] + \centering + \renewcommand{\arraystretch}{1.4} + \begin{tabular}{cccm{0.3\linewidth}c} + \toprule + Project & Environment & Stakeholders & Highlighted Features & GUI \\ + \midrule + + CloudSim & Cloud, Fog, Edge & Research & VC\textsuperscript{$\star$}, N, S, E, WF, FD, EXP, CM, PI & \ding{51}\textsuperscript{$\dagger$} \\ + \midrule + SimGrid & Grid, P2P, Cloud & Research, Edu. & VC\textsuperscript{$\star$}, N\textsuperscript{$\star$}, S, E\textsuperscript{$\star$}, WF\textsuperscript{$\star$} & \ding{51}\textsuperscript{$\dagger$} \\ + \midrule + DGSim & Grid & Research & WF, F, EXP & \ding{55} \\ + \midrule + GroudSim & Grid, Cloud & Research & WF, CM, F & \ding{55} \\ + \midrule + iCanCloud & Cloud & Research & VC, N\textsuperscript{$\star$}, S, CM & \ding{51}\textsuperscript{$\star$} \\ + \midrule + \textbf{OpenDC} & Cloud & Research, Edu. 
& VC\textsuperscript{$\star$}, N, S, E\textsuperscript{$\star$}, CM, FS\textsuperscript{$\star$}, ML, WF, F\textsuperscript{$\star$}, PI, EXP\textsuperscript{$\star$} & \ding{51}\textsuperscript{$\star$} \\ + \bottomrule + \end{tabular} + \caption{Comparison of selected datacenter simulators. \textbf{Models:} VC = VMs and containers; N = Network, S = Storage, E = Energy, CM = Cost Models, FS = FaaS, ML = Machine Learning, WF = Workflows, FD = Federation; \textbf{Phenomena:} F = Failures, PI = Performance interface; \textbf{Tools:} EXP = Experiment automation; \textbf{Support:} \ding{51} = Yes, \ding{55} = No; $\dagger$ = extension, not integrated; $\star$ = advanced, carefully calibrated feature. Adapted from Mastenbroek \etal} +\end{table} + + + One of the key arguments that speak for a datacenter digital twin is that datacenters already connect hundreds of monitoring sensors and data coming from them. Monitoring of server racks, VM's, CPU profiling and all that give us lots of data. @@ -32,7 +116,6 @@ ODA can predict failures, help maintain the equipment, save bills, cut costs. But currently one of the key challenges is to somehow connect the physical and virtual spaces. The answer to how to do this is a digital twin. - %[citation needed] As of 2026, there is a lack of consensus of what is a digital twin. diff --git a/content/intro.tex b/content/intro.tex index 0aa5b84..6b92521 100644 --- a/content/intro.tex +++ b/content/intro.tex @@ -65,6 +65,8 @@ Many \gls{dcdt} frameworks still lack critical data analysis components, fault d Such limitations gravely reduce the applicability of \gls{dcdt}'s in real world scenarios~\cite{DBLP:journals/corr/IosupKLVG22}. \gls{dcdt}'s are urgently needed, because datacenters exhibit hundreds unexpected events every day,such as \eg service failures or hardware faults. Downtime, which is the result of failures, disturbs the users and produces unfulfilled \gls{sla}~\cite{DBLP:conf/acsos/TalluriOVTI21}. 
+% On the operational side, two main areas have been instrumental for improving datacenter efficiency: simulations and analysis of system telemetry. Additional improvements necessitate innovative tools that focus on end-to-end improvement, such as digital twins~\cite{DBLP:ExaDigiT}. +% DT's merge both simulation and telemetry to develop a holistic virtual representation of the system, bridging both the physical and virtual worlds. However, predicting datacenter behaviour quickly and reliably is a non-trivial problem that remains insufficiently unaddressed in the existing \gls{dcdt} architectures ~\cite{DBLP:conf/wosp/SumanCNTMI24, DBLP:journals/computer/AthavaleBBMMPS24} and deployments~\cite{DBLP:conf/sc/BrewerMKWBHSGGW24}. @@ -116,6 +118,13 @@ To answer the third research question, we will need to design comprehensive expe \section{Thesis Contributions}\label{s:thesis-contributions} + +\begin{enumerate}[label=\textbf{C\arabic*.}, align=left] + \item An open-source \gls{dcdt} prototype for predictive facility maintenance, with data analysis supported by in-band and out-of-band telemetry and discrete-event simulation. + \item Extensive evaluation and validation experiments of the system. + \item Demonstration of the \gls{dcdt} paired with a simulated datacenter. +\end{enumerate} + \section{Plagiarism Declaration}\label{s:plagiarism-declaraion} I hereby declare that this thesis is my own independent work and writing. The thesis does not contain any material copied from other sources (person, Internet, or AI), and has not been submitted for assessment elsewhere. 
diff --git a/style/style.tex b/style/style.tex index 58aebc7..e28a5cb 100644 --- a/style/style.tex +++ b/style/style.tex @@ -1,6 +1,6 @@ \usepackage[T1]{fontenc} \usepackage[inline]{enumitem} -\usepackage{xspace, xcolor, lipsum, booktabs, caption, fancyhdr, footmisc, natbib, nomencl, rotating, setspace, subfigure,tocbibind, vmargin, watermark, graphicx, pifont, float} +\usepackage{xspace, xcolor, lipsum, booktabs, caption, fancyhdr, footmisc, natbib, nomencl, rotating, setspace, subfigure,tocbibind, vmargin, watermark, graphicx, pifont, float, array} \hypersetup { hidelinks=false, -- cgit v1.2.3