summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormjkwiatkowski <mati.rewa@gmail.com>2026-07-02 19:02:27 +0200
committermjkwiatkowski <mati.rewa@gmail.com>2026-07-02 19:02:27 +0200
commit604201a150c62285ad5421d1cdd2928668bf0bee (patch)
tree0a164a8156ef7f6cbc22eb8b081fd918b1ccbe10
parent90fe47fe94b455bbe02b3b017dd993af2ff24af1 (diff)
feat: last changes
-rw-r--r--indent.log2
-rw-r--r--main.tex302
-rw-r--r--style/style.tex2
3 files changed, 168 insertions, 138 deletions
diff --git a/indent.log b/indent.log
index 2b112d6..92ad483 100644
--- a/indent.log
+++ b/indent.log
@@ -1,6 +1,6 @@
INFO: latexindent version 3.24.7, 2025-08-15, a script to indent .tex files
latexindent lives here: /usr/share/texmf-dist/scripts/latexindent/
- Sun Jun 28 19:02:38 2026
+ Sun Jun 28 19:57:30 2026
Reading input from STDIN
INFO: Processing switches:
INFO: Directory for backup files and log file indent.log:
diff --git a/main.tex b/main.tex
index 444bd07..8908ab0 100644
--- a/main.tex
+++ b/main.tex
@@ -1,119 +1,140 @@
-\documentclass[12pt, handout]{beamer}
+\documentclass[12pt]{beamer}
\input{style/style.tex}
\begin{document}
\frame{\titlepage \centering \footnotesize Online slideshow: \url{mjkw.pl/vu/bsc}}
-\begin{frame}\frametitle{Motivation}
- \begin{tcolorbox}[title=Context]
- 21\textsuperscript{st} century datacenters (DC) are mostly heterogeneous~\cite{DBLP:conf/date/MilojicicFDR21} and modern computational needs of AI drive managers to diversify them even more~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
- In result datacenters become extremely complex and hard to operate with millions of CPU's, GPU's etc.
- \end{tcolorbox}
- \begin{center}
- \includegraphics[width=\linewidth]{images/datacenter_complexity.png}
- \end{center}
- \tiny
- \textbf{Figure 1.1:} Society depends on datacenters to keep running, and therefore we cannot afford to let these systems break down or experience significant performance-related issues.
- With millions of servers in the largest datacenters, real-time management becomes very difficult.
- Left to right: a Google datacenter, server racks, Ada Lovelace AD102 GPU architecture.
+\begin{frame}[t]\frametitle{Motivation}
+ \only<1-2>{
+ \vspace{-0.2cm}
+ \begin{tcolorbox}[title=Context]
+ 21\textsuperscript{st} century datacenters (DC) are mostly heterogeneous~\cite{DBLP:conf/date/MilojicicFDR21} and modern computational needs of AI drive managers to diversify them even more~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
+ As a result, datacenters become extremely complex and hard to operate, with millions of CPUs, GPUs, etc.
+ \end{tcolorbox}
+ }
+ \only<2>{\begin{center}
+ \includegraphics[width=\linewidth]{images/datacenter_complexity.png}
+ \end{center}
+ \tiny
+ \textbf{Figure 1.1:} Society depends on datacenters to keep running, and therefore we cannot afford to let these systems break down or experience significant performance-related issues.
+ With millions of servers in the largest datacenters, real-time management becomes very difficult.
+ Left to right: a Google datacenter, server racks, Ada Lovelace AD102 GPU architecture.
+ }
\end{frame}
-\begin{frame}\frametitle{Problem Statement}
- \begin{tcolorbox}[title=DCDT's lack predictive analytics]
- We need Datacenter Digital Twins (DCDT) to be better able to detect and solve issues in critical ICT infrastructure~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
- However, DCDT's are still actively developed and lack crucial features such as predictive analytics~\cite{DBLP:usdoe/report/AP26894} to \emph{e.g.,} prevent unexpected failures.
- \end{tcolorbox}
-
- \begin{center}
- \includegraphics[width=0.9\linewidth]{images/predictive_analytics.pdf}
- \end{center}
- \tiny
- \textbf{Figure 1.2:} Datacenter Digital Twin Diagram. There are 5 core elements to any Digital Twin: \myCircled{A} The Digital $\rightarrow$ Physical Twin link, \myCircled{B} the Physical Twin (\emph{e.g.,} the datacenter), \myCircled{C} the Physical $\rightarrow$ Digital Twin link, \myCircled{D} the Digital Twin, \myCircled{E} the features necessary to any Digital Twin.
- \textcolor{Green}{\faHighlighter~Highlighted areas are the contributions from this thesis, which include the autonomous actions resulting from predictive insights \myCircledGreen{A} and the predictive analysis framework (including simple storage capabilities) within \myCircledGreen{E}.}
+\begin{frame}[t]\frametitle{Problem Statement}
+ \only<1-2>{
+ \vspace{-0.2cm}
+ \begin{tcolorbox}[title=DCDTs lack predictive analytics]
+ We need Datacenter Digital Twins (DCDT) to be better able to detect and solve issues in critical ICT infrastructure~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
+ However, DCDTs are still under active development and lack crucial features such as predictive analytics~\cite{DBLP:usdoe/report/AP26894} to \emph{e.g.,} prevent unexpected failures.
+ \end{tcolorbox}
+ }
+ \only<2>{
+ \begin{center}
+ \includegraphics[width=0.9\linewidth]{images/predictive_analytics.pdf}
+ \end{center}
+ \tiny
+ \textbf{Figure 1.2:} Datacenter Digital Twin Diagram. There are 5 core elements to any Digital Twin: \myCircled{A} The Digital $\rightarrow$ Physical Twin link, \myCircled{B} the Physical Twin (\emph{e.g.,} the datacenter), \myCircled{C} the Physical $\rightarrow$ Digital Twin link, \myCircled{D} the Digital Twin, \myCircled{E} the features necessary to any Digital Twin.
+ \textcolor{Green}{\faHighlighter~Highlighted areas are the contributions from this thesis, which include the autonomous actions resulting from predictive insights \myCircledGreen{A} and the predictive analysis framework (including simple storage capabilities) within \myCircledGreen{E}.}
+ }
\end{frame}
-\begin{frame}\frametitle{Research Questions}
- \begin{tcolorbox}[title=Main Research Question, colbacktitle=red!70!black,colback=red!20!white]
- How to enable predictive analytics for datacenters through digital twinning?
- \end{tcolorbox}
+\begin{frame}[t]\frametitle{Research Questions}
+ \only<1-4>{
+ \vspace{-0.2cm}
+ \begin{tcolorbox}[title=Main Research Question, colbacktitle=red!70!black,colback=red!20!white]
+ How to enable predictive analytics for datacenters through digital twinning?
+ \end{tcolorbox}}
- \begin{tcolorbox}[title=Research Question 1]
- How to asses the current state-of-the-art of digital twinning for datacenters?
- \end{tcolorbox}
+ \only<2-4>{\begin{tcolorbox}[title=Research Question 1]
+ How to assess the current state-of-the-art of digital twinning for datacenters?
+ \end{tcolorbox}}
- \begin{tcolorbox}[title=Research Question 2]
- How to design a reference architecture for a predictive datacenter digital twin using discrete-event simulation?
- \end{tcolorbox}
+ \only<3-4>{\begin{tcolorbox}[title=Research Question 2]
+ How to design a reference architecture for a predictive datacenter digital twin using discrete-event simulation?
+ \end{tcolorbox}
+ }
- \begin{tcolorbox}[title=Research Question 3]
- % no "and validate?"
- How to validate and evaluate a datacenter digital twin architecture in relation to system requirements?
- \end{tcolorbox}
+ \only<4>{\begin{tcolorbox}[title=Research Question 3]
+ % no "and validate?"
+ How to validate and evaluate a datacenter digital twin architecture in relation to system requirements?
+ \end{tcolorbox}
+ }
\end{frame}
-\begin{frame}\frametitle{\textbf{RQ1}: Literature Review I}
- \begin{tcolorbox}[title=Main Finding I]
- There is little literature on DCDTs.
- Some systems barely classify as DTs (\emph{e.g.,} Kalibre~\cite{DBLP:conf/sensys/WangZD0TCWZ20}, ChatTwin~\cite{DBLP:conf/sensys/LiW0Z0T23}).
- Existing deployments specialize in \textcolor{Red}{Cooling and Heat Modelling}, together with \textcolor{Red}{3D visualizations}.
- Most lack predictive modelling of DC operations.
- \end{tcolorbox}
- \vspace{-0.1cm}
- \input{sources/table.tex}
+\begin{frame}[t]\frametitle{\textbf{RQ1}: Literature Review I}
+ \only<1-2>{
+ \vspace{-0.2cm}
+ \begin{tcolorbox}[title=Main Finding I]
+ There is little literature on DCDTs.
+ Some systems barely classify as DTs (\emph{e.g.,} Kalibre~\cite{DBLP:conf/sensys/WangZD0TCWZ20}, ChatTwin~\cite{DBLP:conf/sensys/LiW0Z0T23}).
+ Existing deployments specialize in \textcolor{Red}{Cooling and Heat Modelling}, together with \textcolor{Red}{3D visualizations}.
+ Most lack predictive modelling of DC operations.
+ \end{tcolorbox}}
+ \only<2>{
+ \vspace{-0.2cm}
+ \input{sources/table.tex}
+
+ }
% Research on DTs for datacenters have been separate, siloed efforts focused on either datacenter cooling, network performance, power consumption or visualization efforts.
% CFD usually means Navier-Stokes equations.
% CFD models take ages to compute.
\end{frame}
-\begin{frame}\frametitle{\textbf{RQ1}: Literature Review II}
+\begin{frame}[t]\frametitle{\textbf{RQ1}: Literature Review II}
% Mandatory: split the figure into 2: top and bottom, and that way you can fill in the entire slide nicely.
- \begin{tcolorbox}[title=A holistic DCDT system model]
- We propose a holistic model of datacenter digital twinning that can be mapped to each system from \textbf{Table 1.1}. Within this model (see \textbf{Fig. 1.3}) we introduce a concept of the \emph{Digital Thread}: a bridge between the DCDT and the physical DC equipment.
- \end{tcolorbox}
- \begin{center}
- \vspace{-0.1cm}
- \includegraphics[width=0.8\textwidth]{images/system_model.pdf}
- \end{center}
- % The reason why the cooling system is in the graph is because of the fact that 40\% of total energy consumed in DCs comes from cooling~\cite{DBLP:conf/noms/ZhangZLZWC22}.
- % It has come to the point where datacenters are being build in the Pan-Arctic region, such as Finland,Russia,Sweden etc. with Iceland leading in number of DCs https://www.datacentermap.com/iceland/
- % The SmarDC digital twin is purely to get more training data for AI models.
- % Not really a digital twin per se.
-
- \tiny
- \textbf{Figure 1.3:} To answer \textbf{RQ1} we designed a generic datacenter digital twin system model based on a comprehensive literature review and findings from \textbf{Table 1.1}. The \emph{Infrastructure Model} simulates the structure of the DC and the \emph{Operations Model} simulates the behaviour of the DC.
- \emph{Note:} Federation is not included explicitly but is covered by the model.
- % Consider splitting the figure into 2 a.k.a. top and bottom.
- % By the AIAA definition, the DT mimicks the structure and behaviour.
- % Data Lake -> Data Storage
- % Use cases of DT's found by Brewer et al.: augmented reality, forensic analysis and diagnostics, predictive modelling, failure detection, operational optimization, ``what-if''' scenarios and virtual prototyping.
+ \only<1-2>{\vspace{-0.2cm}
+ \begin{tcolorbox}[title=A holistic DCDT system model]
+ We propose a holistic model of datacenter digital twinning that can be mapped to each system from \textbf{Table 1.1}. Within this model (see \textbf{Fig. 1.3}) we introduce a concept of the \emph{Digital Thread}: a bridge between the DCDT and the physical DC equipment.
+ \end{tcolorbox}
+ }
+ \only<2>{\begin{center}
+ \vspace{-0.1cm}
+ \includegraphics[width=0.8\textwidth]{images/system_model.pdf}
+ \end{center}
+ % The reason why the cooling system is in the graph is because of the fact that 40\% of total energy consumed in DCs comes from cooling~\cite{DBLP:conf/noms/ZhangZLZWC22}.
+ % It has come to the point where datacenters are being built in the Pan-Arctic region, such as Finland, Russia, Sweden etc., with Iceland leading in number of DCs https://www.datacentermap.com/iceland/
+ % The SmarDC digital twin is purely to get more training data for AI models.
+ % Not really a digital twin per se.
+ \tiny
+ \textbf{Figure 1.3:} To answer \textbf{RQ1} we designed a generic datacenter digital twin system model based on a comprehensive literature review and findings from \textbf{Table 1.1}. The \emph{Infrastructure Model} simulates the structure of the DC and the \emph{Operations Model} simulates the behaviour of the DC.
+ \emph{Note:} Federation is not included explicitly but is covered by the model.
+ % Consider splitting the figure into 2 a.k.a. top and bottom.
+ % By the AIAA definition, the DT mimics the structure and behaviour.
+ % Data Lake -> Data Storage
+ % Use cases of DTs found by Brewer et al.: augmented reality, forensic analysis and diagnostics, predictive modelling, failure detection, operational optimization, ``what-if'' scenarios and virtual prototyping.
+ }
\end{frame}
\begin{frame}\frametitle{\textbf{RQ2}: Reference Architecture}
% Make Kafka logos clearly defined --> add a legend with icons?
- \hspace{-0.3cm}
- \begin{minipage}[b]{0.45\linewidth}
- \begin{center}
- % Change to Datacenter (Physical Twin)
- \includegraphics[width=1.15\textwidth]{images/ref_architecture.pdf}
- \end{center}
- \vspace{-0.15cm}
- \tiny
- \textbf{Figure 1.4:} The predictive datacenter digital twin reference architecture.
- We call the system \emph{Sunfish}.
- The architecture was designed with the \emph{AtLarge Design Process}~\cite{DBLP:conf/icdcs/IosupVTETBFMT19} over several iterations in the past months.
- \vspace{0.2cm}
- \end{minipage}
- \hspace{0.6cm}
- \begin{minipage}[b]{0.45\linewidth}
- \begin{center}
- \includegraphics[width=1.17\linewidth]{images/implementation.png}
- \end{center}
- \vspace{-0.2cm}
- \tiny
- \textbf{Figure 1.5:} The prototype and its components based on the architecture.
- The time-series data flows first to the \texttt{Grafana} dashboard, \texttt{PostgreSQL} database and \texttt{Redis} cache as advised in ~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24}.
- \vspace{0.1cm}
- \end{minipage}
+ \only<1-2>{\hspace{-0.3cm}
+ \begin{minipage}[b]{0.45\linewidth}
+ \begin{center}
+ % Change to Datacenter (Physical Twin)
+ \includegraphics[width=1.15\textwidth]{images/ref_architecture.pdf}
+ \end{center}
+ \vspace{-0.15cm}
+ \tiny
+ \textbf{Figure 1.4:} The predictive datacenter digital twin reference architecture.
+ We call the system \emph{Sunfish}.
+ The architecture was designed with the \emph{AtLarge Design Process}~\cite{DBLP:conf/icdcs/IosupVTETBFMT19} over several iterations in the past months.
+ \vspace{0.2cm}
+ \end{minipage}
+ \hspace{0.6cm}
+ }
+ \only<2>{\begin{minipage}[b]{0.45\linewidth}
+ \begin{center}
+ \includegraphics[width=1.17\linewidth]{images/implementation.png}
+ \end{center}
+ \vspace{-0.2cm}
+ \tiny
+ \textbf{Figure 1.5:} The prototype and its components based on the architecture.
+ The time-series data flows first to the \texttt{Grafana} dashboard, \texttt{PostgreSQL} database and \texttt{Redis} cache as advised in~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24}.
+ \vspace{0.1cm}
+ \end{minipage}
+ }
% We decided to use discrete-event simulation, as opposed to computational fluid dynamics because of the high overheads of development time needed for CFD.
% CFD simply takes too long to run, making it unfeasible for real-time analytics and simulation.
@@ -162,27 +183,31 @@
% Failures that are more intensive are worse than failures with long duration.
\begin{tcolorbox}[title=Main Finding II]
We posit digital twinning can be used for failure detection to the benefit of DC operators.
- We replicate an experiment from DyTwin~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24} designed by Milojicic \etal to show our system can reliably detect \emph{unexpected} host failures.
+ We replicate an experiment from DyTwin~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24} designed by Milojicic \etal to show \emph{Sunfish} can reliably detect \emph{unexpected} host failures.
\end{tcolorbox}
- \hspace{-0.2cm}
- \begin{minipage}[b]{0.45\linewidth}
- \begin{center}
- \includegraphics[width=1.1\textwidth]{images/red_yellow_alarms.pdf}
- \end{center}
- \vspace{-0.3cm}
- \tiny
- \textbf{Figure 1.7a:} Experiment 1a. In this experiment we use red and yellow alarms to notify datacenter operators of unexpected failures.
- We use a threshold based on predictions done by the simulator and a statistical distribution.
- \end{minipage}
- \hspace{0.6cm}
- \begin{minipage}[b]{0.45\linewidth}
- \begin{center}
- \includegraphics[width=1.1\textwidth]{images/failure_detecton_rate.pdf}
- \end{center}
- \vspace{-0.3cm}
- \tiny
- \textbf{Figure 1.7b:} Experiment 1b. The mean failure detection rate is around 12\%. Even though this seems low, if we look at \textbf{Fig. E.1} (see Extra Slides), this simply means around 12\% of failures are unexpected.
- \end{minipage}
+ \only<1-2>{
+ \hspace{-0.2cm}
+ \begin{minipage}[b]{0.45\linewidth}
+ \vspace{0.1cm}
+ \begin{center}
+ \includegraphics[width=1.1\textwidth]{images/red_yellow_alarms.pdf}
+ \end{center}
+ \vspace{-0.3cm}
+ \tiny
+ \textbf{Figure 1.7a:} Experiment 1a. In this experiment we use red and yellow alarms to notify datacenter operators of unexpected failures.
+ We use a threshold based on predictions done by the simulator and a statistical distribution.
+ \end{minipage}
+ \hspace{0.6cm}
+ }
+ \only<2>{\begin{minipage}[b]{0.45\linewidth}
+ \begin{center}
+ \includegraphics[width=1.1\textwidth]{images/failure_detecton_rate.pdf}
+ \end{center}
+ \vspace{-0.3cm}
+ \tiny
+ \textbf{Figure 1.7b:} Experiment 1b. The mean failure detection rate is around 12\%. Even though this seems low, if we look at \textbf{Fig. E.1} (see Extra Slides), this simply means around 12\% of failures are unexpected.
+ \end{minipage}
+ }
% Explain what the axes are in the figure caption.
% Talk about the experimental setup in the figure.
% Give more reliable results than just numbers -- do statistical testing, i.e., standard deviation, confidence intervals.
@@ -199,33 +224,38 @@
\begin{tcolorbox}[title=Main Finding III]
Predicting failures in advance is really difficult. \emph{Sunfish} is capable of dynamic adjustments to the physical twin at runtime, and can slightly lower the number of failed tasks.
\end{tcolorbox}
- \hspace{-0.2cm}
- \begin{minipage}[b]{0.45\linewidth}
- \begin{center}
- \includegraphics[width=1.1\textwidth]{images/failure_likelihood.pdf}
- \end{center}
- \vspace{-0.3cm}
- \tiny
- \textbf{Figure 1.8a:} Experiment 2a. The figure shows which failure distribution is the most likely to be the true failure distribution while the simulation is running.
- This figure shows the difficulty of predictive analytics.
- \end{minipage}
- \hspace{0.5cm}
- \begin{minipage}[b]{0.45\linewidth}
- \vspace{-0.1cm}
- \begin{center}
- \includegraphics[width=1.1\textwidth]{images/conceptual_experiment.pdf}
- \end{center}
- \vspace{-0.3cm}
- \tiny
- \textbf{Figure 1.8b:} Experiment 2b. With perfect precognition (\emph{i.e.,} knowing on which day, what failures might happen) we could lower the mean number of failures.
- This experiment is a proof of concept (results are indication-only).
- \end{minipage}
+ \only<1-2>{
+ \hspace{-0.2cm}
+ \begin{minipage}[b]{0.45\linewidth}
+ \vspace{0.2cm}
+ \begin{center}
+ \includegraphics[width=1.1\textwidth]{images/failure_likelihood.pdf}
+ \end{center}
+ \vspace{-0.3cm}
+ \tiny
+ \textbf{Figure 1.8a:} Experiment 2a. The figure shows which failure distribution is the most likely to be the true failure distribution while the simulation is running.
+ This figure shows the difficulty of predictive analytics.
+ \end{minipage}
+ \hspace{0.5cm}
+ }
+ \only<2>{\begin{minipage}[b]{0.45\linewidth}
+ \vspace{-0.1cm}
+ \begin{center}
+ \includegraphics[width=1.1\textwidth]{images/conceptual_experiment.pdf}
+ \end{center}
+ \vspace{-0.3cm}
+ \tiny
+ \textbf{Figure 1.8b:} Experiment 2b. With perfect precognition (\emph{i.e.,} knowing on which day, what failures might happen) we could lower the mean number of failures.
+ This experiment is a proof of concept (results are indication-only).
+ \end{minipage}
+ }
\end{frame}
\begin{frame}\frametitle{Key Takeaways}
+ \vspace{-0.2cm}
\begin{tcolorbox}[title=Societal Context]
Datacenter manageability is a top-priority for the digital society.
- Over 3 million jobs in the Netherlands directly depend on cloud services, which are hosted in datacenters~\cite{DBLP:journals/corr/IosupKLVG22}.
+ Over 3 million jobs in the Netherlands directly depend on cloud services, which are hosted in datacenters~\cite{DBLP:journals/corr/IosupKLVG22}.
\end{tcolorbox}
\begin{tcolorbox}[title=Problem Statement]
@@ -244,7 +274,7 @@
% Not enough space for another tcolorbox.
\end{frame}
-\setcounter{framenumber}{4}
+\setcounter{framenumber}{2}
\setbeamertemplate{footline}[page number]{}
% Unfortunately this must remain here.
@@ -272,7 +302,7 @@
\end{tcolorbox}
- \begin{tcolorbox}[title=How did we adjust OpenDC (Physical Twin)?]
+ \begin{tcolorbox}[title=How did we adjust OpenDC (Physical Twin)?]
We use a SURF~\cite{DBLP:journals/fgcs/VersluisCGLPCUI23} datacenter topology with 277 hosts.
We wrote a custom Kotlin \texttt{ComputeMonitor} to export live-metrics into Kafka, and a custom Kotlin \texttt{HTTPClient} to talk to the digital twin.
We add a new scheduling mechanism, the \texttt{SmartScheduler}.
diff --git a/style/style.tex b/style/style.tex
index 9f2647c..1020b8f 100644
--- a/style/style.tex
+++ b/style/style.tex
@@ -1,6 +1,6 @@
\usetheme{Rochester}
\usepackage[dvipsnames]{xcolor}
-\usepackage{helvet, textpos, stix, caption, booktabs, array, lipsum, fontawesome5, circledsteps, url, inconsolata, amsmath, amssymb, xspace}
+\usepackage{helvet, textpos, stix, caption, booktabs, array, lipsum, fontawesome5, circledsteps, url, inconsolata, amsmath, amssymb, xspace, animate, multimedia}
\newcommand{\etal}{\emph{et~al.}\xspace}