summaryrefslogtreecommitdiff
path: root/main.tex
diff options
context:
space:
mode:
Diffstat (limited to 'main.tex')
-rw-r--r--main.tex129
1 files changed, 107 insertions, 22 deletions
diff --git a/main.tex b/main.tex
index 93780e7..ab51130 100644
--- a/main.tex
+++ b/main.tex
@@ -2,11 +2,11 @@
\input{style/style.tex}
\begin{document}
-\frame{\titlepage \centering \footnotesize Online slides: \url{https://www.overleaf.com/read/tknhxmqfgtdy\#87413e}}
+\frame{\titlepage \centering \footnotesize Online slideshow: \url{mjkw.pl/vu/bsc}}
\begin{frame}\frametitle{Motivation}
\begin{tcolorbox}[title=Context]
- 21\textsuperscript{st} century datacenters are primarily heterogeneous~\cite{DBLP:conf/date/MilojicicFDR21} and
+ 21\textsuperscript{st} century datacenters (DC) are mostly heterogeneous~\cite{DBLP:conf/date/MilojicicFDR21} and
modern computational needs of AI drive managers to diversify datacenters even more~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
In result datacenters become extremely complex and hard to operate with millions of CPU's, GPU's etc.
\end{tcolorbox}
@@ -19,12 +19,11 @@
Left to right: a Google datacenter, server racks, Ada Lovelace AD102 GPU architecture.
\end{frame}
-
\begin{frame}\frametitle{Problem Statement}
\begin{tcolorbox}[title=DCDT's lack predictive analytics]
We need Datacenter Digital Twins (DCDT) to be better able to detect and solve issues in critical ICT infrastructure~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
However, DCDT's are still actively developed and lack crucial features such as predictive analytics~\cite{DBLP:usdoe/report/AP26894} to \emph{e.g.,} prevent unexpected failures.
- With predictive analysis (\emph{e.g.,} regression) DCDT's could save millions of lost \$USD~\cite{DBLP:conf/acsos/TalluriOVTI21}.
+ With predictive analysis (\emph{e.g.,} simulation) DCDT's could save millions of lost \$USD~\cite{DBLP:conf/acsos/TalluriOVTI21}.
\end{tcolorbox}
\begin{center}
@@ -33,7 +32,7 @@
\tiny
\textbf{Figure 1.2:} Where does our work fit within the field of datacenter digital twinning?
There are 5 core elements to any Digital Twin: \myCircled{A} The Digital $\rightarrow$ Physical Twin link, \myCircled{B} the Physical Twin (\emph{e.g.,} the datacenter), \myCircled{C} the Physical $\rightarrow$ Digital Twin link, \myCircled{D} the Digital Twin, \myCircled{E} the features necessary to any Digital Twin.
- \textcolor{ForestGreen}{\faHighlighter~Highlighted areas are the contributions from this thesis, which include the autonomous actions resulting from predictive insights \myCircledGreen{A} and the predictive analysis itself within \myCircledGreen{E}.}
+ \textcolor{Green}{\faHighlighter~Highlighted areas are the contributions from this thesis, which include the autonomous actions resulting from predictive insights \myCircledGreen{A} and the predictive analysis itself within \myCircledGreen{E}.}
\end{frame}
\begin{frame}\frametitle{Research Questions}
@@ -46,7 +45,7 @@
\end{tcolorbox}
\begin{tcolorbox}[title=Research Question 2]
- How to design a datacenter digital twin reference architecture using discrete-event simulation and predictive data analytics?
+ How to design a reference architecture for a predictive datacenter digital twin using discrete-event simulation?
\end{tcolorbox}
\begin{tcolorbox}[title=Research Question 3]
@@ -57,45 +56,131 @@
\begin{frame}\frametitle{\textbf{RQ1}: Literature Review I}
\begin{tcolorbox}[title=Results]
- This is a dummy sentence meant to make the tcolorbox have more than 2 lines of text width so that I am able to show the text and the table spacing better.
- I hope it fits its purpose well.
+ The literature on DCDTs is scarce.
+ Some systems barely classify as DTs (\emph{e.g.,} Kalibre~\cite{DBLP:conf/sensys/WangZD0TCWZ20}, ChatTwin~\cite{DBLP:conf/sensys/LiW0Z0T23}).
+ Existing deployments specialize in \textcolor{Red}{Cooling and Heat Modelling}, together with \textcolor{Red}{3D visualizations}.
+ Most lack crucial predictive DC behaviour modelling.
\end{tcolorbox}
\input{images/table.tex}
+ % Research on DTs for datacenters has consisted of separate, siloed efforts focused on datacenter cooling, network performance, power consumption, or visualization.
+ % CFD usually means Navier-Stokes equations.
+ % CFD models take ages to compute.
\end{frame}
\begin{frame}\frametitle{\textbf{RQ1}: Literature Review II}
% Mandatory: split the figure into 2: top and bottom, and that way you can fill in the entire slide nicely.
+
+ \begin{tcolorbox}[title=A holistic DCDT system model]
+ We propose a generic model of datacenter digital twinning that can be mapped to each system from \textbf{Table 1.1}. To answer \textbf{RQ2}, we design a ref. arch. for \emph{Operations Model}.
+ We introduce the \emph{Digital Thread}: a bridge between software and reality.
+ \end{tcolorbox}
+ \begin{center}
+ \vspace{-0.1cm}
+ \includegraphics[width=0.8\textwidth]{images/system_model2.pdf}
+ \end{center}
+ % The reason why the cooling system is in the graph is because of the fact that 40\% of total energy consumed in DCs comes from cooling~\cite{DBLP:conf/noms/ZhangZLZWC22}.
+ % It has come to the point where datacenters are being built in the Pan-Arctic region, such as Finland, Russia, Sweden, etc., with Iceland leading in number of DCs https://www.datacentermap.com/iceland/
+ % The SmarDC digital twin is purely to get more training data for AI models.
+ % Not really a digital twin per se.
+
+ \tiny
+ \textbf{Figure 1.3:} To answer \textbf{RQ1} we designed a generic datacenter digital twin system model based on a comprehensive literature review and findings from \textbf{Table 1.1}. The \emph{Infrastructure Model} simulates the structure of the DC and the \emph{Operations Model} simulates the behaviour of the DC.
+ % Consider splitting the figure into 2 a.k.a. top and bottom.
+ % By the AIAA definition, the DT mimics the structure and behaviour.
+ % Data Lake -> Data Storage
+ % Use cases of DT's found by Brewer et al.: augmented reality, forensic analysis and diagnostics, predictive modelling, failure detection, operational optimization, ``what-if'' scenarios and virtual prototyping.
+\end{frame}
+
+\begin{frame}\frametitle{\textbf{RQ2}: Reference Architecture}
+ % Make Kafka logos clearly defined --> add a legend with icons?
+ \hspace{-0.3cm}
\begin{minipage}[b]{0.45\linewidth}
\begin{center}
- \includegraphics[width=1.15\textwidth]{images/system_model.pdf}
+ % Change to Datacenter (Physical Twin)
+ \includegraphics[width=1.15\textwidth]{images/ref_architecture.pdf}
\end{center}
+ \vspace{-0.2cm}
\tiny
- \textbf{Figure 1.3:} To answer \textbf{RQ1} we designed a generic datacenter digital twin system model based on a comprehensive literature review and findings from \textbf{Table 1.1}.
+ \textbf{Figure 1.4:} The predictive datacenter digital twin reference architecture.
+ The architecture was designed with the \emph{AtLarge Design Process}~\cite{DBLP:conf/icdcs/IosupVTETBFMT19}.
+ \vspace{0.2cm}
+ \end{minipage}
+ \hspace{0.8cm}
+ \begin{minipage}[b]{0.45\linewidth}
+ \begin{center}
+ \includegraphics[width=1.15\linewidth]{images/implementation.png}
+ \end{center}
+ \vspace{-0.2cm}
+ \tiny
+ \textbf{Figure 1.5:} The prototype based on \textbf{Figure 1.4} towards answering \textbf{RQ3}.
+ The time-series data flows first to the \texttt{Grafana} dashboard, \texttt{PostgreSQL} database and \texttt{Redis} cache, as advised in~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24}.
\end{minipage}
- % Consider splitting the figure into 2 a.k.a. top and bottom.
- % Data Lake -> Data Storage
-\end{frame}
-\begin{frame}\frametitle{\textbf{RQ2}: Reference Architecture}
+ % We decided to use discrete-event simulation, as opposed to computational fluid dynamics because of the high overheads of development time needed for CFD.
+ % CFD simply takes too long to run, making it unfeasible for real-time analytics and simulation.
+ % Citing ExaDigit: [CFD] they are also more computationally expensive, generally making real-time operation unfeasible.
+ % Consider adding this minipage directly to the ``draw.io'' diagram
+\end{frame}
+% You should skip \hfill completely or in favour of \hspace very minimally.
+\begin{frame}\frametitle{\textbf{RQ3}: Experimental Setup}
+ \begin{minipage}[b]{0.45\linewidth}
+ \begin{tcolorbox}[title=Problem, colbacktitle=red!70!black,colback=red!20!white]
+ We cannot just go and test digital twins on large systems, because we do not have large systems at hand.
+ Moreover, real-world experimentation is costly and unsustainable in the long run~\cite{DBLP:conf/ccgrid/MastenbroekAJLB21}.
+ \end{tcolorbox}
+ \vspace{0.5cm}
+ \begin{tcolorbox}[title=Solution, colbacktitle=Green!70!black, colback=Green!20!white]
+ \scriptsize
+ The way we test our reference architecture prototype is by using multiple simulators.
+ We use an additional OpenDC process to play the role of a real datacenter.
+ \end{tcolorbox}
+ \vspace{1cm}
+ \end{minipage}
+ \hspace{0.25cm}
+ \begin{minipage}[b]{0.45\linewidth}
+ \vspace{-0.2cm}
+ \begin{center}
+ \includegraphics[width=1.2\linewidth]{images/predictive_analyticsv3.pdf}
+ \end{center}
+ \tiny
+ \vspace{-0.2cm}
+ \textbf{Figure 1.6:} The experimental setup.
+ Answering \textbf{RQ3}, we provide a novel way to evaluate datacenter digital twins through discrete-event simulation.
+ \end{minipage}
\end{frame}
\begin{frame}\frametitle{\textbf{RQ3}: Experimental Results I}
- \begin{tcolorbox}[title=Main Finding I]
- Here explain what did you find.
+ % You have some model, and this can be based on multiple traces.
+ %Get insight from CINECA --> you get a probability of certain hosts failing.
+ % Anomaly detection --> CINECA, how good their detection is?
+ %If you incorporate that? If you can make the case that because of our new digital twin we can incorporate such models, anomaly/failure detection, from CINECA.
+ %If we had that in, we can reach these kinds of gains.
+ % @Mateusz there is really not a possibility to incorporate CINECA's models, so to address Dante's feedback, I created this experiment.
+
+ \begin{tcolorbox}[title=Failure Detection: Main Finding I]
+ On average, \emph{Sunfish} can detect 14.5\% of unexpected failures in the physical twin.
+ We show that digital twinning \emph{can} be used for failure detection.
+
\end{tcolorbox}
- Here goes the figure that backs up claim in Main Finding I.
- Evidence for Main Finding I.
+ \begin{minipage}[b]{0.45\linewidth}
+ \begin{center}
+ \includegraphics[width=1.1\textwidth]{images/23_Jun_2026_102028.pdf}
+ \end{center}
+ \vspace{-0.3cm}
+ \tiny
+ \textbf{Figure 1.7:} Experiment 1 Setup: The Digital Twin estimates the failures based on the Normal Distribution $\mathcal{N}(\mu, \sigma)$ with $\mu = 1.5$ and $\sigma = 0.5$.
+ ``Real'' OpenDC failures come from WhatsApp user reports.
+ \end{minipage}
% Explain what the axis are in the figure caption.
% Talk about the experimental setup in the figure.
% Give more reliable results than just numbers -- do statistical testing, i.e., standard deviation, confidence intervals.
\end{frame}
-
\begin{frame}\frametitle{\textbf{RQ3}: Experimental Results II}
- \begin{tcolorbox}[title=Main Finding II]
+ \begin{tcolorbox}[title=Scheduling Optimization: Main Finding II]
Here explain what did you find.
\end{tcolorbox}
- Here goes the figure that backs up claim in Main Finding II.
+
\end{frame}
\begin{frame}\frametitle{Key Takeaways}
@@ -121,7 +206,7 @@
\end{frame}
-\setcounter{framenumber}{3}
+\setcounter{framenumber}{4}
\setbeamertemplate{footline}[page number]{
% Unfortunately this must remain here.