From 6b1970ea96a71f96d36065f2d8aafd89076704d3 Mon Sep 17 00:00:00 2001 From: mjkwiatkowski Date: Fri, 26 Jun 2026 13:34:06 +0200 Subject: feat: added the experimental results plots --- main.tex | 89 +++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 20 deletions(-) (limited to 'main.tex') diff --git a/main.tex b/main.tex index 2ff5706..700c64f 100644 --- a/main.tex +++ b/main.tex @@ -55,7 +55,7 @@ \end{frame} \begin{frame}\frametitle{\textbf{RQ1}: Literature Review I} - \begin{tcolorbox}[title=Main Finding] + \begin{tcolorbox}[title=Main Finding I] The literature on DCDTs is scarce. Some systems barely classify as DTs (\emph{e.g.,} Kalibre~\cite{DBLP:conf/sensys/WangZD0TCWZ20}, ChatTwin~\cite{DBLP:conf/sensys/LiW0Z0T23}). Existing deployments specialize in \textcolor{Red}{Cooling and Heat Modelling}, together with \textcolor{Red}{3D visualizations}. @@ -105,7 +105,7 @@ The architecture was designed with the \emph{AtLarge Design Process}~\cite{DBLP:conf/icdcs/IosupVTETBFMT19}. \vspace{0.2cm} \end{minipage} - \hspace{0.8cm} + \hspace{0.6cm} \begin{minipage}[b]{0.45\linewidth} \begin{center} \includegraphics[width=1.17\linewidth]{images/implementation.png} @@ -123,7 +123,8 @@ % Consider adding this minipage directly to the ``draw.io'' diagram \end{frame} % You should skip \hfill completely or in favour of \hspace very minimally. -\begin{frame}\frametitle{\textbf{RQ3}: Experimental Setup} +\begin{frame}\frametitle{\textbf{RQ3}: Experimental Setup I} + \hspace{-0.3cm} \begin{minipage}[b]{0.45\linewidth} \begin{tcolorbox}[title=Problem, colbacktitle=red!70!black,colback=red!20!white] We cannot just go and test digital twins on large systems, because we do not have large systems at hand. @@ -150,6 +151,7 @@ \end{minipage} \end{frame} + \begin{frame}\frametitle{\textbf{RQ3}: Experimental Results I} % You have some model, and this can be based on multiple traces. 
%Get insight from CINECA --> you get a probability of certain hosts failing. @@ -157,32 +159,52 @@ %If you incorporate that? If you can make the case that because of our new digital twin we can incorporate such models, anomaly/failure detection, from CINECA. %If we had that in, we can reach these kinds of gains. % @Mateusz there is really not a possibility to incorporate CINECA's models, so to address Dante's feedback, I created this experiment. - - \begin{tcolorbox}[title=Validation] + % If a single host crashes for the entire workload, that's not really that bad. + % If a lot of hosts suddenly crash but for a really short time, that's terrible. + % Failures that are more intensive are worse than failures with long duration. + \begin{tcolorbox}[title=Main Finding II] We posit digital twinning can be used for failure detection to the benefit of DC operators. - We validate our system against DyTwin~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24} designed by Milojicic \etal to show we achieve similar results. + We replicate an experiment from DyTwin~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24} designed by Milojicic \etal to show our system can reliably detect \emph{unexpected} host failures. \end{tcolorbox} + \hspace{-0.2cm} \begin{minipage}[b]{0.45\linewidth} \begin{center} - \includegraphics[width=1.1\textwidth]{images/23_Jun_2026_102028.pdf} + \includegraphics[width=1.1\textwidth]{images/25_Jun_2026_152341.pdf} \end{center} \vspace{-0.3cm} \tiny - \textbf{Figure 1.5:} Experiment 1 Setup: The Digital Twin estimates the failures based on the Normal Distribution \emph{N\textasciitilde($\mu$,$\sigma$)} with $\mu = 1.5$ and $\sigma = 0.5$. - ``Real'' OpenDC failures come from a WhatsApp user reports. + \textbf{Figure 1.7:} Experiment 1a. In this experiment we use red and yellow alarms to notify datacenter operators of unexpected failures. + We use a threshold based on predictions done by the simulator. 
+ \end{minipage} + \hspace{0.6cm} + \begin{minipage}[b]{0.45\linewidth} + \begin{center} + \includegraphics[width=1.1\textwidth]{images/25_Jun_2026_161052.pdf} + \end{center} + \vspace{-0.3cm} + \tiny + \textbf{Figure 1.8:} Experiment 1b. The mean failure detection rate is around 15\%. Even though this seems low, if we look at \textbf{Fig.~1.9} (see extra slides), this simply means around 15\% of failures are unexpected. \end{minipage} % Explain what the axes are in the figure caption. % Talk about the experimental setup in the figure. % Give more reliable results than just numbers -- do statistical testing, i.e., standard deviation, confidence intervals. \end{frame} -\begin{frame}\frametitle{\textbf{RQ3}: Experimental Results II} - \begin{tcolorbox}[title=Evaluation] - Predictive analytics is core to digital twinning. We evaluate our system against the requirements (extra slides) by predicting an optimal scheduling policy. - During runtime, we make dynamic adjustments to the physical twin, if the scheduling results differ. - \end{tcolorbox} - -\end{frame} +%\begin{frame}\frametitle{\textbf{RQ3}: Experimental Results II} +% \begin{tcolorbox}[title=Evaluation] +% Predictive analytics is core to digital twinning. We evaluate our system against the requirements (extra slides) by predicting an optimal scheduling policy. +% During runtime, we make dynamic adjustments to the physical twin, if the scheduling results differ. +% \end{tcolorbox} +% \hspace{0.2cm} +% \begin{minipage}[b]{0.32\linewidth} +% \begin{center} +% \includegraphics[width=1.1\textwidth]{images/23_Jun_2026_102028.pdf} +% \end{center} +% \vspace{-0.3cm} +% \tiny +% \textbf{Figure 1.9:} Experiment 1 +% \end{minipage} +%\end{frame} \begin{frame}\frametitle{Key Takeaways} \begin{tcolorbox}[title=What is the societal context?] @@ -200,14 +222,15 @@ \end{tcolorbox} \begin{tcolorbox}[title=What did we find?]
- \emph{Sunfish} is able to detect around 20\% of unexpected failures based on discrete-event predictions, and can predict the most efficient scheduling policies for given workloads. + \emph{Sunfish} can reliably detect unexpected failures based on discrete-event predictions, and can serve as a foundation for additional research and future work. \end{tcolorbox} % Mandatory to mention here the future work that you see happening. % Not enough space for another tcolorbox. \end{frame} -\setcounter{framenumber}{5} -\setbeamertemplate{footline}[page number]{ +\setcounter{framenumber}{3} +\setbeamertemplate{footline}[page number]{} + % Unfortunately this must remain here. \setbeamercolor{frametitle}{fg=Brown,bg=Brown!20} @@ -217,13 +240,36 @@ \usebeamerfont{frametitle}\insertframetitle\hfill \end{beamercolorbox} } - \begin{frame}[allowframebreaks]\frametitle{Extra Slides: References} \tiny \bibliographystyle{is-plain} \bibliography{main.bib} \end{frame} +\begin{frame}\frametitle{Extra Slides: Technical Setup} + \begin{tcolorbox}[title=What is the simulation workload?] + The compute workload is BitBrainsSmall. + The failure traces include user reports from Gmail, WhatsApp and Twitter. + + \end{tcolorbox} + \begin{tcolorbox}[title=What is the experiment environment?] A commodity laptop: a Framework Laptop 13 with 32\,GB of DDR5 RAM and an AMD Ryzen 7840U processor, running Arch Linux with the Linux 7.0.13-arch1-1 kernel. + + \end{tcolorbox} + + + \begin{tcolorbox}[title=How did we adjust OpenDC (Physical Twin)?] + We use a SURF~\cite{DBLP:journals/fgcs/VersluisCGLPCUI23} datacenter topology with 277 hosts. + We wrote a custom Kotlin \texttt{ComputeMonitor} to export live metrics to Kafka, and a custom Kotlin \texttt{HTTPClient} to talk to the digital twin. + We added a new scheduling mechanism, the \texttt{SmartScheduler}. + + \end{tcolorbox} + \begin{tcolorbox}[title=Which metrics do we measure?]
+ Timestamps, host names, uptime, downtime, CPU utilization \emph{etc.} + \end{tcolorbox} + +\end{frame} + + \begin{frame}\frametitle{Extra Slides: Why Digital Twinning?} \begin{tcolorbox}[title=Definition] A DCDT mirrors the structure, context and behaviour of a datacenter~\cite{DBLP:journals/computer/AthavaleBBMMPS24}. The prerequisite to any digital twin is good monitoring and sensing capabilities in the physical entity. @@ -253,6 +299,9 @@ \tiny \textbf{Figure E.3:} Real-time control that is tightly-coupled with the IT equipment is a prerequisite for timely predictions within seconds/minutes~\cite{DBLP:journals/computer/AthavaleBBMMPS24}. \end{frame} + + + % Computational Fluid Dynamics (CFD) have high computation overhead, unsuitable for real-time simulation of a dynamic datacenter. %Moreover oftentimes a poorly configured CFD model can lead to high error rates~\cite{DBLP:conf/sensys/WangZD0TCWZ20}. %Data-driven Machine Learning performs poorly by the cases not covered in the training data. -- cgit v1.2.3