From 6b1970ea96a71f96d36065f2d8aafd89076704d3 Mon Sep 17 00:00:00 2001
From: mjkwiatkowski <mati.rewa@gmail.com>
Date: Fri, 26 Jun 2026 13:34:06 +0200
Subject: feat: added the experimental results plots

---
 main.tex | 89 +++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 20 deletions(-)

(limited to 'main.tex')

diff --git a/main.tex b/main.tex
index 2ff5706..700c64f 100644
--- a/main.tex
+++ b/main.tex
@@ -55,7 +55,7 @@
 \end{frame}
 
 \begin{frame}\frametitle{\textbf{RQ1}: Literature Review I}
-	\begin{tcolorbox}[title=Main Finding]
+	\begin{tcolorbox}[title=Main Finding I]
 		The literature on DCDTs is scarce.
 		Some systems barely classify as DTs (\emph{e.g.,} Kalibre~\cite{DBLP:conf/sensys/WangZD0TCWZ20}, ChatTwin~\cite{DBLP:conf/sensys/LiW0Z0T23}).
 		Existing deployments specialize in \textcolor{Red}{Cooling and Heat Modelling}, together with \textcolor{Red}{3D visualizations}.
@@ -105,7 +105,7 @@
 		The architecture was designed with the \emph{AtLarge Design Process}~\cite{DBLP:conf/icdcs/IosupVTETBFMT19}.
 		\vspace{0.2cm}
 	\end{minipage}
-	\hspace{0.8cm}
+	\hspace{0.6cm}
 	\begin{minipage}[b]{0.45\linewidth}
 		\begin{center}
 			\includegraphics[width=1.17\linewidth]{images/implementation.png}
@@ -123,7 +123,8 @@
 	% Consider adding this minipage directly to the ``draw.io'' diagram
 \end{frame}
 % You should skip \hfill completely or in favour of \hspace very minimally.
-\begin{frame}\frametitle{\textbf{RQ3}: Experimental Setup}
+\begin{frame}\frametitle{\textbf{RQ3}: Experimental Setup I}
+	\hspace{-0.3cm}
 	\begin{minipage}[b]{0.45\linewidth}
 		\begin{tcolorbox}[title=Problem, colbacktitle=red!70!black,colback=red!20!white]
 			We cannot just go and test digital twins on large systems, because we do not have large systems at hand.
@@ -150,6 +151,7 @@
 	\end{minipage}
 \end{frame}
 
+
 \begin{frame}\frametitle{\textbf{RQ3}: Experimental Results I}
 	% You have some model, and this can be based on multiple traces.
 	%Get insight from CINECA --> you get a probability of certain hosts failing.
@@ -157,32 +159,52 @@
 	%If you incorporate that? If you can make the case that because of our new digital twin we can incorporate such models, anomaly/failure detection, from CINECA.
 	%If we had that in, we can reach these kinds of gains.
 	% @Mateusz there is really not a possibility to incorporate CINECA's models, so to address Dante's feedback, I created this experiment.
-
-	\begin{tcolorbox}[title=Validation]
+	% If a single host crashes for the entire workload, that's not really that bad.
+	% If a lot of hosts suddenly crash but for a really short time, that's terrible.
+	% Failures that are more intensive are worse than failures with long duration.
+	\begin{tcolorbox}[title=Main Finding II]
 		We posit digital twinning can be used for failure detection to the benefit of DC operators.
-		We validate our system against DyTwin~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24} designed by Milojicic \etal to show we achieve similar results.
+		We replicate an experiment from DyTwin~\cite{DBLP:conf/sc/TaheriBPRHDEWPM24} designed by Milojicic \etal to show our system can reliably detect \emph{unexpected} host failures.
 	\end{tcolorbox}
+	\hspace{-0.2cm}
 	\begin{minipage}[b]{0.45\linewidth}
 		\begin{center}
-			\includegraphics[width=1.1\textwidth]{images/23_Jun_2026_102028.pdf}
+			\includegraphics[width=1.1\textwidth]{images/25_Jun_2026_152341.pdf}
 		\end{center}
 		\vspace{-0.3cm}
 		\tiny
-		\textbf{Figure 1.5:} Experiment 1 Setup: The Digital Twin estimates the failures based on the Normal Distribution \emph{N\textasciitilde($\mu$,$\sigma$)} with $\mu = 1.5$ and $\sigma = 0.5$.
-		``Real'' OpenDC failures come from a WhatsApp user reports.
+		\textbf{Figure 1.7:} Experiment 1a. In this experiment we use red and yellow alarms to notify datacenter operators of unexpected failures.
+		We use a threshold based on predictions made by the simulator.
+	\end{minipage}
+	\hspace{0.6cm}
+	\begin{minipage}[b]{0.45\linewidth}
+		\begin{center}
+			\includegraphics[width=1.1\textwidth]{images/25_Jun_2026_161052.pdf}
+		\end{center}
+		\vspace{-0.3cm}
+		\tiny
+		\textbf{Figure 1.8:} Experiment 1b. The mean failure detection rate is around 15\%. Although this seems low, \textbf{Fig.~1.9} (see extra slides) shows that it simply means around 15\% of failures are unexpected.
 	\end{minipage}
 	% Explain what the axis are in the figure caption.
 	% Talk about the experimental setup in the figure.
 	% Give more reliable results than just numbers -- do statistical testing, i.e., standard deviation, confidence intervals.
 \end{frame}
 
-\begin{frame}\frametitle{\textbf{RQ3}: Experimental Results II}
-	\begin{tcolorbox}[title=Evaluation]
-		Predictive analytics is core to digital twinning. We evaluate our system against the requirements (extra slides) by predicting an optimal scheduling policy.
-		During runtime, we make dynamic adjustments to the physical twin, if the scheduling results differ.
-	\end{tcolorbox}
-
-\end{frame}
+%\begin{frame}\frametitle{\textbf{RQ3}: Experimental Results II}
+%	\begin{tcolorbox}[title=Evaluation]
+%		Predictive analytics is core to digital twinning. We evaluate our system against the requirements (extra slides) by predicting an optimal scheduling policy.
+%		During runtime, we make dynamic adjustments to the physical twin, if the scheduling results differ.
+%	\end{tcolorbox}
+%	\hspace{0.2cm}
+%	\begin{minipage}[b]{0.32\linewidth}
+%		\begin{center}
+%			\includegraphics[width=1.1\textwidth]{images/23_Jun_2026_102028.pdf}
+%		\end{center}
+%		\vspace{-0.3cm}
+%		\tiny
+%		\textbf{Figure 1.9:} Experiment 1
+%	\end{minipage}
+%\end{frame}
 
 \begin{frame}\frametitle{Key Takeaways}
 	\begin{tcolorbox}[title=What is the societal context?]
@@ -200,14 +222,15 @@
 	\end{tcolorbox}
 
 	\begin{tcolorbox}[title=What did we find?]
-		\emph{Sunfish} is able to detect around 20\% of unexpected failures based on discrete-event predictions, and can predict the most efficient scheduling policies for given workloads.
+		\emph{Sunfish} can reliably detect unexpected failures based on discrete-event predictions, and can serve as a foundation for additional research and future work.
 	\end{tcolorbox}
 	% Mandatory to mention here the future work that you see happening.
 	% Not enough space for another tcolorbox.
 \end{frame}
 
-\setcounter{framenumber}{5}
-\setbeamertemplate{footline}[page number]{
+\setcounter{framenumber}{3}
+\setbeamertemplate{footline}[page number]{}
+
 
 % Unfortunately this must remain here.
 \setbeamercolor{frametitle}{fg=Brown,bg=Brown!20}
@@ -217,13 +240,36 @@
 		\usebeamerfont{frametitle}\insertframetitle\hfill
 	\end{beamercolorbox}
 }
-
 \begin{frame}[allowframebreaks]\frametitle{Extra Slides: References}
 	\tiny
 	\bibliographystyle{is-plain}
 	\bibliography{main.bib}
 \end{frame}
 
+\begin{frame}\frametitle{Extra Slides: Technical Setup}
+	\begin{tcolorbox}[title=What is the simulation workload?]
+		The compute workload is BitBrainsSmall.
+		The failure traces include user reports from Gmail, WhatsApp and Twitter.
+
+	\end{tcolorbox}
+	\begin{tcolorbox}[title=What is the experiment environment?] A commodity laptop: a Framework Laptop 13 with 32\,GB of DDR5 RAM, an AMD Ryzen 7840U processor, and Arch Linux with the Linux 7.0.13-arch1-1 kernel.
+
+	\end{tcolorbox}
+
+
+	\begin{tcolorbox}[title=How did we adjust OpenDC (Physical Twin)?]
+		We use a SURF~\cite{DBLP:journals/fgcs/VersluisCGLPCUI23} datacenter topology with 277 hosts.
+		We wrote a custom Kotlin \texttt{ComputeMonitor} to export live metrics to Kafka, and a custom Kotlin \texttt{HTTPClient} to talk to the digital twin.
+		We add a new scheduling mechanism, the \texttt{SmartScheduler}.
+
+	\end{tcolorbox}
+	\begin{tcolorbox}[title=Which metrics do we measure?]
+		Timestamps, host names, uptime, downtime, CPU utilization, \emph{etc.}
+	\end{tcolorbox}
+
+\end{frame}
+
+
 \begin{frame}\frametitle{Extra Slides: Why Digital Twinning?}
 	\begin{tcolorbox}[title=Definition]
 		A DCDT mirrors the structure, context and behaviour of a datacenter~\cite{DBLP:journals/computer/AthavaleBBMMPS24}. The prerequisite to any digital twin is good monitoring and sensing capabilities in the physical entity.
@@ -253,6 +299,9 @@
 	\tiny \textbf{Figure E.3:} Real-time control that is tightly-coupled with the IT equipment is a prerequisite for timely predictions within seconds/minutes~\cite{DBLP:journals/computer/AthavaleBBMMPS24}.
 \end{frame}
 
+
+
+
 % Computational Fluid Dynamics (CFD) have high computation overhead, unsuitable for real-time simulation of a dynamic datacenter.
 %Moreover oftentimes a poorly configured CFD model can lead to high error rates~\cite{DBLP:conf/sensys/WangZD0TCWZ20}.
 %Data-driven Machine Learning performs poorly by the cases not covered in the training data.
-- 
cgit v1.2.3