merge: Restructure experiments and remove legacy harness (#82)

This pull request restructures the experiments present in the `opendc-experiments` directory and removes the legacy OpenDC Harness.

Previously, the experiments were written against the OpenDC Harness, which facilitates generation and execution of scenarios. However, because the harness relied on a special Kotlin DSL to specify experiments, it does not integrate well into the web-based workflow of OpenDC, where users should be able to submit scenarios in the web interface and have them simulated automatically in the cloud.

In a future pull request, we'll attempt to introduce a similar approach for specifying and running experiments as we have done for the Radice experiments, where the entire experiment is described in a serializable (JSON/YAML) format.

## Implementation Notes :hammer_and_pick:

* Add helper tools for FaaS simulations
* Fix infinite loop due to invalid rounding
* Convert experiment into integration test
* Add independent Capelin distribution
* Remove OpenDC Harness modules
* Remove unnecessary dependencies

## Breaking API Changes :warning:

* Removal of the OpenDC Harness modules. Instead, we now package each experiment individually. In the future, we'll focus on extracting common code from the Capelin and Radice experiments so that it can be re-used by other experiments as well.
author: Fabian Mastenbroek <mail.fabianm@gmail.com> 2022-05-06 22:36:25 +0200
committer: GitHub <noreply@github.com> 2022-05-06 22:36:25 +0200
commit: 09c6168f022245380d910cd35495b657ab34fbd1 (patch)
tree: f40c1d040c84b322da42ce2f4285a214e0f27e6e /opendc-experiments/opendc-experiments-tf20
parent: c3d8d967f82f39f1ef461d5687eb68fb867336c5 (diff)
parent: 0cb3ff64074ba2bfd671c8ca945f54708ea66fe9 (diff)
5 files changed, 58 insertions, 21 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
index f61c8fef..7b3b084f 100644
--- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
@@ -20,16 +20,15 @@
  * SOFTWARE.
  */
 
-description = "Experiments with the OpenDC TensorFlow model"
+description = "TensorFlow application model in OpenDC"
 
 /* Build configuration */
 plugins {
-    `experiment-conventions`
+    `kotlin-conventions`
     `testing-conventions`
 }
 
 dependencies {
-    api(projects.opendcHarness.opendcHarnessApi)
     implementation(projects.opendcSimulator.opendcSimulatorCore)
     implementation(projects.opendcSimulator.opendcSimulatorCompute)
     implementation(projects.opendcCommon)
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt
index 9ef5b621..be166bd5 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt
@@ -20,8 +20,10 @@
  * SOFTWARE.
  */
 
-package org.opendc.experiments.tf20.keras
+package org.opendc.experiments.tf20
 
+import org.opendc.experiments.tf20.keras.Sequential
+import org.opendc.experiments.tf20.keras.TrainableModel
 import org.opendc.experiments.tf20.keras.activations.Activation
 import org.opendc.experiments.tf20.keras.layer.conv.Conv2D
 import org.opendc.experiments.tf20.keras.layer.conv.ConvPadding
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
index d2105196..90350142 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
@@ -39,6 +39,7 @@ import java.util.*
 import kotlin.coroutines.Continuation
 import kotlin.coroutines.CoroutineContext
 import kotlin.coroutines.resume
+import kotlin.math.ceil
 import kotlin.math.roundToLong
 
 /**
@@ -137,7 +138,7 @@ public class SimTFDevice(
                 if (activeWork.consume(consumedWork)) {
                     this.activeWork = null
                 } else {
-                    val duration = (activeWork.flops / conn.capacity * 1000).roundToLong()
+                    val duration = ceil(activeWork.flops / conn.capacity * 1000).toLong()
                     conn.push(conn.capacity)
                     return duration
                 }
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
index 19236029..7d72b48d 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 AtLarge Research
+ * Copyright (c) 2022 AtLarge Research
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -22,43 +22,76 @@
 
 package org.opendc.experiments.tf20
 
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertAll
 import org.opendc.experiments.tf20.core.SimTFDevice
-import org.opendc.experiments.tf20.distribute.*
-import org.opendc.experiments.tf20.keras.AlexNet
+import org.opendc.experiments.tf20.distribute.OneDeviceStrategy
 import org.opendc.experiments.tf20.util.MLEnvironmentReader
-import org.opendc.harness.dsl.Experiment
-import org.opendc.harness.dsl.anyOf
 import org.opendc.simulator.compute.power.LinearPowerModel
 import org.opendc.simulator.core.runBlockingSimulation
 
 /**
- * Experiments with the TensorFlow simulation model.
+ * Integration test suite for the TensorFlow application model in OpenDC.
  */
-public class TensorFlowExperiment : Experiment(name = "tf20") {
+class TensorFlowTest {
     /**
-     * The environment file to use.
+     * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC.
      */
-    private val environmentFile by anyOf("/kth.json")
+    @Test
+    fun testSmokeAlexNet() = runBlockingSimulation {
+        val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json"))
+        val def = MLEnvironmentReader().readEnvironment(envInput).first()
+
+        val device = SimTFDevice(
+            def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, def.model.cpus[0], def.model.memory[0],
+            LinearPowerModel(250.0, 60.0)
+        )
+        val strategy = OneDeviceStrategy(device)
+        val batchSize = 32
+        val model = AlexNet(batchSize.toLong())
+        model.use {
+            it.compile(strategy)
+
+            it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
+        }
+
+        device.close()
+
+        val stats = device.getDeviceStats()
+        assertAll(
+            { assertEquals(3309694252, clock.millis()) },
+            { assertEquals(8.2520933087E8, stats.energyUsage) }
+        )
+    }
 
     /**
-     * The batch size used.
+     * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC.
      */
-    private val batchSize by anyOf(16, 32, 64, 128)
-
-    override fun doRun(repeat: Int): Unit = runBlockingSimulation {
-        val envInput = checkNotNull(TensorFlowExperiment::class.java.getResourceAsStream(environmentFile))
+    @Test
+    fun testSmokeVGG() = runBlockingSimulation {
+        val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json"))
         val def = MLEnvironmentReader().readEnvironment(envInput).first()
+
         val device = SimTFDevice(
             def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, def.model.cpus[0], def.model.memory[0],
             LinearPowerModel(250.0, 60.0)
         )
         val strategy = OneDeviceStrategy(device)
-
-        val model = AlexNet(batchSize.toLong())
+        val batchSize = 128
+        val model = VGG16(batchSize.toLong())
         model.use {
             it.compile(strategy)
 
             it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
         }
+
+        device.close()
+
+        val stats = device.getDeviceStats()
+        assertAll(
+            { assertEquals(176230322904, clock.millis()) },
+            { assertEquals(4.296544914744E10, stats.energyUsage) }
+        )
     }
 }
diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
index fd18a3a7..21d30250 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
@@ -63,6 +63,8 @@ internal class SimTFDeviceTest {
             launch { device.compute(2e6) }
         }
 
+        device.close()
+
         val stats = device.getDeviceStats()
 
         assertAll(
author	Fabian Mastenbroek <mail.fabianm@gmail.com>	2022-05-06 22:36:25 +0200
committer	GitHub <noreply@github.com>	2022-05-06 22:36:25 +0200
commit	09c6168f022245380d910cd35495b657ab34fbd1 (patch)
tree	f40c1d040c84b322da42ce2f4285a214e0f27e6e /opendc-experiments/opendc-experiments-tf20
parent	c3d8d967f82f39f1ef461d5687eb68fb867336c5 (diff)
parent	0cb3ff64074ba2bfd671c8ca945f54708ea66fe9 (diff)