merge: Fix distributed strategy for TensorFlow experiment (#89)

This pull request fixes an issue where the distributed strategies for the TensorFlow experiments did not work correctly.

## Implementation Notes :hammer_and_pick:

* Limit growth rate for trace construction
* Derive device statistics directly from SimMachine
* Always recompute power usage when a `SimBareMetalMachine` converges
* Add a test case for `MirroredStrategy`
author: Fabian Mastenbroek <mail.fabianm@gmail.com> 2022-06-16 11:08:14 +0200
committer: GitHub <noreply@github.com> 2022-06-16 11:08:14 +0200
commit: 8eab5895dcf21b4a3f585c62db14c9a049c81d98 (patch)
tree: b2b698d85f7397ef67485d52128a9390f40f7252 /opendc-experiments/opendc-experiments-tf20/src/test
parent: d146814bbbb86bfcb19ccb94250424703e9179e5 (diff)
parent: 282f199e6f16350123a915b06faff62ca82be91b (diff)
2 files changed, 44 insertions, 3 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
index 7d72b48d..328f1326 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
@@ -26,10 +26,12 @@ import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertAll
 import org.opendc.experiments.tf20.core.SimTFDevice
+import org.opendc.experiments.tf20.distribute.MirroredStrategy
 import org.opendc.experiments.tf20.distribute.OneDeviceStrategy
 import org.opendc.experiments.tf20.util.MLEnvironmentReader
 import org.opendc.simulator.compute.power.LinearPowerModel
 import org.opendc.simulator.core.runBlockingSimulation
+import java.util.*
 
 /**
  * Integration test suite for the TensorFlow application model in OpenDC.
@@ -61,7 +63,7 @@ class TensorFlowTest {
         val stats = device.getDeviceStats()
         assertAll(
             { assertEquals(3309694252, clock.millis()) },
-            { assertEquals(8.2520933087E8, stats.energyUsage) }
+            { assertEquals(8.27423563E8, stats.energyUsage) }
         )
     }
 
@@ -91,7 +93,46 @@ class TensorFlowTest {
         val stats = device.getDeviceStats()
         assertAll(
             { assertEquals(176230322904, clock.millis()) },
-            { assertEquals(4.296544914744E10, stats.energyUsage) }
+            { assertEquals(4.4057580726E10, stats.energyUsage) }
+        )
+    }
+
+    /**
+     * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC.
+     */
+    @Test
+    fun testSmokeDistribute() = runBlockingSimulation {
+        val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json"))
+        val def = MLEnvironmentReader().readEnvironment(envInput).first()
+
+        val deviceA = SimTFDevice(
+            def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, def.model.cpus[0], def.model.memory[0],
+            LinearPowerModel(250.0, 60.0)
+        )
+
+        val deviceB = SimTFDevice(
+            UUID.randomUUID(), def.meta["gpu"] as Boolean, coroutineContext, clock, def.model.cpus[0], def.model.memory[0],
+            LinearPowerModel(250.0, 60.0)
+        )
+
+        val strategy = MirroredStrategy(listOf(deviceA, deviceB))
+        val batchSize = 32
+        val model = AlexNet(batchSize.toLong())
+        model.use {
+            it.compile(strategy)
+
+            it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
+        }
+
+        deviceA.close()
+        deviceB.close()
+
+        val statsA = deviceA.getDeviceStats()
+        val statsB = deviceB.getDeviceStats()
+        assertAll(
+            { assertEquals(1704994000, clock.millis()) },
+            { assertEquals(4.262485E8, statsA.energyUsage) },
+            { assertEquals(4.262485E8, statsB.energyUsage) }
         )
     }
 }
diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
index 21d30250..051d5730 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
@@ -69,7 +69,7 @@ internal class SimTFDeviceTest {
 
         assertAll(
             { assertEquals(3681, clock.millis()) },
-            { assertEquals(325.75, stats.energyUsage) }
+            { assertEquals(749.25, stats.energyUsage) }
         )
     }
 }
author	Fabian Mastenbroek <mail.fabianm@gmail.com>	2022-06-16 11:08:14 +0200
committer	GitHub <noreply@github.com>	2022-06-16 11:08:14 +0200
commit	8eab5895dcf21b4a3f585c62db14c9a049c81d98 (patch)
tree	b2b698d85f7397ef67485d52128a9390f40f7252 /opendc-experiments/opendc-experiments-tf20/src/test
parent	d146814bbbb86bfcb19ccb94250424703e9179e5 (diff)
parent	282f199e6f16350123a915b06faff62ca82be91b (diff)