Rewrote the FlowEngine (#256)

* Removed unused components. Updated tests. Improved checkpointing model. Improved model, started with SimPowerSource. Implemented FailureModels and Checkpointing. First working version. Midway commit. First update. All simulations are now run with a single CPU and a single MemoryUnit; multiple CPUs are combined into one. This is for performance and explainability. * Fixed merge conflicts. * Updated M3SA paths. * Fixed small typo.
author: Dante Niewenhuis <d.niewenhuis@hotmail.com> 2024-10-25 13:32:41 +0200
committer: GitHub <noreply@github.com> 2024-10-25 13:32:41 +0200
commit: 5a365dbc068f2a8cdfa9813c39cc84bb30e15637 (patch)
tree: 72716d562787b85e03cdc7fe1d30c827054d25a0 /opendc-experiments/opendc-experiments-tf20/src/test
parent: 27f5b7dcb05aefdab9b762175d538931face0aba (diff)
2 files changed, 0 insertions, 231 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
deleted file mode 100644
index 447827e9..00000000
--- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2022 AtLarge Research
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-package org.opendc.experiments.tf20
-
-import org.junit.jupiter.api.Assertions.assertEquals
-import org.junit.jupiter.api.assertAll
-import org.opendc.experiments.tf20.core.SimTFDevice
-import org.opendc.experiments.tf20.distribute.MirroredStrategy
-import org.opendc.experiments.tf20.distribute.OneDeviceStrategy
-import org.opendc.experiments.tf20.util.MLEnvironmentReader
-import org.opendc.simulator.compute.power.CpuPowerModels
-import org.opendc.simulator.kotlin.runSimulation
-import java.util.UUID
-
-/**
- * Integration test suite for the TensorFlow application model in OpenDC.
- */
-class TensorFlowTest {
-    /**
-     * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC.
-     */
-    fun testSmokeAlexNet() =
-        runSimulation {
-            val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json"))
-            val def = MLEnvironmentReader().readEnvironment(envInput).first()
-
-            val device =
-                SimTFDevice(
-                    def.uid,
-                    def.meta["gpu"] as Boolean,
-                    dispatcher,
-                    def.model.cpu,
-                    def.model.memory,
-                    CpuPowerModels.linear(250.0, 60.0),
-                )
-            val strategy = OneDeviceStrategy(device)
-            val batchSize = 32
-            val model = getAlexNet(batchSize.toLong())
-            model.use {
-                it.compile(strategy)
-
-                it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
-            }
-
-            device.close()
-
-            val stats = device.getDeviceStats()
-            assertAll(
-                { assertEquals(3309694252, timeSource.millis()) },
-                { assertEquals(8.27423563E8, stats.energyUsage) },
-            )
-        }
-
-    /**
-     * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC.
-     */
-    fun testSmokeVGG() =
-        runSimulation {
-            val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json"))
-            val def = MLEnvironmentReader().readEnvironment(envInput).first()
-
-            val device =
-                SimTFDevice(
-                    def.uid,
-                    def.meta["gpu"] as Boolean,
-                    dispatcher,
-                    def.model.cpu,
-                    def.model.memory,
-                    CpuPowerModels.linear(250.0, 60.0),
-                )
-            val strategy = OneDeviceStrategy(device)
-            val batchSize = 128
-            val model = getVGG16(batchSize.toLong())
-            model.use {
-                it.compile(strategy)
-
-                it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
-            }
-
-            device.close()
-
-            val stats = device.getDeviceStats()
-            assertAll(
-                { assertEquals(176230328513, timeSource.millis()) },
-                { assertEquals(4.405758212825E10, stats.energyUsage) },
-            )
-        }
-
-    /**
-     * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC.
-     */
-    fun testSmokeDistribute() =
-        runSimulation {
-            val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json"))
-            val def = MLEnvironmentReader().readEnvironment(envInput).first()
-
-            val deviceA =
-                SimTFDevice(
-                    def.uid,
-                    def.meta["gpu"] as Boolean,
-                    dispatcher,
-                    def.model.cpu,
-                    def.model.memory,
-                    CpuPowerModels.linear(250.0, 60.0),
-                )
-
-            val deviceB =
-                SimTFDevice(
-                    UUID.randomUUID(),
-                    def.meta["gpu"] as Boolean,
-                    dispatcher,
-                    def.model.cpu,
-                    def.model.memory,
-                    CpuPowerModels.linear(250.0, 60.0),
-                )
-
-            val strategy = MirroredStrategy(listOf(deviceA, deviceB))
-            val batchSize = 32
-            val model = getAlexNet(batchSize.toLong())
-            model.use {
-                it.compile(strategy)
-
-                it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
-            }
-
-            deviceA.close()
-            deviceB.close()
-
-            val statsA = deviceA.getDeviceStats()
-            val statsB = deviceB.getDeviceStats()
-            assertAll(
-                { assertEquals(1704994000, timeSource.millis()) },
-                { assertEquals(4.262485E8, statsA.energyUsage) },
-                { assertEquals(4.262485E8, statsB.energyUsage) },
-            )
-        }
-}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
deleted file mode 100644
index e0c4599a..00000000
--- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 AtLarge Research
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-package org.opendc.experiments.tf20.core
-
-import kotlinx.coroutines.coroutineScope
-import kotlinx.coroutines.launch
-import org.junit.jupiter.api.Assertions.assertAll
-import org.junit.jupiter.api.Assertions.assertEquals
-import org.junit.jupiter.api.Test
-import org.opendc.simulator.compute.model.Cpu
-import org.opendc.simulator.compute.model.MemoryUnit
-import org.opendc.simulator.compute.power.CpuPowerModels
-import org.opendc.simulator.kotlin.runSimulation
-import java.util.UUID
-
-/**
- * Test suite for the [SimTFDevice] class.
- */
-internal class SimTFDeviceTest {
-    @Test
-    fun testSmoke() =
-        runSimulation {
-            val pu = Cpu(0, 1, 960 * 1230.0, "NVIDIA", "Tesla V100", "unknown")
-            val memory = MemoryUnit("NVIDIA", "Tesla V100", 877.0, 32_000)
-
-            val device =
-                SimTFDevice(
-                    UUID.randomUUID(),
-                    isGpu = true,
-                    dispatcher,
-                    pu,
-                    memory,
-                    CpuPowerModels.linear(250.0, 100.0),
-                )
-
-            // Load 1 GiB into GPU memory
-            device.load(1000)
-            assertEquals(1140, timeSource.millis())
-
-            coroutineScope {
-                launch { device.compute(1e6) }
-                launch { device.compute(2e6) }
-            }
-
-            device.close()
-
-            val stats = device.getDeviceStats()
-
-            assertAll(
-                { assertEquals(3681, timeSource.millis()) },
-                { assertEquals(749.25, stats.energyUsage) },
-            )
-        }
-}
author	Dante Niewenhuis <d.niewenhuis@hotmail.com>	2024-10-25 13:32:41 +0200
committer	GitHub <noreply@github.com>	2024-10-25 13:32:41 +0200
commit	5a365dbc068f2a8cdfa9813c39cc84bb30e15637 (patch)
tree	72716d562787b85e03cdc7fe1d30c827054d25a0 /opendc-experiments/opendc-experiments-tf20/src/test
parent	27f5b7dcb05aefdab9b762175d538931face0aba (diff)