diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-05-06 10:19:47 +0200 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-05-06 17:45:40 +0200 |
| commit | 8e3905273c7a3f2df4df5d5840e4088d99b0dffb (patch) | |
| tree | 65fc68b4fb23e1254c8e9374346824b264d350c2 | |
| parent | ddb57f774415579f97d43a5097381a816c7015ca (diff) | |
refactor(exp/tf20): Directly expose device stats stats to user
This change updates the `TFDevice` interface to directly expose
statistics about the accelerator device to the user. Previously, the
user had to access these values through OpenTelemetry, which required
substantial extra work.
5 files changed, 63 insertions, 12 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts index 43093abf..5762ce64 100644 --- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts +++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts @@ -36,8 +36,7 @@ dependencies { implementation(projects.opendcCommon) implementation(libs.kotlin.logging) - implementation(libs.jackson.module.kotlin) { - exclude(group = "org.jetbrains.kotlin", module = "kotlin-reflect") - } - implementation("org.jetbrains.kotlin:kotlin-reflect:1.6.10") + implementation(libs.jackson.module.kotlin) + + testImplementation(libs.slf4j.simple) } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt index 5245261c..99948c8e 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt @@ -22,7 +22,6 @@ package org.opendc.experiments.tf20.core -import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.metrics.Meter import kotlinx.coroutines.* import org.opendc.simulator.compute.SimBareMetalMachine @@ -52,7 +51,7 @@ public class SimTFDevice( context: CoroutineContext, clock: Clock, meter: Meter, - private val pu: ProcessingUnit, + pu: ProcessingUnit, private val memory: MemoryUnit, powerModel: PowerModel ) : TFDevice { @@ -70,17 +69,13 @@ public class SimTFDevice( ) /** - * The identifier of a device. - */ - private val deviceId = AttributeKey.stringKey("device.id") - - /** * The usage of the device. */ private val _usage = meter.histogramBuilder("device.usage") .setDescription("The amount of device resources used") .setUnit("MHz") .build() + private var _resourceUsage = 0.0 /** * The power draw of the device. @@ -89,6 +84,8 @@ public class SimTFDevice( .setDescription("The power draw of the device") .setUnit("W") .build() + private var _powerUsage = 0.0 + private var _energyUsage = 0.0 /** * The workload that will be run by the device. @@ -175,7 +172,10 @@ public class SimTFDevice( override fun onConverge(conn: FlowConnection, now: Long) { _usage.record(conn.rate) + _resourceUsage = conn.rate _power.record(machine.psu.powerDraw) + _powerUsage = machine.powerUsage + _energyUsage = machine.energyUsage } } @@ -197,6 +197,10 @@ public class SimTFDevice( } } + override fun getDeviceStats(): TFDeviceStats { + return TFDeviceStats(_resourceUsage, _powerUsage, _energyUsage) + } + override fun close() { machine.cancel() scope.cancel() diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt index bbc34ed9..839ed8a9 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt @@ -47,4 +47,9 @@ public interface TFDevice : AutoCloseable { * Perform [flops] amount of computation on the device. */ public suspend fun compute(flops: Double) + + /** + * Collect device statistics. + */ + public fun getDeviceStats(): TFDeviceStats } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt new file mode 100644 index 00000000..016d2a8b --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.core + +/** + * Statistics about a TensorFlow [TFDevice]. + * + * @property resourceUsage The resource usage of the device (in MHz). + * @property powerUsage The instantaneous power draw of the device (in W). + * @property energyUsage Cumulative energy usage of the device since boot (in J). + */ +data class TFDeviceStats( + val resourceUsage: Double, + val powerUsage: Double, + val energyUsage: Double +) diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt index 28a2a319..0d5fbebb 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt @@ -25,6 +25,7 @@ package org.opendc.experiments.tf20.core import io.opentelemetry.api.metrics.MeterProvider import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.launch +import org.junit.jupiter.api.Assertions.assertAll import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test import org.opendc.simulator.compute.model.MemoryUnit @@ -57,6 +58,12 @@ internal class SimTFDeviceTest { launch { device.compute(1e6) } launch { device.compute(2e6) } } - assertEquals(3681, clock.millis()) + + val stats = device.getDeviceStats() + + assertAll( + { assertEquals(3681, clock.millis()) }, + { assertEquals(325.75, stats.energyUsage) } + ) } } |
