From 8e3905273c7a3f2df4df5d5840e4088d99b0dffb Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 6 May 2022 10:19:47 +0200 Subject: refactor(exp/tf20): Directly expose device stats to user This change updates the `TFDevice` interface to directly expose statistics about the accelerator device to the user. Previously, the user had to access these values through OpenTelemetry, which required substantial extra work. --- .../opendc/experiments/tf20/core/SimTFDevice.kt | 18 ++++++----- .../org/opendc/experiments/tf20/core/TFDevice.kt | 5 +++ .../opendc/experiments/tf20/core/TFDeviceStats.kt | 36 ++++++++++++++++++++++ .../experiments/tf20/core/SimTFDeviceTest.kt | 9 +++++- 4 files changed, 60 insertions(+), 8 deletions(-) create mode 100644 opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt (limited to 'opendc-experiments/opendc-experiments-tf20/src') diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt index 5245261c..99948c8e 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt @@ -22,7 +22,6 @@ package org.opendc.experiments.tf20.core -import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.metrics.Meter import kotlinx.coroutines.* import org.opendc.simulator.compute.SimBareMetalMachine @@ -52,7 +51,7 @@ public class SimTFDevice( context: CoroutineContext, clock: Clock, meter: Meter, - private val pu: ProcessingUnit, + pu: ProcessingUnit, private val memory: MemoryUnit, powerModel: PowerModel ) : TFDevice { @@ -69,11 +68,6 @@ public class SimTFDevice( SimplePowerDriver(powerModel) ) - /** - * The identifier of a device. 
- */ - private val deviceId = AttributeKey.stringKey("device.id") - /** * The usage of the device. */ @@ -81,6 +75,7 @@ public class SimTFDevice( .setDescription("The amount of device resources used") .setUnit("MHz") .build() + private var _resourceUsage = 0.0 /** * The power draw of the device. @@ -89,6 +84,8 @@ public class SimTFDevice( .setDescription("The power draw of the device") .setUnit("W") .build() + private var _powerUsage = 0.0 + private var _energyUsage = 0.0 /** * The workload that will be run by the device. @@ -175,7 +172,10 @@ public class SimTFDevice( override fun onConverge(conn: FlowConnection, now: Long) { _usage.record(conn.rate) + _resourceUsage = conn.rate _power.record(machine.psu.powerDraw) + _powerUsage = machine.powerUsage + _energyUsage = machine.energyUsage } } @@ -197,6 +197,10 @@ public class SimTFDevice( } } + override fun getDeviceStats(): TFDeviceStats { + return TFDeviceStats(_resourceUsage, _powerUsage, _energyUsage) + } + override fun close() { machine.cancel() scope.cancel() diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt index bbc34ed9..839ed8a9 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt @@ -47,4 +47,9 @@ public interface TFDevice : AutoCloseable { * Perform [flops] amount of computation on the device. */ public suspend fun compute(flops: Double) + + /** + * Collect device statistics. 
+ */ + public fun getDeviceStats(): TFDeviceStats } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt new file mode 100644 index 00000000..016d2a8b --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.core + +/** + * Statistics about a TensorFlow [TFDevice]. + * + * @property resourceUsage The resource usage of the device (in MHz). + * @property powerUsage The instantaneous power draw of the device (in W). + * @property energyUsage Cumulative energy usage of the device since boot (in J). 
+ */ +data class TFDeviceStats( + val resourceUsage: Double, + val powerUsage: Double, + val energyUsage: Double +) diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt index 28a2a319..0d5fbebb 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt @@ -25,6 +25,7 @@ package org.opendc.experiments.tf20.core import io.opentelemetry.api.metrics.MeterProvider import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.launch +import org.junit.jupiter.api.Assertions.assertAll import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test import org.opendc.simulator.compute.model.MemoryUnit @@ -57,6 +58,12 @@ internal class SimTFDeviceTest { launch { device.compute(1e6) } launch { device.compute(2e6) } } - assertEquals(3681, clock.millis()) + + val stats = device.getDeviceStats() + + assertAll( + { assertEquals(3681, clock.millis()) }, + { assertEquals(325.75, stats.energyUsage) } + ) } } -- cgit v1.2.3