diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-05-06 10:19:47 +0200 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-05-06 17:45:40 +0200 |
| commit | 8e3905273c7a3f2df4df5d5840e4088d99b0dffb (patch) | |
| tree | 65fc68b4fb23e1254c8e9374346824b264d350c2 /opendc-experiments | |
| parent | ddb57f774415579f97d43a5097381a816c7015ca (diff) | |
refactor(exp/tf20): Directly expose device stats to user
This change updates the `TFDevice` interface to directly expose
statistics about the accelerator device to the user. Previously, the
user had to access these values through OpenTelemetry, which required
substantial extra work.
Diffstat (limited to 'opendc-experiments')
5 files changed, 63 insertions, 12 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts index 43093abf..5762ce64 100644 --- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts +++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts @@ -36,8 +36,7 @@ dependencies { implementation(projects.opendcCommon) implementation(libs.kotlin.logging) - implementation(libs.jackson.module.kotlin) { - exclude(group = "org.jetbrains.kotlin", module = "kotlin-reflect") - } - implementation("org.jetbrains.kotlin:kotlin-reflect:1.6.10") + implementation(libs.jackson.module.kotlin) + + testImplementation(libs.slf4j.simple) } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt index 5245261c..99948c8e 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt @@ -22,7 +22,6 @@ package org.opendc.experiments.tf20.core -import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.metrics.Meter import kotlinx.coroutines.* import org.opendc.simulator.compute.SimBareMetalMachine @@ -52,7 +51,7 @@ public class SimTFDevice( context: CoroutineContext, clock: Clock, meter: Meter, - private val pu: ProcessingUnit, + pu: ProcessingUnit, private val memory: MemoryUnit, powerModel: PowerModel ) : TFDevice { @@ -70,17 +69,13 @@ public class SimTFDevice( ) /** - * The identifier of a device. - */ - private val deviceId = AttributeKey.stringKey("device.id") - - /** * The usage of the device. 
*/ private val _usage = meter.histogramBuilder("device.usage") .setDescription("The amount of device resources used") .setUnit("MHz") .build() + private var _resourceUsage = 0.0 /** * The power draw of the device. @@ -89,6 +84,8 @@ public class SimTFDevice( .setDescription("The power draw of the device") .setUnit("W") .build() + private var _powerUsage = 0.0 + private var _energyUsage = 0.0 /** * The workload that will be run by the device. @@ -175,7 +172,10 @@ public class SimTFDevice( override fun onConverge(conn: FlowConnection, now: Long) { _usage.record(conn.rate) + _resourceUsage = conn.rate _power.record(machine.psu.powerDraw) + _powerUsage = machine.powerUsage + _energyUsage = machine.energyUsage } } @@ -197,6 +197,10 @@ public class SimTFDevice( } } + override fun getDeviceStats(): TFDeviceStats { + return TFDeviceStats(_resourceUsage, _powerUsage, _energyUsage) + } + override fun close() { machine.cancel() scope.cancel() diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt index bbc34ed9..839ed8a9 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt @@ -47,4 +47,9 @@ public interface TFDevice : AutoCloseable { * Perform [flops] amount of computation on the device. */ public suspend fun compute(flops: Double) + + /** + * Collect device statistics. 
+ */ + public fun getDeviceStats(): TFDeviceStats } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt new file mode 100644 index 00000000..016d2a8b --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.core + +/** + * Statistics about a TensorFlow [TFDevice]. + * + * @property resourceUsage The resource usage of the device (in MHz). + * @property powerUsage The instantaneous power draw of the device (in W). + * @property energyUsage Cumulative energy usage of the device since boot (in J). 
+ */ +data class TFDeviceStats( + val resourceUsage: Double, + val powerUsage: Double, + val energyUsage: Double +) diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt index 28a2a319..0d5fbebb 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt @@ -25,6 +25,7 @@ package org.opendc.experiments.tf20.core import io.opentelemetry.api.metrics.MeterProvider import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.launch +import org.junit.jupiter.api.Assertions.assertAll import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test import org.opendc.simulator.compute.model.MemoryUnit @@ -57,6 +58,12 @@ internal class SimTFDeviceTest { launch { device.compute(1e6) } launch { device.compute(2e6) } } - assertEquals(3681, clock.millis()) + + val stats = device.getDeviceStats() + + assertAll( + { assertEquals(3681, clock.millis()) }, + { assertEquals(325.75, stats.energyUsage) } + ) } } |
