From 0e8ad565a78dd194e687003e5ccc8ccf9b28667f Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 6 May 2022 10:22:35 +0200 Subject: refactor(exp/tf20): Remove OpenTelemetry from TF20 experiment This change removes the OpenTelemetry integration from the OpenDC TensorFlow 2020 experiments. Previously, we chose to integrate OpenTelemetry to provide a unified way to report metrics to the users. See the previous commit removing it from the "Compute" modules for the reasoning behind this change. --- .../opendc-experiments-tf20/build.gradle.kts | 1 - .../opendc/experiments/tf20/TensorFlowExperiment.kt | 13 ++----------- .../org/opendc/experiments/tf20/core/SimTFDevice.kt | 18 +----------------- .../opendc/experiments/tf20/core/SimTFDeviceTest.kt | 14 +++++++++----- 4 files changed, 12 insertions(+), 34 deletions(-) (limited to 'opendc-experiments') diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts index 5762ce64..f61c8fef 100644 --- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts +++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts @@ -32,7 +32,6 @@ dependencies { api(projects.opendcHarness.opendcHarnessApi) implementation(projects.opendcSimulator.opendcSimulatorCore) implementation(projects.opendcSimulator.opendcSimulatorCompute) - implementation(projects.opendcTelemetry.opendcTelemetrySdk) implementation(projects.opendcCommon) implementation(libs.kotlin.logging) diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt index 2153a862..19236029 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt +++ 
b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt @@ -22,8 +22,6 @@ package org.opendc.experiments.tf20 -import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.sdk.metrics.SdkMeterProvider import org.opendc.experiments.tf20.core.SimTFDevice import org.opendc.experiments.tf20.distribute.* import org.opendc.experiments.tf20.keras.AlexNet @@ -32,7 +30,6 @@ import org.opendc.harness.dsl.Experiment import org.opendc.harness.dsl.anyOf import org.opendc.simulator.compute.power.LinearPowerModel import org.opendc.simulator.core.runBlockingSimulation -import org.opendc.telemetry.sdk.toOtelClock /** * Experiments with the TensorFlow simulation model. @@ -49,17 +46,11 @@ public class TensorFlowExperiment : Experiment(name = "tf20") { private val batchSize by anyOf(16, 32, 64, 128) override fun doRun(repeat: Int): Unit = runBlockingSimulation { - val meterProvider: MeterProvider = SdkMeterProvider - .builder() - .setClock(clock.toOtelClock()) - .build() - val meter = meterProvider.get("opendc-tf20") - val envInput = checkNotNull(TensorFlowExperiment::class.java.getResourceAsStream(environmentFile)) val def = MLEnvironmentReader().readEnvironment(envInput).first() val device = SimTFDevice( - def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, meter, def.model.cpus[0], - def.model.memory[0], LinearPowerModel(250.0, 60.0) + def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, def.model.cpus[0], def.model.memory[0], + LinearPowerModel(250.0, 60.0) ) val strategy = OneDeviceStrategy(device) diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt index 99948c8e..d2105196 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt +++ 
b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt @@ -22,7 +22,6 @@ package org.opendc.experiments.tf20.core -import io.opentelemetry.api.metrics.Meter import kotlinx.coroutines.* import org.opendc.simulator.compute.SimBareMetalMachine import org.opendc.simulator.compute.SimMachine @@ -50,7 +49,6 @@ public class SimTFDevice( override val isGpu: Boolean, context: CoroutineContext, clock: Clock, - meter: Meter, pu: ProcessingUnit, private val memory: MemoryUnit, powerModel: PowerModel @@ -69,21 +67,9 @@ public class SimTFDevice( ) /** - * The usage of the device. + * Metrics collected by the device. */ - private val _usage = meter.histogramBuilder("device.usage") - .setDescription("The amount of device resources used") - .setUnit("MHz") - .build() private var _resourceUsage = 0.0 - - /** - * The power draw of the device. - */ - private val _power = meter.histogramBuilder("device.power") - .setDescription("The power draw of the device") - .setUnit("W") - .build() private var _powerUsage = 0.0 private var _energyUsage = 0.0 @@ -171,9 +157,7 @@ public class SimTFDevice( } override fun onConverge(conn: FlowConnection, now: Long) { - _usage.record(conn.rate) _resourceUsage = conn.rate - _power.record(machine.psu.powerDraw) _powerUsage = machine.powerUsage _energyUsage = machine.energyUsage } diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt index 0d5fbebb..fd18a3a7 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt @@ -22,7 +22,6 @@ package org.opendc.experiments.tf20.core -import io.opentelemetry.api.metrics.MeterProvider import 
kotlinx.coroutines.coroutineScope import kotlinx.coroutines.launch import org.junit.jupiter.api.Assertions.assertAll @@ -41,14 +40,19 @@ import java.util.* internal class SimTFDeviceTest { @Test fun testSmoke() = runBlockingSimulation { - val meterProvider: MeterProvider = MeterProvider.noop() - val meter = meterProvider.get("opendc-tf20") - val puNode = ProcessingNode("NVIDIA", "Tesla V100", "unknown", 1) val pu = ProcessingUnit(puNode, 0, 960 * 1230.0) val memory = MemoryUnit("NVIDIA", "Tesla V100", 877.0, 32_000) - val device = SimTFDevice(UUID.randomUUID(), isGpu = true, coroutineContext, clock, meter, pu, memory, LinearPowerModel(250.0, 100.0)) + val device = SimTFDevice( + UUID.randomUUID(), + isGpu = true, + coroutineContext, + clock, + pu, + memory, + LinearPowerModel(250.0, 100.0) + ) // Load 1 GiB into GPU memory device.load(1000) -- cgit v1.2.3