summaryrefslogtreecommitdiff
path: root/opendc-experiments
diff options
context:
space:
mode:
authorFabian Mastenbroek <mail.fabianm@gmail.com>2022-05-06 19:04:03 +0200
committerGitHub <noreply@github.com>2022-05-06 19:04:03 +0200
commitc3d8d967f82f39f1ef461d5687eb68fb867336c5 (patch)
tree2e9938f63c42e5d02fe203e049377d1d17b5d782 /opendc-experiments
parenta9657e4fa3b15e2c1c11884b5a250b0861bcc21d (diff)
parent260e2228afea08868e8f7f07233b1861b2d7f0c7 (diff)
merge: Move OpenTelemetry integration outside core modules (#81)
This change removes the OpenTelemetry integration from the OpenDC modules. Previously, we chose to integrate OpenTelemetry to provide a unified way to report metrics to the users. Although this worked as expected, the overhead of OpenTelemetry when collecting metrics during simulation was considerable, and it offered few optimization opportunities (other than providing a separate API implementation). Furthermore, since we were tied to OpenTelemetry's SDK implementation, we experienced issues with throttling and registering multiple instruments. We will instead use another approach, where we expose the core metrics in OpenDC via specialized interfaces (see #80) such that access is fast and can be done without having to interface with OpenTelemetry. In addition, we will provide an adapter that is able to forward these metrics to OpenTelemetry implementations, so we can still integrate with the wider ecosystem. ## Implementation Notes :hammer_and_pick: * Remove OpenTelemetry from "compute" modules * Remove OpenTelemetry from "workflow" modules * Remove OpenTelemetry from "FaaS" modules * Remove OpenTelemetry from TF20 experiment * Remove dependency on OpenTelemetry SDK ## External Dependencies :four_leaf_clover: * N/A ## Breaking API Changes :warning: * Metrics are no longer directly exposed via OpenTelemetry. Instead, an adapter needs to be used to access the data via OpenTelemetry.
Diffstat (limited to 'opendc-experiments')
-rw-r--r--opendc-experiments/opendc-experiments-capelin/build.gradle.kts3
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/jmh/kotlin/org/opendc/experiments/capelin/CapelinBenchmarks.kt6
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/jmh/resources/topology.txt5
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt5
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt11
-rw-r--r--opendc-experiments/opendc-experiments-serverless20/build.gradle.kts1
-rw-r--r--opendc-experiments/opendc-experiments-serverless20/src/main/kotlin/org/opendc/experiments/serverless/ServerlessExperiment.kt16
-rw-r--r--opendc-experiments/opendc-experiments-tf20/build.gradle.kts1
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt13
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt18
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt14
11 files changed, 29 insertions, 64 deletions
diff --git a/opendc-experiments/opendc-experiments-capelin/build.gradle.kts b/opendc-experiments/opendc-experiments-capelin/build.gradle.kts
index 9495f4ca..39cf101d 100644
--- a/opendc-experiments/opendc-experiments-capelin/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-capelin/build.gradle.kts
@@ -37,8 +37,6 @@ dependencies {
implementation(projects.opendcSimulator.opendcSimulatorCore)
implementation(projects.opendcSimulator.opendcSimulatorCompute)
implementation(projects.opendcCompute.opendcComputeSimulator)
- implementation(projects.opendcTelemetry.opendcTelemetrySdk)
- implementation(projects.opendcTelemetry.opendcTelemetryCompute)
implementation(libs.config)
implementation(libs.kotlin.logging)
@@ -46,7 +44,6 @@ dependencies {
implementation(libs.jackson.module.kotlin)
implementation(libs.jackson.dataformat.csv)
implementation(kotlin("reflect"))
- implementation(libs.opentelemetry.semconv)
runtimeOnly(projects.opendcTrace.opendcTraceOpendc)
diff --git a/opendc-experiments/opendc-experiments-capelin/src/jmh/kotlin/org/opendc/experiments/capelin/CapelinBenchmarks.kt b/opendc-experiments/opendc-experiments-capelin/src/jmh/kotlin/org/opendc/experiments/capelin/CapelinBenchmarks.kt
index 83b8c0c6..fd2c26f0 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/jmh/kotlin/org/opendc/experiments/capelin/CapelinBenchmarks.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/jmh/kotlin/org/opendc/experiments/capelin/CapelinBenchmarks.kt
@@ -22,14 +22,12 @@
package org.opendc.experiments.capelin
-import kotlinx.coroutines.ExperimentalCoroutinesApi
import org.opendc.compute.service.scheduler.FilterScheduler
import org.opendc.compute.service.scheduler.filters.ComputeFilter
import org.opendc.compute.service.scheduler.filters.RamFilter
import org.opendc.compute.service.scheduler.filters.VCpuFilter
import org.opendc.compute.service.scheduler.weights.CoreRamWeigher
import org.opendc.compute.workload.*
-import org.opendc.compute.workload.telemetry.NoopTelemetryManager
import org.opendc.compute.workload.topology.Topology
import org.opendc.compute.workload.topology.apply
import org.opendc.experiments.capelin.topology.clusterTopology
@@ -46,7 +44,6 @@ import java.util.concurrent.TimeUnit
@Fork(1)
@Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
-@OptIn(ExperimentalCoroutinesApi::class)
class CapelinBenchmarks {
private lateinit var vms: List<VirtualMachine>
private lateinit var topology: Topology
@@ -59,7 +56,7 @@ class CapelinBenchmarks {
val loader = ComputeWorkloadLoader(File("src/test/resources/trace"))
val source = trace("bitbrains-small")
vms = source.resolve(loader, Random(1L)).vms
- topology = checkNotNull(object {}.javaClass.getResourceAsStream("/env/topology.txt")).use { clusterTopology(it) }
+ topology = checkNotNull(object {}.javaClass.getResourceAsStream("/topology.txt")).use { clusterTopology(it) }
}
@Benchmark
@@ -71,7 +68,6 @@ class CapelinBenchmarks {
val runner = ComputeServiceHelper(
coroutineContext,
clock,
- NoopTelemetryManager(),
computeScheduler
)
diff --git a/opendc-experiments/opendc-experiments-capelin/src/jmh/resources/topology.txt b/opendc-experiments/opendc-experiments-capelin/src/jmh/resources/topology.txt
new file mode 100644
index 00000000..6b347bff
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-capelin/src/jmh/resources/topology.txt
@@ -0,0 +1,5 @@
+ClusterID;ClusterName;Cores;Speed;Memory;numberOfHosts;memoryCapacityPerHost;coreCountPerHost
+A01;A01;32;3.2;2048;1;256;32
+B01;B01;48;2.93;1256;6;64;8
+C01;C01;32;3.2;2048;2;128;16
+
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
index 6fd85e8c..0de8aa7b 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
@@ -30,7 +30,7 @@ import org.opendc.compute.workload.ComputeWorkloadLoader
import org.opendc.compute.workload.createComputeScheduler
import org.opendc.compute.workload.export.parquet.ParquetComputeMonitor
import org.opendc.compute.workload.grid5000
-import org.opendc.compute.workload.telemetry.NoopTelemetryManager
+import org.opendc.compute.workload.telemetry.ComputeMetricReader
import org.opendc.compute.workload.topology.apply
import org.opendc.experiments.capelin.model.OperationalPhenomena
import org.opendc.experiments.capelin.model.Topology
@@ -39,7 +39,6 @@ import org.opendc.experiments.capelin.topology.clusterTopology
import org.opendc.harness.dsl.Experiment
import org.opendc.harness.dsl.anyOf
import org.opendc.simulator.core.runBlockingSimulation
-import org.opendc.telemetry.compute.ComputeMetricReader
import java.io.File
import java.time.Duration
import java.util.*
@@ -99,11 +98,9 @@ abstract class Portfolio(name: String) : Experiment(name) {
else
null
val (vms, interferenceModel) = workload.source.resolve(workloadLoader, seeder)
- val telemetry = NoopTelemetryManager()
val runner = ComputeServiceHelper(
coroutineContext,
clock,
- telemetry,
computeScheduler,
failureModel,
interferenceModel?.withSeed(repeat.toLong())
diff --git a/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt b/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
index 62cdf123..fa2cd9c8 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
@@ -33,14 +33,13 @@ import org.opendc.compute.service.scheduler.filters.RamFilter
import org.opendc.compute.service.scheduler.filters.VCpuFilter
import org.opendc.compute.service.scheduler.weights.CoreRamWeigher
import org.opendc.compute.workload.*
-import org.opendc.compute.workload.telemetry.NoopTelemetryManager
+import org.opendc.compute.workload.telemetry.ComputeMetricReader
+import org.opendc.compute.workload.telemetry.ComputeMonitor
+import org.opendc.compute.workload.telemetry.table.HostTableReader
import org.opendc.compute.workload.topology.Topology
import org.opendc.compute.workload.topology.apply
import org.opendc.experiments.capelin.topology.clusterTopology
import org.opendc.simulator.core.runBlockingSimulation
-import org.opendc.telemetry.compute.ComputeMetricReader
-import org.opendc.telemetry.compute.ComputeMonitor
-import org.opendc.telemetry.compute.table.HostTableReader
import java.io.File
import java.time.Duration
import java.util.*
@@ -86,7 +85,6 @@ class CapelinIntegrationTest {
val runner = ComputeServiceHelper(
coroutineContext,
clock,
- NoopTelemetryManager(),
computeScheduler
)
val topology = createTopology()
@@ -136,7 +134,6 @@ class CapelinIntegrationTest {
val runner = ComputeServiceHelper(
coroutineContext,
clock,
- NoopTelemetryManager(),
computeScheduler
)
val topology = createTopology("single")
@@ -182,7 +179,6 @@ class CapelinIntegrationTest {
val simulator = ComputeServiceHelper(
coroutineContext,
clock,
- NoopTelemetryManager(),
computeScheduler,
interferenceModel = interferenceModel?.withSeed(seed.toLong())
)
@@ -226,7 +222,6 @@ class CapelinIntegrationTest {
val simulator = ComputeServiceHelper(
coroutineContext,
clock,
- NoopTelemetryManager(),
computeScheduler,
grid5000(Duration.ofDays(7))
)
diff --git a/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts b/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts
index b96647a6..a6391986 100644
--- a/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts
@@ -33,7 +33,6 @@ dependencies {
implementation(projects.opendcSimulator.opendcSimulatorCore)
implementation(projects.opendcFaas.opendcFaasService)
implementation(projects.opendcFaas.opendcFaasSimulator)
- implementation(projects.opendcTelemetry.opendcTelemetrySdk)
implementation(libs.kotlin.logging)
implementation(libs.config)
}
diff --git a/opendc-experiments/opendc-experiments-serverless20/src/main/kotlin/org/opendc/experiments/serverless/ServerlessExperiment.kt b/opendc-experiments/opendc-experiments-serverless20/src/main/kotlin/org/opendc/experiments/serverless/ServerlessExperiment.kt
index 3312d6c0..1c357f67 100644
--- a/opendc-experiments/opendc-experiments-serverless20/src/main/kotlin/org/opendc/experiments/serverless/ServerlessExperiment.kt
+++ b/opendc-experiments/opendc-experiments-serverless20/src/main/kotlin/org/opendc/experiments/serverless/ServerlessExperiment.kt
@@ -23,8 +23,6 @@
package org.opendc.experiments.serverless
import com.typesafe.config.ConfigFactory
-import io.opentelemetry.api.metrics.MeterProvider
-import io.opentelemetry.sdk.metrics.SdkMeterProvider
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
@@ -44,7 +42,6 @@ import org.opendc.simulator.compute.model.MemoryUnit
import org.opendc.simulator.compute.model.ProcessingNode
import org.opendc.simulator.compute.model.ProcessingUnit
import org.opendc.simulator.core.runBlockingSimulation
-import org.opendc.telemetry.sdk.toOtelClock
import java.io.File
import java.time.Duration
import java.util.*
@@ -76,17 +73,18 @@ public class ServerlessExperiment : Experiment("Serverless") {
private val coldStartModel by anyOf(ColdStartModel.LAMBDA, ColdStartModel.AZURE, ColdStartModel.GOOGLE)
override fun doRun(repeat: Int): Unit = runBlockingSimulation {
- val meterProvider: MeterProvider = SdkMeterProvider
- .builder()
- .setClock(clock.toOtelClock())
- .build()
-
val trace = ServerlessTraceReader().parse(File(config.getString("trace-path")))
val traceById = trace.associateBy { it.id }
val delayInjector = StochasticDelayInjector(coldStartModel, Random())
val deployer = SimFunctionDeployer(clock, this, createMachineModel(), delayInjector) { FunctionTraceWorkload(traceById.getValue(it.name)) }
val service =
- FaaSService(coroutineContext, clock, meterProvider, deployer, routingPolicy, FunctionTerminationPolicyFixed(coroutineContext, clock, timeout = Duration.ofMinutes(10)))
+ FaaSService(
+ coroutineContext,
+ clock,
+ deployer,
+ routingPolicy,
+ FunctionTerminationPolicyFixed(coroutineContext, clock, timeout = Duration.ofMinutes(10))
+ )
val client = service.newClient()
coroutineScope {
diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
index 5762ce64..f61c8fef 100644
--- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
@@ -32,7 +32,6 @@ dependencies {
api(projects.opendcHarness.opendcHarnessApi)
implementation(projects.opendcSimulator.opendcSimulatorCore)
implementation(projects.opendcSimulator.opendcSimulatorCompute)
- implementation(projects.opendcTelemetry.opendcTelemetrySdk)
implementation(projects.opendcCommon)
implementation(libs.kotlin.logging)
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt
index 2153a862..19236029 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt
@@ -22,8 +22,6 @@
package org.opendc.experiments.tf20
-import io.opentelemetry.api.metrics.MeterProvider
-import io.opentelemetry.sdk.metrics.SdkMeterProvider
import org.opendc.experiments.tf20.core.SimTFDevice
import org.opendc.experiments.tf20.distribute.*
import org.opendc.experiments.tf20.keras.AlexNet
@@ -32,7 +30,6 @@ import org.opendc.harness.dsl.Experiment
import org.opendc.harness.dsl.anyOf
import org.opendc.simulator.compute.power.LinearPowerModel
import org.opendc.simulator.core.runBlockingSimulation
-import org.opendc.telemetry.sdk.toOtelClock
/**
* Experiments with the TensorFlow simulation model.
@@ -49,17 +46,11 @@ public class TensorFlowExperiment : Experiment(name = "tf20") {
private val batchSize by anyOf(16, 32, 64, 128)
override fun doRun(repeat: Int): Unit = runBlockingSimulation {
- val meterProvider: MeterProvider = SdkMeterProvider
- .builder()
- .setClock(clock.toOtelClock())
- .build()
- val meter = meterProvider.get("opendc-tf20")
-
val envInput = checkNotNull(TensorFlowExperiment::class.java.getResourceAsStream(environmentFile))
val def = MLEnvironmentReader().readEnvironment(envInput).first()
val device = SimTFDevice(
- def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, meter, def.model.cpus[0],
- def.model.memory[0], LinearPowerModel(250.0, 60.0)
+ def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, def.model.cpus[0], def.model.memory[0],
+ LinearPowerModel(250.0, 60.0)
)
val strategy = OneDeviceStrategy(device)
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
index 99948c8e..d2105196 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
@@ -22,7 +22,6 @@
package org.opendc.experiments.tf20.core
-import io.opentelemetry.api.metrics.Meter
import kotlinx.coroutines.*
import org.opendc.simulator.compute.SimBareMetalMachine
import org.opendc.simulator.compute.SimMachine
@@ -50,7 +49,6 @@ public class SimTFDevice(
override val isGpu: Boolean,
context: CoroutineContext,
clock: Clock,
- meter: Meter,
pu: ProcessingUnit,
private val memory: MemoryUnit,
powerModel: PowerModel
@@ -69,21 +67,9 @@ public class SimTFDevice(
)
/**
- * The usage of the device.
+ * Metrics collected by the device.
*/
- private val _usage = meter.histogramBuilder("device.usage")
- .setDescription("The amount of device resources used")
- .setUnit("MHz")
- .build()
private var _resourceUsage = 0.0
-
- /**
- * The power draw of the device.
- */
- private val _power = meter.histogramBuilder("device.power")
- .setDescription("The power draw of the device")
- .setUnit("W")
- .build()
private var _powerUsage = 0.0
private var _energyUsage = 0.0
@@ -171,9 +157,7 @@ public class SimTFDevice(
}
override fun onConverge(conn: FlowConnection, now: Long) {
- _usage.record(conn.rate)
_resourceUsage = conn.rate
- _power.record(machine.psu.powerDraw)
_powerUsage = machine.powerUsage
_energyUsage = machine.energyUsage
}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
index 0d5fbebb..fd18a3a7 100644
--- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
+++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt
@@ -22,7 +22,6 @@
package org.opendc.experiments.tf20.core
-import io.opentelemetry.api.metrics.MeterProvider
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.launch
import org.junit.jupiter.api.Assertions.assertAll
@@ -41,14 +40,19 @@ import java.util.*
internal class SimTFDeviceTest {
@Test
fun testSmoke() = runBlockingSimulation {
- val meterProvider: MeterProvider = MeterProvider.noop()
- val meter = meterProvider.get("opendc-tf20")
-
val puNode = ProcessingNode("NVIDIA", "Tesla V100", "unknown", 1)
val pu = ProcessingUnit(puNode, 0, 960 * 1230.0)
val memory = MemoryUnit("NVIDIA", "Tesla V100", 877.0, 32_000)
- val device = SimTFDevice(UUID.randomUUID(), isGpu = true, coroutineContext, clock, meter, pu, memory, LinearPowerModel(250.0, 100.0))
+ val device = SimTFDevice(
+ UUID.randomUUID(),
+ isGpu = true,
+ coroutineContext,
+ clock,
+ pu,
+ memory,
+ LinearPowerModel(250.0, 100.0)
+ )
// Load 1 GiB into GPU memory
device.load(1000)