diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-05-04 16:24:53 +0200 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-05-06 18:37:36 +0200 |
| commit | c7eec7904e08029b3ab31d3e7b21afa1ea9ab7e6 (patch) | |
| tree | 459724b394f5aca35733582a024fd5d99d06a7a4 /opendc-compute/opendc-compute-simulator/src/main | |
| parent | a9657e4fa3b15e2c1c11884b5a250b0861bcc21d (diff) | |
refactor(compute/service): Remove OpenTelemetry from "compute" modules
This change removes the OpenTelemetry integration from the OpenDC
Compute modules. Previously, we chose to integrate OpenTelemetry to
provide a unified way to report metrics to the users.
Although this worked as expected, the overhead of the OpenTelemetry when
collecting metrics during simulation was considerable and lacked more
optimization opportunities (other than providing a separate API
implementation). Furthermore, since we were tied to OpenTelemetry's SDK
implementation, we experienced issues with throttling and registering
multiple instruments.
We will instead use another approach, where we expose the core metrics
in OpenDC via specialized interfaces (see the commits before) such that
access is fast and can be done without having to interface with
OpenTelemetry. In addition, we will provide an adapter to that is able
to forward these metrics to OpenTelemetry implementations, so we can
still integrate with the wider ecosystem.
Diffstat (limited to 'opendc-compute/opendc-compute-simulator/src/main')
2 files changed, 10 insertions, 305 deletions
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 323ae4fe..c28239b4 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -22,12 +22,6 @@ package org.opendc.compute.simulator -import io.opentelemetry.api.common.AttributeKey -import io.opentelemetry.api.common.Attributes -import io.opentelemetry.api.metrics.Meter -import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.api.metrics.ObservableDoubleMeasurement -import io.opentelemetry.api.metrics.ObservableLongMeasurement import kotlinx.coroutines.* import org.opendc.compute.api.Flavor import org.opendc.compute.api.Server @@ -67,7 +61,6 @@ public class SimHost( override val meta: Map<String, Any>, context: CoroutineContext, engine: FlowEngine, - meterProvider: MeterProvider, hypervisorProvider: SimHypervisorProvider, scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), @@ -86,11 +79,6 @@ public class SimHost( private val clock = engine.clock /** - * The [Meter] to track metrics of the simulated host. - */ - private val meter = meterProvider.get("org.opendc.compute.simulator") - - /** * The event listeners registered with this host. */ private val listeners = mutableListOf<HostListener>() @@ -142,48 +130,6 @@ public class SimHost( init { launch() - - meter.upDownCounterBuilder("system.guests") - .setDescription("Number of guests on this host") - .setUnit("1") - .buildWithCallback(::collectGuests) - meter.gaugeBuilder("system.cpu.limit") - .setDescription("Amount of CPU resources available to the host") - .buildWithCallback(::collectCpuLimit) - meter.gaugeBuilder("system.cpu.demand") - .setDescription("Amount of CPU resources the guests would use if there were no CPU contention or CPU limits") - .setUnit("MHz") - .buildWithCallback { result -> result.record(hypervisor.cpuDemand) } - meter.gaugeBuilder("system.cpu.usage") - .setDescription("Amount of CPU resources used by the host") - .setUnit("MHz") - .buildWithCallback { result -> result.record(hypervisor.cpuUsage) } - meter.gaugeBuilder("system.cpu.utilization") - .setDescription("Utilization of the CPU resources of the host") - .setUnit("%") - .buildWithCallback { result -> result.record(hypervisor.cpuUsage / _cpuLimit) } - meter.counterBuilder("system.cpu.time") - .setDescription("Amount of CPU time spent by the host") - .setUnit("s") - .buildWithCallback(::collectCpuTime) - meter.gaugeBuilder("system.power.usage") - .setDescription("Power usage of the host ") - .setUnit("W") - .buildWithCallback { result -> result.record(machine.powerUsage) } - meter.counterBuilder("system.power.total") - .setDescription("Amount of energy used by the CPU") - .setUnit("J") - .ofDoubles() - .buildWithCallback { result -> result.record(machine.energyUsage) } - meter.counterBuilder("system.time") - .setDescription("The uptime of the host") - .setUnit("s") - .buildWithCallback(::collectUptime) - meter.gaugeBuilder("system.time.boot") - .setDescription("The boot time of the host") - .setUnit("1") - .ofLongs() - .buildWithCallback(::collectBootTime) } override fun canFit(server: Server): Boolean { @@ -278,7 +224,7 @@ public class SimHost( return HostSystemStats( Duration.ofMillis(_uptime), Duration.ofMillis(_downtime), - Instant.ofEpochMilli(_bootTime), + _bootTime, machine.powerUsage, machine.energyUsage, terminated, @@ -358,7 +304,7 @@ public class SimHost( _ctx = machine.startWorkload(object : SimWorkload { override fun onStart(ctx: SimMachineContext) { try { - _bootTime = clock.millis() + _bootTime = clock.instant() _state = HostState.UP hypervisor.onStart(ctx) } catch (cause: Throwable) { @@ -422,80 +368,11 @@ public class SimHost( return MachineModel(processingUnits, memoryUnits) } - private val STATE_KEY = AttributeKey.stringKey("state") - - private val terminatedState = Attributes.of(STATE_KEY, "terminated") - private val runningState = Attributes.of(STATE_KEY, "running") - private val errorState = Attributes.of(STATE_KEY, "error") - private val invalidState = Attributes.of(STATE_KEY, "invalid") - - /** - * Helper function to collect the guest counts on this host. - */ - private fun collectGuests(result: ObservableLongMeasurement) { - var terminated = 0L - var running = 0L - var error = 0L - var invalid = 0L - - val guests = _guests.listIterator() - for (guest in guests) { - when (guest.state) { - ServerState.TERMINATED -> terminated++ - ServerState.RUNNING -> running++ - ServerState.ERROR -> error++ - ServerState.DELETED -> { - // Remove guests that have been deleted - this.guests.remove(guest.server) - guests.remove() - } - else -> invalid++ - } - } - - result.record(terminated, terminatedState) - result.record(running, runningState) - result.record(error, errorState) - result.record(invalid, invalidState) - } - - private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } - - /** - * Helper function to collect the CPU limits of a machine. - */ - private fun collectCpuLimit(result: ObservableDoubleMeasurement) { - result.record(_cpuLimit) - - val guests = _guests - for (i in guests.indices) { - guests[i].collectCpuLimit(result) - } - } - - private val _activeState = Attributes.of(STATE_KEY, "active") - private val _stealState = Attributes.of(STATE_KEY, "steal") - private val _lostState = Attributes.of(STATE_KEY, "lost") - private val _idleState = Attributes.of(STATE_KEY, "idle") - - /** - * Helper function to track the CPU time of a machine. - */ - private fun collectCpuTime(result: ObservableLongMeasurement) { - val stats = getCpuStats() - - result.record(stats.activeTime, _activeState) - result.record(stats.idleTime, _idleState) - result.record(stats.stealTime, _stealState) - result.record(stats.lostTime, _lostState) - - val guests = _guests - for (i in guests.indices) { - guests[i].collectCpuTime(result) - } - } - private var _lastReport = clock.millis() + private var _uptime = 0L + private var _downtime = 0L + private var _bootTime: Instant? = null + private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } /** * Helper function to track the uptime of a machine. @@ -517,40 +394,4 @@ public class SimHost( guests[i].updateUptime() } } - - private var _uptime = 0L - private var _downtime = 0L - private val _upState = Attributes.of(STATE_KEY, "up") - private val _downState = Attributes.of(STATE_KEY, "down") - - /** - * Helper function to track the uptime of a machine. - */ - private fun collectUptime(result: ObservableLongMeasurement) { - updateUptime() - - result.record(_uptime, _upState) - result.record(_downtime, _downState) - - val guests = _guests - for (i in guests.indices) { - guests[i].collectUptime(result) - } - } - - private var _bootTime = Long.MIN_VALUE - - /** - * Helper function to track the boot time of a machine. - */ - private fun collectBootTime(result: ObservableLongMeasurement) { - if (_bootTime != Long.MIN_VALUE) { - result.record(_bootTime) - } - - val guests = _guests - for (i in guests.indices) { - guests[i].collectBootTime(result) - } - } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index 0d4c550d..ea3c6549 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -22,12 +22,6 @@ package org.opendc.compute.simulator.internal -import io.opentelemetry.api.common.AttributeKey -import io.opentelemetry.api.common.Attributes -import io.opentelemetry.api.common.AttributesBuilder -import io.opentelemetry.api.metrics.ObservableDoubleMeasurement -import io.opentelemetry.api.metrics.ObservableLongMeasurement -import io.opentelemetry.semconv.resource.attributes.ResourceAttributes import kotlinx.coroutines.* import mu.KotlinLogging import org.opendc.compute.api.Server @@ -77,11 +71,6 @@ internal class Guest( var state: ServerState = ServerState.TERMINATED /** - * The attributes of the guest. - */ - val attributes: Attributes = GuestAttributes(this) - - /** * Start the guest. */ suspend fun start() { @@ -158,7 +147,7 @@ internal class Guest( return GuestSystemStats( Duration.ofMillis(_uptime), Duration.ofMillis(_downtime), - Instant.ofEpochMilli(_bootTime) + _bootTime ) } @@ -235,7 +224,7 @@ internal class Guest( * This method is invoked when the guest was started on the host and has booted into a running state. */ private fun onStart() { - _bootTime = clock.millis() + _bootTime = clock.instant() state = ServerState.RUNNING listener.onStart(this) } @@ -250,18 +239,11 @@ internal class Guest( listener.onStop(this) } - private val STATE_KEY = AttributeKey.stringKey("state") - private var _uptime = 0L private var _downtime = 0L - private val _upState = attributes.toBuilder() - .put(STATE_KEY, "up") - .build() - private val _downState = attributes.toBuilder() - .put(STATE_KEY, "down") - .build() - private var _lastReport = clock.millis() + private var _bootTime: Instant? = null + private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } /** * Helper function to track the uptime and downtime of the guest. @@ -277,122 +259,4 @@ internal class Guest( _downtime += duration } } - - /** - * Helper function to track the uptime of the guest. - */ - fun collectUptime(result: ObservableLongMeasurement) { - updateUptime() - - result.record(_uptime, _upState) - result.record(_downtime, _downState) - } - - private var _bootTime = Long.MIN_VALUE - - /** - * Helper function to track the boot time of the guest. - */ - fun collectBootTime(result: ObservableLongMeasurement) { - if (_bootTime != Long.MIN_VALUE) { - result.record(_bootTime, attributes) - } - } - - private val _activeState = attributes.toBuilder() - .put(STATE_KEY, "active") - .build() - private val _stealState = attributes.toBuilder() - .put(STATE_KEY, "steal") - .build() - private val _lostState = attributes.toBuilder() - .put(STATE_KEY, "lost") - .build() - private val _idleState = attributes.toBuilder() - .put(STATE_KEY, "idle") - .build() - - /** - * Helper function to track the CPU time of a machine. - */ - fun collectCpuTime(result: ObservableLongMeasurement) { - val counters = machine.counters - counters.flush() - - result.record(counters.cpuActiveTime / 1000, _activeState) - result.record(counters.cpuIdleTime / 1000, _idleState) - result.record(counters.cpuStealTime / 1000, _stealState) - result.record(counters.cpuLostTime / 1000, _lostState) - } - - private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } - - /** - * Helper function to collect the CPU limits of a machine. - */ - fun collectCpuLimit(result: ObservableDoubleMeasurement) { - result.record(_cpuLimit, attributes) - } - - /** - * An optimized [Attributes] implementation. - */ - private class GuestAttributes(private val uid: String, private val attributes: Attributes) : Attributes by attributes { - /** - * Construct a [GuestAttributes] instance from a [Guest]. - */ - constructor(guest: Guest) : this( - guest.server.uid.toString(), - Attributes.builder() - .put(ResourceAttributes.HOST_NAME, guest.server.name) - .put(ResourceAttributes.HOST_ID, guest.server.uid.toString()) - .put(ResourceAttributes.HOST_TYPE, guest.server.flavor.name) - .put(AttributeKey.longKey("host.num_cpus"), guest.server.flavor.cpuCount.toLong()) - .put(AttributeKey.longKey("host.mem_capacity"), guest.server.flavor.memorySize) - .put(AttributeKey.stringArrayKey("host.labels"), guest.server.labels.map { (k, v) -> "$k:$v" }) - .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64) - .put(ResourceAttributes.HOST_IMAGE_NAME, guest.server.image.name) - .put(ResourceAttributes.HOST_IMAGE_ID, guest.server.image.uid.toString()) - .build() - ) - - override fun <T : Any?> get(key: AttributeKey<T>): T? { - // Optimize access to the HOST_ID key which is accessed quite often - if (key == ResourceAttributes.HOST_ID) { - @Suppress("UNCHECKED_CAST") - return uid as T? - } - return attributes.get(key) - } - - override fun toBuilder(): AttributesBuilder { - val delegate = attributes.toBuilder() - return object : AttributesBuilder { - - override fun putAll(attributes: Attributes): AttributesBuilder { - delegate.putAll(attributes) - return this - } - - override fun <T : Any?> put(key: AttributeKey<Long>, value: Int): AttributesBuilder { - delegate.put<T>(key, value) - return this - } - - override fun <T : Any?> put(key: AttributeKey<T>, value: T): AttributesBuilder { - delegate.put(key, value) - return this - } - - override fun build(): Attributes = GuestAttributes(uid, delegate.build()) - } - } - - override fun equals(other: Any?): Boolean = attributes == other - - // Cache hash code - private val _hash = attributes.hashCode() - - override fun hashCode(): Int = _hash - } } |
