Diffstat (limited to 'opendc-compute')
8 files changed, 311 insertions, 132 deletions
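Editor's note: the changes below track two OpenTelemetry API migrations. Bound instruments (counter.bind(attributes)) are gone, so attribute sets are now kept as fields and passed with every add() or record() call, and asynchronous measurement callbacks call record() instead of observe(). A minimal sketch of the counter pattern, assuming a hypothetical SchedulingMetrics wrapper and metric name (both illustrative, not taken from the code below):

import io.opentelemetry.api.common.AttributeKey
import io.opentelemetry.api.common.Attributes
import io.opentelemetry.api.metrics.Meter

// Illustrative only: pre-built Attributes are kept as fields and passed on
// each add() call, replacing the removed bound-instrument API.
class SchedulingMetrics(meter: Meter) {
    private val attempts = meter.counterBuilder("scheduler.attempts")
        .setDescription("Number of scheduling attempts")
        .setUnit("1")
        .build()

    private val successAttr = Attributes.of(AttributeKey.stringKey("result"), "success")
    private val failureAttr = Attributes.of(AttributeKey.stringKey("result"), "failure")

    fun recordSuccess() = attempts.add(1, successAttr)
    fun recordFailure() = attempts.add(1, failureAttr)
}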
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
index 292feabe..27a6ecae 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
@@ -123,12 +123,9 @@ internal class ComputeServiceImpl(
         .setDescription("Number of scheduling attempts")
         .setUnit("1")
         .build()
-    private val _schedulingAttemptsSuccess = _schedulingAttempts
-        .bind(Attributes.of(AttributeKey.stringKey("result"), "success"))
-    private val _schedulingAttemptsFailure = _schedulingAttempts
-        .bind(Attributes.of(AttributeKey.stringKey("result"), "failure"))
-    private val _schedulingAttemptsError = _schedulingAttempts
-        .bind(Attributes.of(AttributeKey.stringKey("result"), "error"))
+    private val _schedulingAttemptsSuccessAttr = Attributes.of(AttributeKey.stringKey("result"), "success")
+    private val _schedulingAttemptsFailureAttr = Attributes.of(AttributeKey.stringKey("result"), "failure")
+    private val _schedulingAttemptsErrorAttr = Attributes.of(AttributeKey.stringKey("result"), "error")

     /**
      * The response time of the service.
@@ -146,8 +143,8 @@ internal class ComputeServiceImpl(
         .setDescription("Number of servers managed by the scheduler")
         .setUnit("1")
         .build()
-    private val _serversPending = _servers.bind(Attributes.of(AttributeKey.stringKey("state"), "pending"))
-    private val _serversActive = _servers.bind(Attributes.of(AttributeKey.stringKey("state"), "active"))
+    private val _serversPendingAttr = Attributes.of(AttributeKey.stringKey("state"), "pending")
+    private val _serversActiveAttr = Attributes.of(AttributeKey.stringKey("state"), "active")

     /**
      * The [TimerScheduler] to use for scheduling the scheduler cycles.
@@ -171,8 +168,8 @@ internal class ComputeServiceImpl(
             val total = hostCount
             val available = availableHosts.size.toLong()

-            result.observe(available, upState)
-            result.observe(total - available, downState)
+            result.record(available, upState)
+            result.record(total - available, downState)
         }

         meter.gaugeBuilder("system.time.provision")
@@ -336,7 +333,7 @@ internal class ComputeServiceImpl(
         server.lastProvisioningTimestamp = now

         queue.add(request)
-        _serversPending.add(1)
+        _servers.add(1, _serversPendingAttr)
         requestSchedulingCycle()
         return request
     }
@@ -384,7 +381,7 @@ internal class ComputeServiceImpl(

             if (request.isCancelled) {
                 queue.poll()
-                _serversPending.add(-1)
+                _servers.add(-1, _serversPendingAttr)
                 continue
             }

@@ -396,8 +393,8 @@ internal class ComputeServiceImpl(
             if (server.flavor.memorySize > maxMemory || server.flavor.cpuCount > maxCores) {
                 // Remove the incoming image
                 queue.poll()
-                _serversPending.add(-1)
-                _schedulingAttemptsFailure.add(1)
+                _servers.add(-1, _serversPendingAttr)
+                _schedulingAttempts.add(1, _schedulingAttemptsFailureAttr)

                 logger.warn { "Failed to spawn $server: does not fit [${clock.instant()}]" }

@@ -412,7 +409,7 @@ internal class ComputeServiceImpl(

             // Remove request from queue
             queue.poll()
-            _serversPending.add(-1)
+            _servers.add(-1, _serversPendingAttr)
             _schedulingLatency.record(now - request.submitTime, server.attributes)

             logger.info { "Assigned server $server to host $host." }
@@ -429,8 +426,8 @@ internal class ComputeServiceImpl(
                 host.spawn(server)
                 activeServers[server] = host

-                _serversActive.add(1)
-                _schedulingAttemptsSuccess.add(1)
+                _servers.add(1, _serversActiveAttr)
+                _schedulingAttempts.add(1, _schedulingAttemptsSuccessAttr)
             } catch (e: Throwable) {
                 logger.error(e) { "Failed to deploy VM" }

@@ -438,7 +435,7 @@ internal class ComputeServiceImpl(
                 hv.provisionedCores -= server.flavor.cpuCount
                 hv.availableMemory += server.flavor.memorySize

-                _schedulingAttemptsError.add(1)
+                _schedulingAttempts.add(1, _schedulingAttemptsErrorAttr)
             }
         }
     }
@@ -494,7 +491,7 @@ internal class ComputeServiceImpl(
         logger.info { "[${clock.instant()}] Server ${server.uid} ${server.name} ${server.flavor} finished." }

         if (activeServers.remove(server) != null) {
-            _serversActive.add(-1)
+            _servers.add(-1, _serversActiveAttr)
         }

         val hv = hostToView[host]
@@ -516,7 +513,7 @@ internal class ComputeServiceImpl(
      */
     private fun collectProvisionTime(result: ObservableLongMeasurement) {
         for ((_, server) in servers) {
-            result.observe(server.lastProvisioningTimestamp, server.attributes)
+            result.record(server.lastProvisioningTimestamp, server.attributes)
         }
     }
 }
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
index 908a58e9..95921e8b 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
@@ -150,15 +150,15 @@ public class SimHost(
         meter.gaugeBuilder("system.cpu.demand")
             .setDescription("Amount of CPU resources the guests would use if there were no CPU contention or CPU limits")
             .setUnit("MHz")
-            .buildWithCallback { result -> result.observe(hypervisor.cpuDemand) }
+            .buildWithCallback { result -> result.record(hypervisor.cpuDemand) }
         meter.gaugeBuilder("system.cpu.usage")
             .setDescription("Amount of CPU resources used by the host")
             .setUnit("MHz")
-            .buildWithCallback { result -> result.observe(hypervisor.cpuUsage) }
+            .buildWithCallback { result -> result.record(hypervisor.cpuUsage) }
         meter.gaugeBuilder("system.cpu.utilization")
             .setDescription("Utilization of the CPU resources of the host")
             .setUnit("%")
-            .buildWithCallback { result -> result.observe(hypervisor.cpuUsage / _cpuLimit) }
+            .buildWithCallback { result -> result.record(hypervisor.cpuUsage / _cpuLimit) }
         meter.counterBuilder("system.cpu.time")
             .setDescription("Amount of CPU time spent by the host")
             .setUnit("s")
@@ -166,12 +166,12 @@ public class SimHost(
         meter.gaugeBuilder("system.power.usage")
             .setDescription("Power usage of the host ")
             .setUnit("W")
-            .buildWithCallback { result -> result.observe(machine.powerUsage) }
+            .buildWithCallback { result -> result.record(machine.powerUsage) }
         meter.counterBuilder("system.power.total")
             .setDescription("Amount of energy used by the CPU")
             .setUnit("J")
             .ofDoubles()
-            .buildWithCallback { result -> result.observe(machine.energyUsage) }
+            .buildWithCallback { result -> result.record(machine.energyUsage) }
         meter.counterBuilder("system.time")
             .setDescription("The uptime of the host")
             .setUnit("s")
@@ -382,10 +382,10 @@ public class SimHost(
             }
         }

-        result.observe(terminated, terminatedState)
-        result.observe(running, runningState)
-        result.observe(error, errorState)
-        result.observe(invalid, invalidState)
+        result.record(terminated, terminatedState)
+        result.record(running, runningState)
+        result.record(error, errorState)
+        result.record(invalid, invalidState)
     }

     private val _cpuLimit = machine.model.cpus.sumOf { it.frequency }
@@ -394,7 +394,7 @@ public class SimHost(
      * Helper function to collect the CPU limits of a machine.
      */
     private fun collectCpuLimit(result: ObservableDoubleMeasurement) {
-        result.observe(_cpuLimit)
+        result.record(_cpuLimit)

         val guests = _guests
         for (i in guests.indices) {
@@ -413,10 +413,10 @@ public class SimHost(
     private fun collectCpuTime(result: ObservableLongMeasurement) {
         val counters = hypervisor.counters

-        result.observe(counters.cpuActiveTime / 1000L, _activeState)
-        result.observe(counters.cpuIdleTime / 1000L, _idleState)
-        result.observe(counters.cpuStealTime / 1000L, _stealState)
-        result.observe(counters.cpuLostTime / 1000L, _lostState)
+        result.record(counters.cpuActiveTime / 1000L, _activeState)
+        result.record(counters.cpuIdleTime / 1000L, _idleState)
+        result.record(counters.cpuStealTime / 1000L, _stealState)
+        result.record(counters.cpuLostTime / 1000L, _lostState)

         val guests = _guests
         for (i in guests.indices) {
@@ -458,8 +458,8 @@ public class SimHost(
     private fun collectUptime(result: ObservableLongMeasurement) {
         updateUptime()

-        result.observe(_uptime, _upState)
-        result.observe(_downtime, _downState)
+        result.record(_uptime, _upState)
+        result.record(_downtime, _downState)

         val guests = _guests
         for (i in guests.indices) {
@@ -474,7 +474,7 @@ public class SimHost(
      */
     private fun collectBootTime(result: ObservableLongMeasurement) {
         if (_bootTime != Long.MIN_VALUE) {
-            result.observe(_bootTime)
+            result.record(_bootTime)
        }

         val guests = _guests
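Editor's note: the SimHost changes are the mechanical half of the same migration; the ObservableDoubleMeasurement and ObservableLongMeasurement callbacks now call record() rather than observe(). A minimal sketch of one such asynchronous gauge, where the metric name and description come from the diff above and the value-source lambda is the only invented piece:

import io.opentelemetry.api.metrics.Meter

// Registers an asynchronous gauge; the callback records the current value
// each time the SDK collects metrics.
fun registerCpuUsageGauge(meter: Meter, cpuUsage: () -> Double) {
    meter.gaugeBuilder("system.cpu.usage")
        .setDescription("Amount of CPU resources used by the host")
        .setUnit("MHz")
        .buildWithCallback { measurement -> measurement.record(cpuUsage()) }
}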
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
index 9f3122db..f49c2824 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
@@ -239,8 +239,8 @@ internal class Guest(
      * Helper function to track the uptime of the guest.
      */
     fun collectUptime(result: ObservableLongMeasurement) {
-        result.observe(_uptime, _upState)
-        result.observe(_downtime, _downState)
+        result.record(_uptime, _upState)
+        result.record(_downtime, _downState)
     }

     private var _bootTime = Long.MIN_VALUE
@@ -250,7 +250,7 @@ internal class Guest(
      */
     fun collectBootTime(result: ObservableLongMeasurement) {
         if (_bootTime != Long.MIN_VALUE) {
-            result.observe(_bootTime, attributes)
+            result.record(_bootTime, attributes)
         }
     }

@@ -273,10 +273,10 @@ internal class Guest(
     fun collectCpuTime(result: ObservableLongMeasurement) {
         val counters = machine.counters

-        result.observe(counters.cpuActiveTime / 1000, _activeState)
-        result.observe(counters.cpuIdleTime / 1000, _idleState)
-        result.observe(counters.cpuStealTime / 1000, _stealState)
-        result.observe(counters.cpuLostTime / 1000, _lostState)
+        result.record(counters.cpuActiveTime / 1000, _activeState)
+        result.record(counters.cpuIdleTime / 1000, _idleState)
+        result.record(counters.cpuStealTime / 1000, _stealState)
+        result.record(counters.cpuLostTime / 1000, _lostState)
     }

     private val _cpuLimit = machine.model.cpus.sumOf { it.frequency }
@@ -285,7 +285,7 @@ internal class Guest(
      * Helper function to collect the CPU limits of a machine.
      */
     fun collectCpuLimit(result: ObservableDoubleMeasurement) {
-        result.observe(_cpuLimit, attributes)
+        result.record(_cpuLimit, attributes)
     }

     /**
diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
index 799a8cf0..dd13b60c 100644
--- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
@@ -22,9 +22,7 @@

 package org.opendc.compute.simulator

-import io.opentelemetry.api.metrics.MeterProvider
 import io.opentelemetry.sdk.metrics.SdkMeterProvider
-import io.opentelemetry.sdk.metrics.export.MetricProducer
 import io.opentelemetry.sdk.resources.Resource
 import kotlinx.coroutines.*
 import org.junit.jupiter.api.Assertions.assertEquals
@@ -83,10 +81,26 @@ internal class SimHostTest {
         val hostResource = Resource.builder()
             .put(HOST_ID, hostId.toString())
             .build()
-        val meterProvider: MeterProvider = SdkMeterProvider
+
+        // Setup metric reader
+        val duration = 5 * 60L
+        val reader = CoroutineMetricReader(
+            this,
+            object : ComputeMetricExporter() {
+                override fun record(reader: HostTableReader) {
+                    activeTime += reader.cpuActiveTime
+                    idleTime += reader.cpuIdleTime
+                    stealTime += reader.cpuStealTime
+                }
+            },
+            exportInterval = Duration.ofSeconds(duration)
+        )
+
+        val meterProvider = SdkMeterProvider
             .builder()
             .setResource(hostResource)
             .setClock(clock.toOtelClock())
+            .registerMetricReader(reader)
             .build()

         val engine = FlowEngine(coroutineContext, clock)
@@ -100,7 +114,6 @@ internal class SimHostTest {
             meterProvider,
             SimFairShareHypervisorProvider()
         )
-        val duration = 5 * 60L
         val vmImageA = MockImage(
             UUID.randomUUID(),
             "<unnamed>",
@@ -136,19 +149,6 @@ internal class SimHostTest {

         val flavor = MockFlavor(2, 0)

-        // Setup metric reader
-        val reader = CoroutineMetricReader(
-            this, listOf(meterProvider as MetricProducer),
-            object : ComputeMetricExporter() {
-                override fun record(reader: HostTableReader) {
-                    activeTime += reader.cpuActiveTime
-                    idleTime += reader.cpuIdleTime
-                    stealTime += reader.cpuStealTime
-                }
-            },
-            exportInterval = Duration.ofSeconds(duration)
-        )
-
         coroutineScope {
             launch { virtDriver.spawn(MockServer(UUID.randomUUID(), "a", flavor, vmImageA)) }
             launch { virtDriver.spawn(MockServer(UUID.randomUUID(), "b", flavor, vmImageB)) }
@@ -169,7 +169,7 @@ internal class SimHostTest {
         // Ensure last cycle is collected
         delay(1000L * duration)
         virtDriver.close()
-        reader.close()
+        meterProvider.close()

         assertAll(
             { assertEquals(658, activeTime, "Active time does not match") },
@@ -195,10 +195,32 @@ internal class SimHostTest {
         val hostResource = Resource.builder()
             .put(HOST_ID, hostId.toString())
             .build()
-        val meterProvider: MeterProvider = SdkMeterProvider
+
+        // Setup metric reader
+        val duration = 5 * 60L
+        val reader = CoroutineMetricReader(
+            this,
+            object : ComputeMetricExporter() {
+                override fun record(reader: HostTableReader) {
+                    activeTime += reader.cpuActiveTime
+                    idleTime += reader.cpuIdleTime
+                    uptime += reader.uptime
+                    downtime += reader.downtime
+                }
+
+                override fun record(reader: ServerTableReader) {
+                    guestUptime += reader.uptime
+                    guestDowntime += reader.downtime
+                }
+            },
+            exportInterval = Duration.ofSeconds(duration)
+        )
+
+        val meterProvider = SdkMeterProvider
             .builder()
             .setResource(hostResource)
             .setClock(clock.toOtelClock())
+            .registerMetricReader(reader)
             .build()

         val engine = FlowEngine(coroutineContext, clock)
@@ -212,7 +234,6 @@ internal class SimHostTest {
             meterProvider,
             SimFairShareHypervisorProvider()
         )
-        val duration = 5 * 60L
         val image = MockImage(
             UUID.randomUUID(),
             "<unnamed>",
@@ -232,25 +253,6 @@ internal class SimHostTest {
         val flavor = MockFlavor(2, 0)
         val server = MockServer(UUID.randomUUID(), "a", flavor, image)

-        // Setup metric reader
-        val reader = CoroutineMetricReader(
-            this, listOf(meterProvider as MetricProducer),
-            object : ComputeMetricExporter() {
-                override fun record(reader: HostTableReader) {
-                    activeTime += reader.cpuActiveTime
-                    idleTime += reader.cpuIdleTime
-                    uptime += reader.uptime
-                    downtime += reader.downtime
-                }
-
-                override fun record(reader: ServerTableReader) {
-                    guestUptime += reader.uptime
-                    guestDowntime += reader.downtime
-                }
-            },
-            exportInterval = Duration.ofSeconds(duration)
-        )
-
         coroutineScope {
             host.spawn(server)
             delay(5000L)
@@ -273,7 +275,7 @@ internal class SimHostTest {

         // Ensure last cycle is collected
         delay(1000L * duration)
-        reader.close()
+        meterProvider.close()

         assertAll(
             { assertEquals(1175, idleTime, "Idle time does not match") },
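Editor's note: in the tests, the reader is no longer constructed around a MetricProducer cast; it is registered on the SdkMeterProvider builder, and closing the provider (rather than the reader) drives the final collection cycle. A reduced sketch of that wiring, assuming the factory-based registerMetricReader overload exposed by the SDK version used here; the readerFactory parameter stands in for the CoroutineMetricReader built in the test:

import io.opentelemetry.sdk.metrics.SdkMeterProvider
import io.opentelemetry.sdk.metrics.export.MetricReaderFactory
import io.opentelemetry.sdk.resources.Resource

// The reader is attached at build time; calling close() on the returned
// provider later shuts the reader down together with the provider.
fun buildMeterProvider(resource: Resource, readerFactory: MetricReaderFactory): SdkMeterProvider =
    SdkMeterProvider.builder()
        .setResource(resource)
        .registerMetricReader(readerFactory)
        .build()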
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt
index 59203b66..a1a65da3 100644
--- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt
@@ -22,10 +22,6 @@

 package org.opendc.compute.workload

-import io.opentelemetry.sdk.metrics.SdkMeterProvider
-import io.opentelemetry.sdk.metrics.export.MetricProducer
-import io.opentelemetry.sdk.resources.Resource
-import io.opentelemetry.semconv.resource.attributes.ResourceAttributes
 import kotlinx.coroutines.coroutineScope
 import kotlinx.coroutines.delay
 import kotlinx.coroutines.launch
@@ -33,12 +29,11 @@ import kotlinx.coroutines.yield
 import org.opendc.compute.service.ComputeService
 import org.opendc.compute.service.scheduler.ComputeScheduler
 import org.opendc.compute.simulator.SimHost
+import org.opendc.compute.workload.telemetry.TelemetryManager
 import org.opendc.compute.workload.topology.HostSpec
 import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel
 import org.opendc.simulator.compute.workload.SimTraceWorkload
 import org.opendc.simulator.flow.FlowEngine
-import org.opendc.telemetry.compute.*
-import org.opendc.telemetry.sdk.toOtelClock
 import java.time.Clock
 import java.time.Duration
 import java.util.*
@@ -50,6 +45,7 @@ import kotlin.math.max
  *
  * @param context [CoroutineContext] to run the simulation in.
  * @param clock [Clock] instance tracking simulation time.
+ * @param telemetry Helper class for managing telemetry.
  * @param scheduler [ComputeScheduler] implementation to use for the service.
  * @param failureModel A failure model to use for injecting failures.
  * @param interferenceModel The model to use for performance interference.
@@ -58,6 +54,7 @@ import kotlin.math.max
 public class ComputeServiceHelper(
     private val context: CoroutineContext,
     private val clock: Clock,
+    private val telemetry: TelemetryManager,
     scheduler: ComputeScheduler,
     private val failureModel: FailureModel? = null,
     private val interferenceModel: VmInterferenceModel? = null,
@@ -69,25 +66,17 @@ public class ComputeServiceHelper(
     public val service: ComputeService

     /**
-     * The [MetricProducer] that are used by the [ComputeService] and the simulated hosts.
-     */
-    public val producers: List<MetricProducer>
-        get() = _metricProducers
-    private val _metricProducers = mutableListOf<MetricProducer>()
-
-    /**
      * The [FlowEngine] to simulate the hosts.
      */
-    private val engine = FlowEngine(context, clock)
+    private val _engine = FlowEngine(context, clock)

     /**
      * The hosts that belong to this class.
      */
-    private val hosts = mutableSetOf<SimHost>()
+    private val _hosts = mutableSetOf<SimHost>()

     init {
-        val (service, serviceMeterProvider) = createService(scheduler, schedulingQuantum)
-        this._metricProducers.add(serviceMeterProvider)
+        val service = createService(scheduler, schedulingQuantum)
         this.service = service
     }
@@ -165,27 +154,14 @@ public class ComputeServiceHelper(
      * @return The [SimHost] that has been constructed by the runner.
      */
     public fun registerHost(spec: HostSpec, optimize: Boolean = false): SimHost {
-        val resource = Resource.builder()
-            .put(HOST_ID, spec.uid.toString())
-            .put(HOST_NAME, spec.name)
-            .put(HOST_ARCH, ResourceAttributes.HostArchValues.AMD64)
-            .put(HOST_NCPUS, spec.model.cpus.size)
-            .put(HOST_MEM_CAPACITY, spec.model.memory.sumOf { it.size })
-            .build()
-
-        val meterProvider = SdkMeterProvider.builder()
-            .setClock(clock.toOtelClock())
-            .setResource(resource)
-            .build()
-        _metricProducers.add(meterProvider)
-
+        val meterProvider = telemetry.createMeterProvider(spec)
         val host = SimHost(
             spec.uid,
             spec.name,
             spec.model,
             spec.meta,
             context,
-            engine,
+            _engine,
             meterProvider,
             spec.hypervisor,
             powerDriver = spec.powerDriver,
@@ -193,7 +169,7 @@ public class ComputeServiceHelper(
             optimize = optimize
         )

-        hosts.add(host)
+        _hosts.add(host)
         service.addHost(host)

         return host
@@ -202,27 +178,18 @@ public class ComputeServiceHelper(
     override fun close() {
         service.close()

-        for (host in hosts) {
+        for (host in _hosts) {
             host.close()
         }

-        hosts.clear()
+        _hosts.clear()
     }

     /**
      * Construct a [ComputeService] instance.
      */
-    private fun createService(scheduler: ComputeScheduler, schedulingQuantum: Duration): Pair<ComputeService, SdkMeterProvider> {
-        val resource = Resource.builder()
-            .put(ResourceAttributes.SERVICE_NAME, "opendc-compute")
-            .build()
-
-        val meterProvider = SdkMeterProvider.builder()
-            .setClock(clock.toOtelClock())
-            .setResource(resource)
-            .build()
-
-        val service = ComputeService(context, clock, meterProvider, scheduler, schedulingQuantum)
-        return service to meterProvider
+    private fun createService(scheduler: ComputeScheduler, schedulingQuantum: Duration): ComputeService {
+        val meterProvider = telemetry.createMeterProvider(scheduler)
+        return ComputeService(context, clock, meterProvider, scheduler, schedulingQuantum)
     }
 }
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/NoopTelemetryManager.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/NoopTelemetryManager.kt
new file mode 100644
index 00000000..4e7d0b75
--- /dev/null
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/NoopTelemetryManager.kt
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.workload.telemetry
+
+import io.opentelemetry.api.metrics.MeterProvider
+import org.opendc.compute.service.scheduler.ComputeScheduler
+import org.opendc.compute.workload.topology.HostSpec
+
+/**
+ * A [TelemetryManager] that does nothing.
+ */
+public class NoopTelemetryManager : TelemetryManager {
+    override fun createMeterProvider(host: HostSpec): MeterProvider = MeterProvider.noop()
+
+    override fun createMeterProvider(scheduler: ComputeScheduler): MeterProvider = MeterProvider.noop()
+}
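Editor's note: ComputeServiceHelper now takes a TelemetryManager instead of constructing and owning SdkMeterProvider instances itself, so a run that does not need metrics can opt out entirely. A hypothetical wiring sketch (the helper function is illustrative; the remaining constructor parameters are assumed to keep their defaults):

import java.time.Clock
import kotlin.coroutines.CoroutineContext
import org.opendc.compute.service.scheduler.ComputeScheduler
import org.opendc.compute.workload.ComputeServiceHelper
import org.opendc.compute.workload.telemetry.NoopTelemetryManager

// Builds a helper whose service and hosts all receive no-op MeterProviders,
// so no metrics are recorded or exported.
fun createHelperWithoutTelemetry(
    context: CoroutineContext,
    clock: Clock,
    scheduler: ComputeScheduler
): ComputeServiceHelper =
    ComputeServiceHelper(context, clock, NoopTelemetryManager(), scheduler)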
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/SdkTelemetryManager.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/SdkTelemetryManager.kt
new file mode 100644
index 00000000..478c0609
--- /dev/null
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/SdkTelemetryManager.kt
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.workload.telemetry
+
+import io.opentelemetry.api.metrics.MeterProvider
+import io.opentelemetry.sdk.common.CompletableResultCode
+import io.opentelemetry.sdk.metrics.SdkMeterProvider
+import io.opentelemetry.sdk.metrics.data.AggregationTemporality
+import io.opentelemetry.sdk.metrics.data.MetricData
+import io.opentelemetry.sdk.metrics.export.MetricProducer
+import io.opentelemetry.sdk.metrics.export.MetricReader
+import io.opentelemetry.sdk.metrics.export.MetricReaderFactory
+import io.opentelemetry.sdk.resources.Resource
+import io.opentelemetry.semconv.resource.attributes.ResourceAttributes
+import org.opendc.compute.service.scheduler.ComputeScheduler
+import org.opendc.compute.workload.topology.HostSpec
+import org.opendc.telemetry.compute.*
+import org.opendc.telemetry.sdk.toOtelClock
+import java.time.Clock
+
+/**
+ * A [TelemetryManager] using the OpenTelemetry Java SDK.
+ */
+public class SdkTelemetryManager(private val clock: Clock) : TelemetryManager, AutoCloseable {
+    /**
+     * The [SdkMeterProvider]s that belong to the workload runner.
+     */
+    private val _meterProviders = mutableListOf<SdkMeterProvider>()
+
+    /**
+     * The internal [MetricProducer] registered with the runner.
+     */
+    private val _metricProducers = mutableListOf<MetricProducer>()
+
+    /**
+     * The list of [MetricReader]s that have been registered with the runner.
+     */
+    private val _metricReaders = mutableListOf<MetricReader>()
+
+    /**
+     * A [MetricProducer] that combines all the other metric producers.
+     */
+    public val metricProducer: MetricProducer = object : MetricProducer {
+        private val producers = _metricProducers
+
+        override fun collectAllMetrics(): Collection<MetricData> = producers.flatMap(MetricProducer::collectAllMetrics)
+
+        override fun toString(): String = "SdkTelemetryManager.AggregateMetricProducer"
+    }
+
+    /**
+     * Register a [MetricReader] for this manager.
+     *
+     * @param factory The factory for the reader to register.
+     */
+    public fun registerMetricReader(factory: MetricReaderFactory) {
+        val reader = factory.apply(metricProducer)
+        _metricReaders.add(reader)
+    }
+
+    override fun createMeterProvider(scheduler: ComputeScheduler): MeterProvider {
+        val resource = Resource.builder()
+            .put(ResourceAttributes.SERVICE_NAME, "opendc-compute")
+            .build()
+
+        return createMeterProvider(resource)
+    }
+
+    override fun createMeterProvider(host: HostSpec): MeterProvider {
+        val resource = Resource.builder()
+            .put(HOST_ID, host.uid.toString())
+            .put(HOST_NAME, host.name)
+            .put(HOST_ARCH, ResourceAttributes.HostArchValues.AMD64)
+            .put(HOST_NCPUS, host.model.cpus.size)
+            .put(HOST_MEM_CAPACITY, host.model.memory.sumOf { it.size })
+            .build()
+
+        return createMeterProvider(resource)
+    }
+
+    /**
+     * Construct a [SdkMeterProvider] for the specified [resource].
+     */
+    private fun createMeterProvider(resource: Resource): SdkMeterProvider {
+        val meterProvider = SdkMeterProvider.builder()
+            .setClock(clock.toOtelClock())
+            .setResource(resource)
+            .registerMetricReader { producer ->
+                _metricProducers.add(producer)
+                object : MetricReader {
+                    override fun getPreferredTemporality(): AggregationTemporality = AggregationTemporality.CUMULATIVE
+                    override fun flush(): CompletableResultCode = CompletableResultCode.ofSuccess()
+                    override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess()
+                }
+            }
+            .build()
+        _meterProviders.add(meterProvider)
+        return meterProvider
+    }
+
+    override fun close() {
+        for (meterProvider in _meterProviders) {
+            meterProvider.close()
+        }
+
+        _meterProviders.clear()
+
+        for (metricReader in _metricReaders) {
+            metricReader.shutdown()
+        }
+
+        _metricReaders.clear()
+        _metricProducers.clear()
+    }
+}
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/TelemetryManager.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/TelemetryManager.kt
new file mode 100644
index 00000000..b67050ce
--- /dev/null
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/telemetry/TelemetryManager.kt
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.workload.telemetry
+
+import io.opentelemetry.api.metrics.MeterProvider
+import org.opendc.compute.service.scheduler.ComputeScheduler
+import org.opendc.compute.workload.topology.HostSpec
+
+/**
+ * Helper class to manage the telemetry for a [ComputeServiceHelper] instance.
+ */
+public interface TelemetryManager {
+    /**
+     * Construct a [MeterProvider] for the specified [ComputeScheduler].
+     */
+    public fun createMeterProvider(scheduler: ComputeScheduler): MeterProvider
+
+    /**
+     * Construct a [MeterProvider] for the specified [HostSpec].
+     */
+    public fun createMeterProvider(host: HostSpec): MeterProvider
+}
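Editor's note: TelemetryManager is the seam that lets the runner swap telemetry strategies; SdkTelemetryManager hands out per-resource SdkMeterProviders backed by an aggregate MetricProducer, while NoopTelemetryManager discards everything. A third, purely illustrative implementation that shares one externally supplied provider shows how small the contract is:

import io.opentelemetry.api.metrics.MeterProvider
import org.opendc.compute.service.scheduler.ComputeScheduler
import org.opendc.compute.workload.telemetry.TelemetryManager
import org.opendc.compute.workload.topology.HostSpec

// Illustrative only: every host and the scheduler share the same provider,
// so all metrics end up under a single resource.
class SharedProviderTelemetryManager(private val provider: MeterProvider) : TelemetryManager {
    override fun createMeterProvider(scheduler: ComputeScheduler): MeterProvider = provider
    override fun createMeterProvider(host: HostSpec): MeterProvider = provider
}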
