From 9236b3cfb7be1e9d44fe60cbdd699c19c70f6411 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Aug 2021 19:22:34 +0200 Subject: feat(compute): Track host up/down time This change adds new metrics for tracking the uptime of hosts and their downtime due to failures. In addition, this change adds a test to verify whether the metrics are collected correctly. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'opendc-compute/opendc-compute-simulator/src/main') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 20e5a9db..e12bd37b 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -71,6 +71,11 @@ public class SimHost( */ override val scope: CoroutineScope = CoroutineScope(context + Job()) + /** + * The clock instance used by the host. + */ + private val clock = interpreter.clock + /** * The logger instance of this server. */ @@ -115,6 +120,8 @@ public class SimHost( _cpuDemand.record(cpuDemand) _cpuUsage.record(cpuUsage) _powerUsage.record(machine.powerDraw) + + reportTime() } } ) @@ -221,6 +228,33 @@ public class SimHost( .build() .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + /** + * The total time (in milliseconds) accounted for this host, irrespective of its state. + */ + private val _totalTime = meter.counterBuilder("host.time.total") + .setDescription("The amount of time in the system") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + + /** + * The cumulative time (in milliseconds) the host has spent in the UP state. 
+ */ + private val _upTime = meter.counterBuilder("host.time.up") + .setDescription("The uptime of the host") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + + /** + * The cumulative time (in milliseconds) the host has spent in the DOWN state. + */ + private val _downTime = meter.counterBuilder("host.time.down") + .setDescription("The downtime of the host") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + init { // Launch hypervisor onto machine scope.launch { @@ -238,6 +272,24 @@ public class SimHost( } } + private var _lastReport = clock.millis() + + private fun reportTime() { + if (!scope.isActive) + return + + val now = clock.millis() + val duration = now - _lastReport + + _totalTime.add(duration) + when (_state) { + HostState.UP -> _upTime.add(duration) + HostState.DOWN -> _downTime.add(duration) + } + + _lastReport = now + } + override fun canFit(server: Server): Boolean { val sufficientMemory = availableMemory > server.flavor.memorySize val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount @@ -291,6 +343,7 @@ public class SimHost( } override fun close() { + reportTime() scope.cancel() machine.close() } @@ -320,6 +373,7 @@ public class SimHost( } override suspend fun fail() { + reportTime() _state = HostState.DOWN for (guest in guests.values) { guest.fail() @@ -327,6 +381,7 @@ public class SimHost( } override suspend fun recover() { + reportTime() _state = HostState.UP for (guest in guests.values) { guest.start() -- cgit v1.2.3