From 9236b3cfb7be1e9d44fe60cbdd699c19c70f6411 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Aug 2021 19:22:34 +0200 Subject: feat(compute): Track host up/down time This change adds new metrics for tracking the up and downtime of hosts due to failures. In addition, this change adds a test to verify whether the metrics are collected correctly. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 55 ++++++++++++++++++++++ .../org/opendc/compute/simulator/SimHostTest.kt | 10 ++++ 2 files changed, 65 insertions(+) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 20e5a9db..e12bd37b 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -71,6 +71,11 @@ public class SimHost( */ override val scope: CoroutineScope = CoroutineScope(context + Job()) + /** + * The clock instance used by the host. + */ + private val clock = interpreter.clock + /** * The logger instance of this server. */ @@ -115,6 +120,8 @@ public class SimHost( _cpuDemand.record(cpuDemand) _cpuUsage.record(cpuUsage) _powerUsage.record(machine.powerDraw) + + reportTime() } } ) @@ -221,6 +228,33 @@ public class SimHost( .build() .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + /** + * The amount of time in the system. + */ + private val _totalTime = meter.counterBuilder("host.time.total") + .setDescription("The amount of time in the system") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + + /** + * The uptime of the host. 
+ */ + private val _upTime = meter.counterBuilder("host.time.up") + .setDescription("The uptime of the host") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + + /** + * The downtime of the host. + */ + private val _downTime = meter.counterBuilder("host.time.down") + .setDescription("The downtime of the host") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + init { // Launch hypervisor onto machine scope.launch { @@ -238,6 +272,24 @@ public class SimHost( } } + private var _lastReport = clock.millis() + + private fun reportTime() { + if (!scope.isActive) + return + + val now = clock.millis() + val duration = now - _lastReport + + _totalTime.add(duration) + when (_state) { + HostState.UP -> _upTime.add(duration) + HostState.DOWN -> _downTime.add(duration) + } + + _lastReport = now + } + override fun canFit(server: Server): Boolean { val sufficientMemory = availableMemory > server.flavor.memorySize val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount @@ -291,6 +343,7 @@ public class SimHost( } override fun close() { + reportTime() scope.cancel() machine.close() } @@ -320,6 +373,7 @@ public class SimHost( } override suspend fun fail() { + reportTime() _state = HostState.DOWN for (guest in guests.values) { guest.fail() @@ -327,6 +381,7 @@ public class SimHost( } override suspend fun recover() { + reportTime() _state = HostState.UP for (guest in guests.values) { guest.start() diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 1ba3a9a1..0a2ced7b 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -189,6 +189,8 @@ internal class 
SimHostTest { fun testFailure() = runBlockingSimulation { var requestedWork = 0L var grantedWork = 0L + var totalTime = 0L + var downTime = 0L val meterProvider: MeterProvider = SdkMeterProvider .builder() @@ -238,6 +240,12 @@ internal class SimHostTest { metricsByName["cpu.work.granted"]?.let { grantedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } + metricsByName["host.time.total"]?.let { + totalTime = it.longSumData.points.first().value + } + metricsByName["host.time.down"]?.let { + downTime = it.longSumData.points.first().value + } return CompletableResultCode.ofSuccess() } @@ -275,6 +283,8 @@ internal class SimHostTest { assertAll( { assertEquals(2226039, requestedWork, "Total time does not match") }, { assertEquals(1086039, grantedWork, "Down time does not match") }, + { assertEquals(1200001, totalTime, "Total time does not match") }, + { assertEquals(5000, downTime, "Down time does not match") }, ) } -- cgit v1.2.3 From d097d65851619483a85ce16ab56f61a726bbe756 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Aug 2021 16:55:42 +0200 Subject: fix(compute): Support overcommitted memory in SimHost This change enables hosts to overcommit their memory when testing whether new servers can fit on the host. 
--- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index e12bd37b..76cc7dfe 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -86,11 +86,6 @@ public class SimHost( */ private val listeners = mutableListOf() - /** - * Current total memory use of the images on this hypervisor. - */ - private var availableMemory: Long = model.memory.sumOf { it.size } - /** * The machine to run on. */ @@ -291,7 +286,7 @@ public class SimHost( } override fun canFit(server: Server): Boolean { - val sufficientMemory = availableMemory > server.flavor.memorySize + val sufficientMemory = machine.model.memory.size >= server.flavor.memorySize val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount val canFit = hypervisor.canFit(server.flavor.toMachineModel()) @@ -469,7 +464,6 @@ public class SimHost( else ServerState.ERROR - availableMemory += server.flavor.memorySize onGuestStop(this) } } -- cgit v1.2.3 From 83d7e45a6b749df0c208e56885402c66e54c4b23 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Aug 2021 17:06:35 +0200 Subject: fix(compute): Start host even if it already exists on host --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt 
b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 76cc7dfe..4526537d 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -294,16 +294,12 @@ public class SimHost( } override suspend fun spawn(server: Server, start: Boolean) { - // Return if the server already exists on this host - if (server in this) { - return + val guest = guests.computeIfAbsent(server) { key -> + require(canFit(key)) { "Server does not fit" } + _guests.add(1) + Guest(key, hypervisor.createMachine(key.flavor.toMachineModel(), key.name)) } - require(canFit(server)) { "Server does not fit" } - val guest = Guest(server, hypervisor.createMachine(server.flavor.toMachineModel(), server.name)) - guests[server] = guest - _guests.add(1) - if (start) { guest.start() } -- cgit v1.2.3 From 2c507c6ca4b7d809b410d351f0c1c5c3ddf7bb5c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 14:38:08 +0200 Subject: feat(compute): Track guest up/down time This change updates the SimHost implementation to track the up and downtime of hypervisor guests. 
--- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 54 +++++++++++++++++++++- .../org/opendc/compute/simulator/SimHostTest.kt | 10 ++++ 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 4526537d..9a1f05fc 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -22,6 +22,7 @@ package org.opendc.compute.simulator +import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.common.Attributes import io.opentelemetry.api.metrics.Meter import io.opentelemetry.semconv.resource.attributes.ResourceAttributes @@ -59,7 +60,7 @@ public class SimHost( override val meta: Map, context: CoroutineContext, interpreter: SimResourceInterpreter, - meter: Meter, + private val meter: Meter, hypervisor: SimHypervisorProvider, scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), @@ -282,6 +283,11 @@ public class SimHost( HostState.DOWN -> _downTime.add(duration) } + // Track time of guests + for (guest in guests.values) { + guest.reportTime() + } + _lastReport = now } @@ -385,6 +391,33 @@ public class SimHost( private inner class Guest(val server: Server, val machine: SimMachine) { var state: ServerState = ServerState.TERMINATED + /** + * The amount of time in the system. + */ + private val _totalTime = meter.counterBuilder("guest.time.total") + .setDescription("The amount of time in the system") + .setUnit("ms") + .build() + .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + + /** + * The uptime of the guest. 
+ */ + private val _runningTime = meter.counterBuilder("guest.time.running") + .setDescription("The uptime of the guest") + .setUnit("ms") + .build() + .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + + /** + * The time the guest is in an error state. + */ + private val _errorTime = meter.counterBuilder("guest.time.error") + .setDescription("The time the guest is in an error state") + .setUnit("ms") + .build() + .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + suspend fun start() { when (state) { ServerState.TERMINATED, ServerState.ERROR -> { @@ -462,5 +495,24 @@ public class SimHost( onGuestStop(this) } + + private var _lastReport = clock.millis() + + fun reportTime() { + if (state == ServerState.DELETED) + return + + val now = clock.millis() + val duration = now - _lastReport + + _totalTime.add(duration) + when (state) { + ServerState.RUNNING -> _runningTime.add(duration) + ServerState.ERROR -> _errorTime.add(duration) + else -> {} + } + + _lastReport = now + } } } diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 0a2ced7b..31215e9a 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -191,6 +191,8 @@ internal class SimHostTest { var grantedWork = 0L var totalTime = 0L var downTime = 0L + var guestTotalTime = 0L + var guestDownTime = 0L val meterProvider: MeterProvider = SdkMeterProvider .builder() @@ -246,6 +248,12 @@ internal class SimHostTest { metricsByName["host.time.down"]?.let { downTime = it.longSumData.points.first().value } + metricsByName["guest.time.total"]?.let { + guestTotalTime = it.longSumData.points.first().value + } + 
metricsByName["guest.time.error"]?.let { + guestDownTime = it.longSumData.points.first().value + } return CompletableResultCode.ofSuccess() } @@ -284,7 +292,9 @@ internal class SimHostTest { { assertEquals(2226039, requestedWork, "Total time does not match") }, { assertEquals(1086039, grantedWork, "Down time does not match") }, { assertEquals(1200001, totalTime, "Total time does not match") }, + { assertEquals(1200001, guestTotalTime, "Guest total time does not match") }, { assertEquals(5000, downTime, "Down time does not match") }, + { assertEquals(5000, guestDownTime, "Guest down time does not match") }, ) } -- cgit v1.2.3 From b6a8c642b598bfb2eaaea2a8a7e6ad6702d349b6 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Aug 2021 10:29:41 +0200 Subject: fix(compute): Use correct memory size for host memory This change fixes an issue where all servers could not be scheduled due to the memory size of the host being computed incorrectly. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 9a1f05fc..a4d24740 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -292,8 +292,8 @@ public class SimHost( } override fun canFit(server: Server): Boolean { - val sufficientMemory = machine.model.memory.size >= server.flavor.memorySize - val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount + val sufficientMemory = model.memorySize >= server.flavor.memorySize + val enoughCpus = model.cpuCount >= server.flavor.cpuCount val canFit = 
hypervisor.canFit(server.flavor.toMachineModel()) return sufficientMemory && enoughCpus && canFit -- cgit v1.2.3 From 561f07bd6547c8273627e9c860324ab892ba5fa7 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Aug 2021 10:30:53 +0200 Subject: fix(compute): Do not allow failure of inactive guests This change fixes an issue in SimHost where guests that were inactive were also failed, causing an IllegalStateException. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index a4d24740..213d20ee 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -452,6 +452,9 @@ public class SimHost( } suspend fun fail() { + if (state != ServerState.RUNNING) { + return + } stop() state = ServerState.ERROR } -- cgit v1.2.3