diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-10-28 16:46:47 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-28 16:46:47 +0200 |
| commit | b96acc687f59b698fbc4d4c984d77b008cd4051b (patch) | |
| tree | ff4d99454259b92ec4484433bb716acd6319faa1 /opendc-compute/opendc-compute-simulator/src | |
| parent | c4cfb6f6e0507f335fd88935857f20e88c34abd0 (diff) | |
| parent | 8bf940eb7b59b5e5e326cfc06d51bdb54393f33b (diff) | |
Support custom start-up and clean-up time for VMs (#112)
This pull request implements customizable startup and clean-up time for virtual machine.
We achieve this by implementing `SimWorkload` chaining and modelling start-up and
clean-up time using `SimWorkload` as well.
Implements #33
## Implementation Notes :hammer_and_pick:
* Store method parameters in class files
* Add completion parameter to startWorkload
* Provide workload constructors in SimWorkloads
* Add support for resetting machine context
* Add support for chaining workloads
* Use workload chaining for boot delay
* Model host boot time
* Do not suspend on guest start
* Use static logger field
## Breaking API Changes :warning:
* `SimMachine#startWorkload` now has a third parameter `completion` which is invoked when
the workload finishes executing (either due to failure or success).
* `SimFlopsWorkload` and `SimRuntimeWorkload` can be instantiated via `SimWorkloads`.
Diffstat (limited to 'opendc-compute/opendc-compute-simulator/src')
5 files changed, 209 insertions, 119 deletions
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index c07649bd..b3e56f38 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -22,7 +22,6 @@ package org.opendc.compute.simulator -import kotlinx.coroutines.yield import org.opendc.compute.api.Flavor import org.opendc.compute.api.Server import org.opendc.compute.api.ServerState @@ -34,6 +33,7 @@ import org.opendc.compute.service.driver.telemetry.GuestCpuStats import org.opendc.compute.service.driver.telemetry.GuestSystemStats import org.opendc.compute.service.driver.telemetry.HostCpuStats import org.opendc.compute.service.driver.telemetry.HostSystemStats +import org.opendc.compute.simulator.internal.DefaultWorkloadMapper import org.opendc.compute.simulator.internal.Guest import org.opendc.compute.simulator.internal.GuestListener import org.opendc.simulator.compute.SimBareMetalMachine @@ -44,30 +44,37 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.workload.SimWorkload -import org.opendc.simulator.flow2.FlowGraph +import org.opendc.simulator.compute.workload.SimWorkloads +import java.time.Clock import java.time.Duration import java.time.Instant import java.util.UUID -import kotlin.coroutines.CoroutineContext +import java.util.function.Supplier /** - * A [Host] that is simulates virtual machines on a physical machine using [SimHypervisor]. + * A [Host] implementation that simulates virtual machines on a physical machine using [SimHypervisor]. + * + * @param uid The unique identifier of the host. + * @param name The name of the host. + * @param meta The metadata of the host. + * @param clock The (virtual) clock used to track time. + * @param machine The [SimBareMetalMachine] on which the host runs. + * @param hypervisor The [SimHypervisor] to run on top of the machine. + * @param mapper A [SimWorkloadMapper] to map a [Server] to a [SimWorkload]. + * @param bootModel A [Supplier] providing the [SimWorkload] to execute during the boot procedure of the hypervisor. + * @param optimize A flag to indicate to optimize the machine models of the virtual machines. */ public class SimHost( override val uid: UUID, override val name: String, override val meta: Map<String, Any>, - private val context: CoroutineContext, - graph: FlowGraph, + private val clock: Clock, private val machine: SimBareMetalMachine, private val hypervisor: SimHypervisor, - private val mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), + private val mapper: SimWorkloadMapper = DefaultWorkloadMapper, + private val bootModel: Supplier<SimWorkload?> = Supplier { null }, private val optimize: Boolean = false ) : Host, AutoCloseable { - /** - * The clock instance used by the host. - */ - private val clock = graph.engine.clock /** * The event listeners registered with this host. @@ -124,13 +131,12 @@ public class SimHost( return sufficientMemory && enoughCpus && canFit } - override suspend fun spawn(server: Server, start: Boolean) { - val guest = guests.computeIfAbsent(server) { key -> + override fun spawn(server: Server) { + guests.computeIfAbsent(server) { key -> require(canFit(key)) { "Server does not fit" } val machine = hypervisor.newMachine(key.flavor.toMachineModel()) val newGuest = Guest( - context, clock, this, hypervisor, @@ -143,27 +149,23 @@ public class SimHost( _guests.add(newGuest) newGuest } - - if (start) { - guest.start() - } } override fun contains(server: Server): Boolean { return server in guests } - override suspend fun start(server: Server) { + override fun start(server: Server) { val guest = requireNotNull(guests[server]) { "Unknown server ${server.uid} at host $uid" } guest.start() } - override suspend fun stop(server: Server) { + override fun stop(server: Server) { val guest = requireNotNull(guests[server]) { "Unknown server ${server.uid} at host $uid" } guest.stop() } - override suspend fun delete(server: Server) { + override fun delete(server: Server) { val guest = guests[server] ?: return guest.delete() } @@ -251,7 +253,7 @@ public class SimHost( override fun toString(): String = "SimHost[uid=$uid,name=$name,model=$model]" - public suspend fun fail() { + public fun fail() { reset(HostState.ERROR) for (guest in _guests) { @@ -259,55 +261,54 @@ public class SimHost( } } - public suspend fun recover() { + public fun recover() { updateUptime() launch() - - // Wait for the hypervisor to launch before recovering the guests - yield() - - for (guest in _guests) { - guest.recover() - } } /** - * The [Job] that represents the machine running the hypervisor. + * The [SimMachineContext] that represents the machine running the hypervisor. */ - private var _ctx: SimMachineContext? = null + private var ctx: SimMachineContext? = null /** * Launch the hypervisor. */ private fun launch() { - check(_ctx == null) { "Concurrent hypervisor running" } - - // Launch hypervisor onto machine - _ctx = machine.startWorkload( - object : SimWorkload { - override fun onStart(ctx: SimMachineContext) { - try { - _bootTime = clock.instant() - _state = HostState.UP - hypervisor.onStart(ctx) - } catch (cause: Throwable) { - _state = HostState.ERROR - _ctx = null - throw cause + check(ctx == null) { "Concurrent hypervisor running" } + + val bootWorkload = bootModel.get() + val hypervisor = hypervisor + val hypervisorWorkload = object : SimWorkload { + override fun onStart(ctx: SimMachineContext) { + try { + _bootTime = clock.instant() + _state = HostState.UP + hypervisor.onStart(ctx) + + // Recover the guests that were running on the hypervisor. + for (guest in _guests) { + guest.recover() } + } catch (cause: Throwable) { + _state = HostState.ERROR + throw cause } + } - override fun onStop(ctx: SimMachineContext) { - try { - hypervisor.onStop(ctx) - } finally { - _ctx = null - } - } - }, - emptyMap() - ) + override fun onStop(ctx: SimMachineContext) { + hypervisor.onStop(ctx) + } + } + + val workload = if (bootWorkload != null) SimWorkloads.chain(bootWorkload, hypervisorWorkload) else hypervisorWorkload + + // Launch hypervisor onto machine + ctx = machine.startWorkload(workload, emptyMap()) { cause -> + _state = if (cause != null) HostState.ERROR else HostState.DOWN + ctx = null + } } /** @@ -317,7 +318,7 @@ public class SimHost( updateUptime() // Stop the hypervisor - _ctx?.shutdown() + ctx?.shutdown() _state = state } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimWorkloadMapper.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimWorkloadMapper.kt index 7082c5cf..83baa61a 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimWorkloadMapper.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimWorkloadMapper.kt @@ -22,6 +22,7 @@ package org.opendc.compute.simulator +import org.opendc.compute.api.Image import org.opendc.compute.api.Server import org.opendc.simulator.compute.workload.SimWorkload diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/DefaultWorkloadMapper.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/DefaultWorkloadMapper.kt new file mode 100644 index 00000000..c5293a8d --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/DefaultWorkloadMapper.kt @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.internal + +import org.opendc.compute.api.Server +import org.opendc.compute.simulator.SimMetaWorkloadMapper +import org.opendc.compute.simulator.SimWorkloadMapper +import org.opendc.simulator.compute.workload.SimWorkload +import org.opendc.simulator.compute.workload.SimWorkloads +import java.time.Duration + +/** + * A [SimWorkloadMapper] to introduces a boot delay of 1 ms. This object exists to retain the old behavior while + * introducing the possibility of adding custom boot delays. + */ +internal object DefaultWorkloadMapper : SimWorkloadMapper { + private val delegate = SimMetaWorkloadMapper() + + override fun createWorkload(server: Server): SimWorkload { + val workload = delegate.createWorkload(server) + val bootWorkload = SimWorkloads.runtime(Duration.ofMillis(1), 0.8) + return SimWorkloads.chain(bootWorkload, workload) + } +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index 790d8047..ca947625 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -22,12 +22,6 @@ package org.opendc.compute.simulator.internal -import kotlinx.coroutines.CancellationException -import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Job -import kotlinx.coroutines.cancel -import kotlinx.coroutines.delay -import kotlinx.coroutines.launch import mu.KotlinLogging import org.opendc.compute.api.Server import org.opendc.compute.api.ServerState @@ -35,20 +29,17 @@ import org.opendc.compute.service.driver.telemetry.GuestCpuStats import org.opendc.compute.service.driver.telemetry.GuestSystemStats import org.opendc.compute.simulator.SimHost import org.opendc.compute.simulator.SimWorkloadMapper +import org.opendc.simulator.compute.SimMachineContext import org.opendc.simulator.compute.kernel.SimHypervisor import org.opendc.simulator.compute.kernel.SimVirtualMachine -import org.opendc.simulator.compute.runWorkload -import org.opendc.simulator.compute.workload.SimWorkload import java.time.Clock import java.time.Duration import java.time.Instant -import kotlin.coroutines.CoroutineContext /** * A virtual machine instance that is managed by a [SimHost]. */ internal class Guest( - context: CoroutineContext, private val clock: Clock, val host: SimHost, private val hypervisor: SimHypervisor, @@ -58,35 +49,26 @@ internal class Guest( val machine: SimVirtualMachine ) { /** - * The [CoroutineScope] of the guest. - */ - private val scope: CoroutineScope = CoroutineScope(context + Job()) - - /** - * The logger instance of this guest. - */ - private val logger = KotlinLogging.logger {} - - /** * The state of the [Guest]. * * [ServerState.PROVISIONING] is an invalid value for a guest, since it applies before the host is selected for * a server. */ var state: ServerState = ServerState.TERMINATED + private set /** * Start the guest. */ - suspend fun start() { + fun start() { when (state) { ServerState.TERMINATED, ServerState.ERROR -> { - logger.info { "User requested to start server ${server.uid}" } + LOGGER.info { "User requested to start server ${server.uid}" } doStart() } ServerState.RUNNING -> return ServerState.DELETED -> { - logger.warn { "User tried to start deleted server" } + LOGGER.warn { "User tried to start deleted server" } throw IllegalArgumentException("Server is deleted") } else -> assert(false) { "Invalid state transition" } @@ -96,7 +78,7 @@ internal class Guest( /** * Stop the guest. */ - suspend fun stop() { + fun stop() { when (state) { ServerState.RUNNING -> doStop(ServerState.TERMINATED) ServerState.ERROR -> doRecover() @@ -111,12 +93,11 @@ internal class Guest( * This operation will stop the guest if it is running on the host and remove all resources associated with the * guest. */ - suspend fun delete() { + fun delete() { stop() state = ServerState.DELETED hypervisor.removeMachine(machine) - scope.cancel() } /** @@ -124,7 +105,7 @@ internal class Guest( * * This operation forcibly stops the guest and puts the server into an error state. */ - suspend fun fail() { + fun fail() { if (state != ServerState.RUNNING) { return } @@ -135,7 +116,7 @@ internal class Guest( /** * Recover the guest if it is in an error state. */ - suspend fun recover() { + fun recover() { if (state != ServerState.ERROR) { return } @@ -175,37 +156,34 @@ internal class Guest( } /** - * The [Job] representing the current active virtual machine instance or `null` if no virtual machine is active. + * The [SimMachineContext] representing the current active virtual machine instance or `null` if no virtual machine + * is active. */ - private var job: Job? = null + private var ctx: SimMachineContext? = null /** * Launch the guest on the simulated */ - private suspend fun doStart() { - assert(job == null) { "Concurrent job running" } - val workload = mapper.createWorkload(server) - - val job = scope.launch { runMachine(workload) } - this.job = job + private fun doStart() { + assert(ctx == null) { "Concurrent job running" } - state = ServerState.RUNNING onStart() - job.invokeOnCompletion { cause -> - this.job = null - onStop(if (cause != null && cause !is CancellationException) ServerState.ERROR else ServerState.TERMINATED) + val workload = mapper.createWorkload(server) + val meta = mapOf("driver" to host, "server" to server) + server.meta + ctx = machine.startWorkload(workload, meta) { cause -> + onStop(if (cause != null) ServerState.ERROR else ServerState.TERMINATED) + ctx = null } } /** * Attempt to stop the server and put it into [target] state. */ - private suspend fun doStop(target: ServerState) { - assert(job != null) { "Invalid job state" } - val job = job ?: return - job.cancel() - job.join() + private fun doStop(target: ServerState) { + assert(ctx != null) { "Invalid job state" } + val ctx = ctx ?: return + ctx.shutdown() state = target } @@ -218,14 +196,6 @@ internal class Guest( } /** - * Converge the process that models the virtual machine lifecycle as a coroutine. - */ - private suspend fun runMachine(workload: SimWorkload) { - delay(1) // TODO Introduce model for boot time - machine.runWorkload(workload, mapOf("driver" to host, "server" to server) + server.meta) - } - - /** * This method is invoked when the guest was started on the host and has booted into a running state. */ private fun onStart() { @@ -264,4 +234,9 @@ internal class Guest( _downtime += duration } } + + private companion object { + @JvmStatic + private val LOGGER = KotlinLogging.logger {} + } } diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index a5999bcd..fc581d3e 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -24,7 +24,6 @@ package org.opendc.compute.simulator import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.delay -import kotlinx.coroutines.launch import kotlinx.coroutines.suspendCancellableCoroutine import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.BeforeEach @@ -70,6 +69,73 @@ internal class SimHostTest { } /** + * Test a single virtual machine hosted by the hypervisor. + */ + @Test + fun testSingle() = runSimulation { + val duration = 5 * 60L + + val engine = FlowEngine.create(coroutineContext, clock) + val graph = engine.newGraph() + + val machine = SimBareMetalMachine.create(graph, machineModel) + val hypervisor = SimHypervisor.create(FlowMultiplexerFactory.maxMinMultiplexer(), SplittableRandom(1)) + + val host = SimHost( + uid = UUID.randomUUID(), + name = "test", + meta = emptyMap(), + clock, + machine, + hypervisor + ) + val vmImage = MockImage( + UUID.randomUUID(), + "<unnamed>", + emptyMap(), + mapOf( + "workload" to + SimTrace.ofFragments( + SimTraceFragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceFragment(duration * 1000, duration * 1000, 2 * 3500.0, 2), + SimTraceFragment(duration * 2000, duration * 1000, 0.0, 2), + SimTraceFragment(duration * 3000, duration * 1000, 2 * 183.0, 2) + ).createWorkload(1) + ) + ) + + val flavor = MockFlavor(2, 0) + + suspendCancellableCoroutine { cont -> + host.addListener(object : HostListener { + private var finished = 0 + + override fun onStateChanged(host: Host, server: Server, newState: ServerState) { + if (newState == ServerState.TERMINATED && ++finished == 1) { + cont.resume(Unit) + } + } + }) + val server = MockServer(UUID.randomUUID(), "a", flavor, vmImage) + host.spawn(server) + host.start(server) + } + + // Ensure last cycle is collected + delay(1000L * duration) + host.close() + + val cpuStats = host.getCpuStats() + + assertAll( + { assertEquals(639, cpuStats.activeTime, "Active time does not match") }, + { assertEquals(2360, cpuStats.idleTime, "Idle time does not match") }, + { assertEquals(56, cpuStats.stealTime, "Steal time does not match") }, + { assertEquals(1500001, clock.millis()) } + ) + } + + /** * Test overcommitting of resources by the hypervisor. */ @Test @@ -86,8 +152,7 @@ internal class SimHostTest { uid = UUID.randomUUID(), name = "test", meta = emptyMap(), - coroutineContext, - graph, + clock, machine, hypervisor ) @@ -123,9 +188,6 @@ internal class SimHostTest { val flavor = MockFlavor(2, 0) coroutineScope { - launch { host.spawn(MockServer(UUID.randomUUID(), "a", flavor, vmImageA)) } - launch { host.spawn(MockServer(UUID.randomUUID(), "b", flavor, vmImageB)) } - suspendCancellableCoroutine { cont -> host.addListener(object : HostListener { private var finished = 0 @@ -136,6 +198,13 @@ internal class SimHostTest { } } }) + val serverA = MockServer(UUID.randomUUID(), "a", flavor, vmImageA) + host.spawn(serverA) + val serverB = MockServer(UUID.randomUUID(), "b", flavor, vmImageB) + host.spawn(serverB) + + host.start(serverA) + host.start(serverB) } } @@ -169,8 +238,7 @@ internal class SimHostTest { uid = UUID.randomUUID(), name = "test", meta = emptyMap(), - coroutineContext, - graph, + clock, machine, hypervisor ) @@ -193,12 +261,13 @@ internal class SimHostTest { coroutineScope { host.spawn(server) + host.start(server) delay(5000L) host.fail() delay(duration * 1000) host.recover() - suspendCancellableCoroutine<Unit> { cont -> + suspendCancellableCoroutine { cont -> host.addListener(object : HostListener { override fun onStateChanged(host: Host, server: Server, newState: ServerState) { if (newState == ServerState.TERMINATED) { |
