From 8bb44da762a78adb0444c825dc645c2fc84f901b Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 9 Mar 2020 21:18:10 +0100 Subject: feat: Terminate server instance on power off --- .../atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index c7dc74cf..6343bee5 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -115,7 +115,12 @@ public class SimpleBareMetalDriver( node.image, ServerState.BUILD ) - PowerState.POWER_ON to PowerState.POWER_OFF -> null // TODO Terminate existing image + PowerState.POWER_ON to PowerState.POWER_OFF -> { + // We terminate the image running on the machine + job?.cancel() + job = null + null + } PowerState.POWER_ON to PowerState.POWER_ON -> node.server else -> throw IllegalStateException() } -- cgit v1.2.3 From 99cc96fc51f1b894c8c05b1cde69d60463cc732c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 13 Mar 2020 14:15:27 +0100 Subject: feat: Revamp bare-metal state management --- .../opendc/compute/core/monitor/ServerMonitor.kt | 2 +- .../com/atlarge/opendc/compute/metal/Node.kt | 4 +- .../com/atlarge/opendc/compute/metal/NodeState.kt | 55 +++++++++++ .../com/atlarge/opendc/compute/metal/PowerState.kt | 40 -------- .../opendc/compute/metal/driver/BareMetalDriver.kt | 19 +++- .../compute/metal/driver/SimpleBareMetalDriver.kt | 101 ++++++++++++--------- .../opendc/compute/metal/monitor/NodeMonitor.kt | 42 +++++++++ .../metal/service/SimpleProvisioningService.kt | 7 +- .../metal/driver/SimpleBareMetalDriverTest.kt | 13 ++- 
.../virt/driver/hypervisor/HypervisorTest.kt | 7 +- 10 files changed, 189 insertions(+), 101 deletions(-) create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeState.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/PowerState.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt index fbfd0ad6..26b94ba5 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt @@ -37,5 +37,5 @@ public interface ServerMonitor { * @param server The server which state was updated. * @param previousState The previous state of the server. */ - public suspend fun onUpdate(server: Server, previousState: ServerState) + public suspend fun onUpdate(server: Server, previousState: ServerState) {} } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt index a43abfe9..7df5d99b 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt @@ -44,9 +44,9 @@ data class Node( public override val name: String, /** - * The power state of the node. + * The last known state of the compute node. */ - public val powerState: PowerState, + public val state: NodeState, /** * The boot image of the node. 
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeState.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeState.kt new file mode 100644 index 00000000..ca9cf509 --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeState.kt @@ -0,0 +1,55 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.metal + +/** + * An enumeration describing the possible states of a bare-metal compute node. + */ +public enum class NodeState { + /** + * The node is booting. + */ + BOOT, + + /** + * The node is powered off. + */ + SHUTOFF, + + /** + * The node is active and running. + */ + ACTIVE, + + /** + * The node is in error. + */ + ERROR, + + /** + * The state of the node is unknown. 
+ */ + UNKNOWN, +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/PowerState.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/PowerState.kt deleted file mode 100644 index 5fce3f48..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/PowerState.kt +++ /dev/null @@ -1,40 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.compute.metal - -/** - * The power state of a compute node. - */ -public enum class PowerState { - /** - * Node is powered on. - */ - POWER_ON, - - /** - * Node is powered off. 
- */ - POWER_OFF, -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt index 1214dd36..fb2ff355 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt @@ -26,9 +26,8 @@ package com.atlarge.opendc.compute.metal.driver import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.metal.PowerState +import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.core.power.Powerable import com.atlarge.opendc.core.services.AbstractServiceKey import kotlinx.coroutines.flow.Flow @@ -47,12 +46,22 @@ public interface BareMetalDriver : Powerable { /** * Initialize the driver. */ - public suspend fun init(monitor: ServerMonitor): Node + public suspend fun init(monitor: NodeMonitor): Node /** - * Update the power state of the compute node. + * Start the bare metal node with the specified boot disk image. */ - public suspend fun setPower(powerState: PowerState): Node + public suspend fun start(): Node + + /** + * Stop the bare metal node if it is running. + */ + public suspend fun stop(): Node + + /** + * Reboot the bare metal node. + */ + public suspend fun reboot(): Node /** * Update the boot disk image of the compute node. 
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 6343bee5..2d803aa5 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -35,9 +35,9 @@ import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.execution.ServerManagementContext import com.atlarge.opendc.compute.core.image.EmptyImage import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.metal.PowerState +import com.atlarge.opendc.compute.metal.NodeState +import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.compute.metal.power.ConstantPowerModel import com.atlarge.opendc.core.power.PowerModel import kotlinx.coroutines.CancellationException @@ -72,12 +72,27 @@ public class SimpleBareMetalDriver( /** * The monitor to use. */ - private lateinit var monitor: ServerMonitor + private lateinit var monitor: NodeMonitor /** * The machine state. */ - private var node: Node = Node(uid, name, PowerState.POWER_OFF, EmptyImage, null) + private var node: Node = Node(uid, name, NodeState.SHUTOFF, EmptyImage, null) + set(value) { + if (field.state != value.state) { + domain.launch { + monitor.onUpdate(value, field.state) + } + } + + if (field.server != null && value.server != null && field.server!!.state != value.server.state) { + domain.launch { + monitor.onUpdate(value.server, field.server!!.state) + } + } + + field = value + } /** * The flavor that corresponds to this machine. 
@@ -98,42 +113,49 @@ public class SimpleBareMetalDriver( override val powerDraw: Flow = powerModel(this) - override suspend fun init(monitor: ServerMonitor): Node = withContext(domain.coroutineContext) { + override suspend fun init(monitor: NodeMonitor): Node = withContext(domain.coroutineContext) { this@SimpleBareMetalDriver.monitor = monitor return@withContext node } - override suspend fun setPower(powerState: PowerState): Node = withContext(domain.coroutineContext) { - val previousPowerState = node.powerState - val server = when (node.powerState to powerState) { - PowerState.POWER_OFF to PowerState.POWER_OFF -> null - PowerState.POWER_OFF to PowerState.POWER_ON -> Server( - UUID.randomUUID(), - node.name, - emptyMap(), - flavor, - node.image, - ServerState.BUILD - ) - PowerState.POWER_ON to PowerState.POWER_OFF -> { - // We terminate the image running on the machine - job?.cancel() - job = null - null - } - PowerState.POWER_ON to PowerState.POWER_ON -> node.server - else -> throw IllegalStateException() + override suspend fun start(): Node = withContext(domain.coroutineContext) { + if (node.state != NodeState.SHUTOFF) { + return@withContext node } - server?.serviceRegistry?.set(BareMetalDriver.Key, this@SimpleBareMetalDriver) - node = node.copy(powerState = powerState, server = server) - if (powerState != previousPowerState && server != null) { - launch() + val server = Server( + UUID.randomUUID(), + node.name, + emptyMap(), + flavor, + node.image, + ServerState.BUILD + ) + + server.serviceRegistry[BareMetalDriver.Key] = this@SimpleBareMetalDriver + node = node.copy(state = NodeState.BOOT, server = server) + launch() + return@withContext node + } + + override suspend fun stop(): Node = withContext(domain.coroutineContext) { + if (node.state == NodeState.SHUTOFF) { + return@withContext node } + // We terminate the image running on the machine + job?.cancel() + job = null + + node = node.copy(state = NodeState.SHUTOFF, server = null) return@withContext node } + 
override suspend fun reboot(): Node = withContext(domain.coroutineContext) { + stop() + start() + } + override suspend fun setImage(image: Image): Node = withContext(domain.coroutineContext) { node = node.copy(image = image) return@withContext node @@ -163,29 +185,26 @@ public class SimpleBareMetalDriver( override val cpus: List = this@SimpleBareMetalDriver.cpus - override var server: Server + override val server: Server get() = node.server!! - set(value) { - node = node.copy(server = value) - } override suspend fun init() { if (initialized) { throw IllegalStateException() } - - val previousState = server.state - server = server.copy(state = ServerState.ACTIVE) - monitor.onUpdate(server, previousState) initialized = true + + val server = server.copy(state = ServerState.ACTIVE) + node = node.copy(state = NodeState.ACTIVE, server = server) } override suspend fun exit(cause: Throwable?) { - val previousState = server.state - val state = if (cause == null) ServerState.SHUTOFF else ServerState.ERROR - server = server.copy(state = state) initialized = false - domain.launch { monitor.onUpdate(server, previousState) } + + val serverState = if (cause == null) ServerState.SHUTOFF else ServerState.ERROR + val nodeState = if (cause == null) node.state else NodeState.ERROR + val server = server.copy(state = serverState) + node = node.copy(state = nodeState, server = server) } private var flush: Job? 
= null diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt new file mode 100644 index 00000000..f35cf57b --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt @@ -0,0 +1,42 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.metal.monitor + +import com.atlarge.opendc.compute.core.monitor.ServerMonitor +import com.atlarge.opendc.compute.metal.Node +import com.atlarge.opendc.compute.metal.NodeState + +/** + * An interface for monitoring bare-metal nodes. + */ +public interface NodeMonitor : ServerMonitor { + /** + * This method is invoked when the state of a bare metal machine updates. 
+ * + * @param node The node for which state was updated. + * @param previousState The previous state of the node. + */ + public suspend fun onUpdate(node: Node, previousState: NodeState) {} +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt index b18a4006..117e502c 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt @@ -30,14 +30,14 @@ import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.metal.PowerState import com.atlarge.opendc.compute.metal.driver.BareMetalDriver +import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import kotlinx.coroutines.withContext /** * A very basic implementation of the [ProvisioningService]. */ -public class SimpleProvisioningService(val domain: Domain) : ProvisioningService, ServerMonitor { +public class SimpleProvisioningService(val domain: Domain) : ProvisioningService, NodeMonitor { /** * The active nodes in this service. */ @@ -64,8 +64,7 @@ public class SimpleProvisioningService(val domain: Domain) : ProvisioningService val driver = nodes[node]!! driver.setImage(image) - driver.setPower(PowerState.POWER_OFF) - val newNode = driver.setPower(PowerState.POWER_ON) + val newNode = driver.reboot() monitors[newNode.server!!] 
= monitor return@withContext newNode } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index b8882eda..24a65b40 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -31,8 +31,9 @@ import com.atlarge.opendc.compute.core.ProcessingUnit import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.FlopsApplicationImage -import com.atlarge.opendc.compute.core.monitor.ServerMonitor -import com.atlarge.opendc.compute.metal.PowerState +import com.atlarge.opendc.compute.metal.Node +import com.atlarge.opendc.compute.metal.NodeState +import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -57,7 +58,11 @@ internal class SimpleBareMetalDriverTest { val cpus = List(4) { ProcessingUnit(cpuNode, it, 2400.0) } val driver = SimpleBareMetalDriver(dom, UUID.randomUUID(), "test", cpus, emptyList()) - val monitor = object : ServerMonitor { + val monitor = object : NodeMonitor { + override suspend fun onUpdate(node: Node, previousState: NodeState) { + println(node) + } + override suspend fun onUpdate(server: Server, previousState: ServerState) { println("[${simulationContext.clock.millis()}] $server") finalState = server.state @@ -69,7 +74,7 @@ internal class SimpleBareMetalDriverTest { withContext(dom.coroutineContext) { driver.init(monitor) driver.setImage(image) - driver.setPower(PowerState.POWER_ON) + driver.start() } } diff --git 
a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt index 254ad5fe..57a7150e 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt @@ -32,9 +32,8 @@ import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ProcessingNode import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.FlopsApplicationImage -import com.atlarge.opendc.compute.core.monitor.ServerMonitor -import com.atlarge.opendc.compute.metal.PowerState import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver +import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.compute.virt.driver.VirtDriver import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import kotlinx.coroutines.delay @@ -71,7 +70,7 @@ internal class HypervisorTest { }) val workloadA = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 1) val workloadB = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 2_000, 1) - val monitor = object : ServerMonitor { + val monitor = object : NodeMonitor { override suspend fun onUpdate(server: Server, previousState: ServerState) { println("[${simulationContext.clock.millis()}]: $server") } @@ -85,7 +84,7 @@ internal class HypervisorTest { metalDriver.init(monitor) metalDriver.setImage(vmm) - metalDriver.setPower(PowerState.POWER_ON) + metalDriver.start() delay(5) -- cgit v1.2.3 From 59a7470853957d6055c120e9bf8658b4b7b48879 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 13 Mar 2020 15:50:58 +0100 Subject: feat: Add infrastructure for failures --- .../compute/core/execution/ShutdownException.kt | 53 +++++++++++++ 
.../compute/core/image/FlopsApplicationImage.kt | 9 +-- .../opendc/compute/metal/driver/BareMetalDriver.kt | 3 +- .../compute/metal/driver/SimpleBareMetalDriver.kt | 90 ++++++++++++++-------- .../virt/driver/hypervisor/HypervisorVirtDriver.kt | 9 ++- .../metal/driver/SimpleBareMetalDriverTest.kt | 3 +- .../atlarge/opendc/core/failure/FailureDomain.kt | 35 +++++++++ 7 files changed, 158 insertions(+), 44 deletions(-) create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt create mode 100644 opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt new file mode 100644 index 00000000..abf6f8db --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt @@ -0,0 +1,53 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.core.execution + +import kotlinx.coroutines.CancellationException + +/** + * This exception is thrown by the underlying [ServerContext] to indicate that a shutdown signal + * has been sent to the server. + */ +public class ShutdownException(message: String? = null, override val cause: Throwable? = null) : CancellationException(message) + +/** + * This method terminates the current active coroutine if the specified [CancellationException] is caused + * by a shutdown. + */ +public fun CancellationException.assertShutdown() { + if (this is ShutdownException) { + throw this + } +} + +/** + * This method terminates the current active coroutine if the specified [CancellationException] is caused + * by a failure. 
+ */ +public fun CancellationException.assertFailure() { + if (this is ShutdownException && cause != null) { + throw this + } +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt index 107237ea..1596b3b9 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt @@ -26,7 +26,9 @@ package com.atlarge.opendc.compute.core.image import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.core.resource.TagContainer +import kotlinx.coroutines.ensureActive import kotlinx.coroutines.isActive +import java.lang.Exception import java.util.UUID import kotlin.coroutines.coroutineContext import kotlin.math.min @@ -64,11 +66,8 @@ data class FlopsApplicationImage( val burst = LongArray(cores) { flops / cores } val maxUsage = DoubleArray(cores) { i -> ctx.cpus[i].frequency * utilization } - while (coroutineContext.isActive) { - if (burst.all { it == 0L }) { - break - } - + while (burst.any { it != 0L }) { + coroutineContext.ensureActive() ctx.run(burst, maxUsage, Long.MAX_VALUE) } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt index fb2ff355..3956338b 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt @@ -28,6 +28,7 @@ import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.metal.Node import 
com.atlarge.opendc.compute.metal.monitor.NodeMonitor +import com.atlarge.opendc.core.failure.FailureDomain import com.atlarge.opendc.core.power.Powerable import com.atlarge.opendc.core.services.AbstractServiceKey import kotlinx.coroutines.flow.Flow @@ -36,7 +37,7 @@ import java.util.UUID /** * A driver interface for the management interface of a bare-metal compute node. */ -public interface BareMetalDriver : Powerable { +public interface BareMetalDriver : Powerable, FailureDomain { /** * The amount of work done by the machine in percentage with respect to the total amount of processing power * available. diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 2d803aa5..5f5dfb66 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -33,6 +33,8 @@ import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.MemoryUnit import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.execution.ServerManagementContext +import com.atlarge.opendc.compute.core.execution.ShutdownException +import com.atlarge.opendc.compute.core.execution.assertFailure import com.atlarge.opendc.compute.core.image.EmptyImage import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.metal.Node @@ -42,6 +44,7 @@ import com.atlarge.opendc.compute.metal.power.ConstantPowerModel import com.atlarge.opendc.core.power.PowerModel import kotlinx.coroutines.CancellationException import kotlinx.coroutines.Job +import kotlinx.coroutines.cancel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.launch @@ -50,6 +53,7 @@ import kotlin.math.ceil import 
kotlin.math.max import kotlin.math.min import kotlinx.coroutines.withContext +import java.lang.Exception /** * A basic implementation of the [BareMetalDriver] that simulates an [Image] running on a bare-metal machine. @@ -100,9 +104,9 @@ public class SimpleBareMetalDriver( private val flavor = Flavor(cpus.size, memoryUnits.map { it.size }.sum()) /** - * The job that is running the image. + * The current active server context. */ - private var job: Job? = null + private var serverContext: BareMetalServerContext? = null /** * The signal containing the load of the server. @@ -134,7 +138,7 @@ public class SimpleBareMetalDriver( server.serviceRegistry[BareMetalDriver.Key] = this@SimpleBareMetalDriver node = node.copy(state = NodeState.BOOT, server = server) - launch() + serverContext = BareMetalServerContext() return@withContext node } @@ -144,8 +148,8 @@ public class SimpleBareMetalDriver( } // We terminate the image running on the machine - job?.cancel() - job = null + serverContext!!.cancel(fail = false) + serverContext = null node = node.copy(state = NodeState.SHUTOFF, server = null) return@withContext node @@ -163,46 +167,56 @@ public class SimpleBareMetalDriver( override suspend fun refresh(): Node = withContext(domain.coroutineContext) { node } - /** - * Launch the server image on the machine. - */ - private suspend fun launch() { - val serverContext = serverCtx - - job = domain.launch { - serverContext.init() - try { - node.server!!.image(serverContext) - serverContext.exit() - } catch (cause: Throwable) { - serverContext.exit(cause) - } - } - } - - private val serverCtx = object : ServerManagementContext { - private var initialized: Boolean = false + private inner class BareMetalServerContext : ServerManagementContext { + private val job: Job + private var finalized: Boolean = false override val cpus: List = this@SimpleBareMetalDriver.cpus override val server: Server get() = node.server!! 
- override suspend fun init() { - if (initialized) { - throw IllegalStateException() + init { + job = domain.launch { + init() + try { + server.image(this@BareMetalServerContext) + exit() + } catch (cause: Throwable) { + exit(cause) + } } - initialized = true + } + /** + * Cancel the image running on the machine. + */ + suspend fun cancel(fail: Boolean) { + if (fail) + job.cancel(ShutdownException(cause = Exception("Random failure"))) + else + job.cancel(ShutdownException()) + job.join() + } + + override suspend fun init() { val server = server.copy(state = ServerState.ACTIVE) node = node.copy(state = NodeState.ACTIVE, server = server) } override suspend fun exit(cause: Throwable?) { - initialized = false - - val serverState = if (cause == null) ServerState.SHUTOFF else ServerState.ERROR - val nodeState = if (cause == null) node.state else NodeState.ERROR + finalized = true + + val serverState = + if (cause == null || (cause is ShutdownException && cause.cause == null)) + ServerState.SHUTOFF + else + ServerState.ERROR + val nodeState = + if (cause == null || (cause is ShutdownException && cause.cause != null)) + node.state + else + NodeState.ERROR val server = server.copy(state = serverState) node = node.copy(state = nodeState, server = server) } @@ -211,6 +225,7 @@ public class SimpleBareMetalDriver( override suspend fun run(burst: LongArray, limit: DoubleArray, deadline: Long) { require(burst.size == limit.size) { "Array dimensions do not match" } + assert(!finalized) { "Server instance is already finalized" } // If run is called in at the same timestamp as the previous call, cancel the load flush flush?.cancel() @@ -237,8 +252,9 @@ public class SimpleBareMetalDriver( try { delay(duration) - } catch (_: CancellationException) { - // On cancellation, we compute and return the remaining burst + } catch (e: CancellationException) { + // On non-failure cancellation, we compute and return the remaining burst + e.assertFailure() } val end = 
simulationContext.clock.millis() @@ -259,4 +275,10 @@ public class SimpleBareMetalDriver( } } } + + override suspend fun fail() { + withContext(domain.coroutineContext) { + serverContext?.cancel(fail = true) + } + } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt index 430e5a37..8bce7d9d 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt @@ -32,6 +32,7 @@ import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.compute.core.execution.ServerManagementContext +import com.atlarge.opendc.compute.core.execution.assertFailure import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.virt.driver.VirtDriver @@ -297,11 +298,13 @@ class HypervisorVirtDriver( activeVms += this reschedule() chan.receive() - } catch (_: CancellationException) { + } catch (e: CancellationException) { // On cancellation, we compute and return the remaining burst + e.assertFailure() + } finally { + activeVms -= this + reschedule() } - activeVms -= this - reschedule() } } } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 24a65b40..b78c0b8c 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ 
b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -34,6 +34,7 @@ import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor +import kotlinx.coroutines.delay import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -83,6 +84,6 @@ internal class SimpleBareMetalDriverTest { system.terminate() } - assertEquals(finalState, ServerState.SHUTOFF) + assertEquals(ServerState.SHUTOFF, finalState) } } diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt new file mode 100644 index 00000000..b1c7ccd3 --- /dev/null +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt @@ -0,0 +1,35 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.core.failure + +/** + * A logical or physical component in a computing environment which may fail. + */ +public interface FailureDomain { + /** + * Fail the domain externally. + */ + public suspend fun fail() +} -- cgit v1.2.3 From afe62e3cb8e2050544b4df0f8bbf071abe0e8dce Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 13 Mar 2020 20:39:00 +0100 Subject: feat: Propagate machine failure to VM --- .../compute/core/image/FlopsApplicationImage.kt | 2 - .../compute/metal/driver/SimpleBareMetalDriver.kt | 21 ++++--- .../virt/driver/hypervisor/HypervisorImage.kt | 12 ++-- .../virt/driver/hypervisor/HypervisorVirtDriver.kt | 65 ++++++++++++++-------- .../metal/driver/SimpleBareMetalDriverTest.kt | 1 - 5 files changed, 59 insertions(+), 42 deletions(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt index 1596b3b9..e77b55a6 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsApplicationImage.kt @@ -27,8 +27,6 @@ package com.atlarge.opendc.compute.core.image import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.core.resource.TagContainer import kotlinx.coroutines.ensureActive -import kotlinx.coroutines.isActive -import java.lang.Exception import java.util.UUID import kotlin.coroutines.coroutineContext import kotlin.math.min diff --git 
a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 5f5dfb66..fbc5c0ce 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -168,7 +168,6 @@ public class SimpleBareMetalDriver( override suspend fun refresh(): Node = withContext(domain.coroutineContext) { node } private inner class BareMetalServerContext : ServerManagementContext { - private val job: Job private var finalized: Boolean = false override val cpus: List = this@SimpleBareMetalDriver.cpus @@ -176,15 +175,13 @@ public class SimpleBareMetalDriver( override val server: Server get() = node.server!! - init { - job = domain.launch { - init() - try { - server.image(this@BareMetalServerContext) - exit() - } catch (cause: Throwable) { - exit(cause) - } + private val job = domain.launch { + init() + try { + server.image(this@BareMetalServerContext) + exit() + } catch (cause: Throwable) { + exit(cause) } } @@ -200,6 +197,8 @@ public class SimpleBareMetalDriver( } override suspend fun init() { + assert(!finalized) { "Machine is already finalized" } + val server = server.copy(state = ServerState.ACTIVE) node = node.copy(state = NodeState.ACTIVE, server = server) } @@ -259,7 +258,7 @@ public class SimpleBareMetalDriver( val end = simulationContext.clock.millis() // Flush the load if the do not receive a new run call for the same timestamp - flush = domain.launch { + flush = domain.launch(job) { delay(1) usageSignal.value = 0.0 } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt index 8d055953..0f4d3c15 100644 
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt @@ -29,6 +29,7 @@ import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.virt.driver.VirtDriver import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import com.atlarge.opendc.core.resource.TagContainer +import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.suspendCancellableCoroutine import java.util.UUID @@ -43,11 +44,12 @@ class HypervisorImage( override val tags: TagContainer = emptyMap() override suspend fun invoke(ctx: ServerContext) { - val driver = HypervisorVirtDriver(ctx, hypervisorMonitor) + coroutineScope { + val driver = HypervisorVirtDriver(ctx, hypervisorMonitor, this) + ctx.publishService(VirtDriver.Key, driver) - ctx.publishService(VirtDriver.Key, driver) - - // Suspend image until it is cancelled - suspendCancellableCoroutine {} + // Suspend image until it is cancelled + suspendCancellableCoroutine {} + } } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt index 8bce7d9d..1ff33c0c 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt @@ -24,7 +24,7 @@ package com.atlarge.opendc.compute.virt.driver.hypervisor -import com.atlarge.odcsim.SimulationContext +import com.atlarge.odcsim.Domain import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ProcessingUnit @@ -32,6 +32,7 @@ import com.atlarge.opendc.compute.core.Server import 
com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.compute.core.execution.ServerManagementContext +import com.atlarge.opendc.compute.core.execution.ShutdownException import com.atlarge.opendc.compute.core.execution.assertFailure import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.core.monitor.ServerMonitor @@ -41,7 +42,9 @@ import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import com.atlarge.opendc.core.workload.IMAGE_PERF_INTERFERENCE_MODEL import com.atlarge.opendc.core.workload.PerformanceInterferenceModel import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Job +import kotlinx.coroutines.cancelAndJoin import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.launch import java.util.UUID @@ -54,7 +57,8 @@ import kotlin.math.min */ class HypervisorVirtDriver( private val hostContext: ServerContext, - private val monitor: HypervisorMonitor + private val monitor: HypervisorMonitor, + private val coroutineScope: CoroutineScope ) : VirtDriver { /** * A set for tracking the VM context objects. @@ -80,7 +84,7 @@ class HypervisorVirtDriver( val server = Server(UUID.randomUUID(), "", emptyMap(), flavor, image, ServerState.BUILD) availableMemory -= requiredMemory - vms.add(VmServerContext(server, monitor, simulationContext)) + vms.add(VmServerContext(server, monitor, simulationContext.domain)) monitors.forEach { it.onUpdate(vms.size, availableMemory) } return server } @@ -93,6 +97,11 @@ class HypervisorVirtDriver( monitors.remove(monitor) } + /** + * A flag to indicate the driver is stopped. + */ + private var stopped: Boolean = false + /** * The set of [VmServerContext] instances that is being scheduled at the moment. 
*/ @@ -109,12 +118,12 @@ class HypervisorVirtDriver( private suspend fun reschedule() { flush() - // Do not schedule a call if there is no work to schedule - if (activeVms.isEmpty()) { + // Do not schedule a call if there is no work to schedule or the driver stopped. + if (stopped || activeVms.isEmpty()) { return } - val call = simulationContext.domain.launch { + val call = coroutineScope.launch { val start = simulationContext.clock.millis() val vms = activeVms.toSet() @@ -210,17 +219,17 @@ class HypervisorVirtDriver( ) } this.call = call - call.invokeOnCompletion { this.call = null } } /** * Flush the progress of the current active VMs. */ - private fun flush() { + private suspend fun flush() { val call = call ?: return // If there is no active call, there is nothing to flush - // The progress is actually flushed in the coroutine when it notices we cancel it and wait for its + // The progress is actually flushed in the coroutine when it notices: we cancel it and wait for its // completion. 
- call.cancel() + call.cancelAndJoin() + this.call = null } /** @@ -241,15 +250,16 @@ class HypervisorVirtDriver( internal inner class VmServerContext( override var server: Server, val monitor: ServerMonitor, - ctx: SimulationContext + val domain: Domain ) : ServerManagementContext { + private var finalized: Boolean = false lateinit var requests: List lateinit var burst: LongArray var deadline: Long = 0L var chan = Channel(Channel.RENDEZVOUS) private var initialized: Boolean = false - internal val job: Job = ctx.domain.launch { + internal val job: Job = coroutineScope.launch { init() try { server.image(this@VmServerContext) @@ -259,27 +269,36 @@ class HypervisorVirtDriver( } } + private suspend fun setServer(value: Server) { + val field = server + if (field.state != value.state) { + monitor.onUpdate(value, field.state) + } + + server = value + } + override val cpus: List = hostContext.cpus.take(server.flavor.cpuCount) override suspend fun init() { - if (initialized) { - throw IllegalStateException() - } + assert(!finalized) { "VM is already finalized" } - val previousState = server.state - server = server.copy(state = ServerState.ACTIVE) - monitor.onUpdate(server, previousState) + setServer(server.copy(state = ServerState.ACTIVE)) initialized = true } override suspend fun exit(cause: Throwable?) 
{ - val previousState = server.state - val state = if (cause == null) ServerState.SHUTOFF else ServerState.ERROR - server = server.copy(state = state) + finalized = true + + val serverState = + if (cause == null || (cause is ShutdownException && cause.cause == null)) + ServerState.SHUTOFF + else + ServerState.ERROR + setServer(server.copy(state = serverState)) availableMemory += server.flavor.memorySize - monitor.onUpdate(server, previousState) - initialized = false vms.remove(this) + monitors.forEach { it.onUpdate(vms.size, availableMemory) } } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index b78c0b8c..166e93b8 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -34,7 +34,6 @@ import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor -import kotlinx.coroutines.delay import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext -- cgit v1.2.3 From 95c9ae8a7c4efae57caba9863dfc3e10df23c2fd Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 13 Mar 2020 20:55:55 +0100 Subject: [ci skip] feat: Prototype design for FailureInjector --- .../metal/driver/SimpleBareMetalDriverTest.kt | 6 +++ .../atlarge/opendc/core/failure/FailureInjector.kt | 60 ++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt diff --git 
a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 166e93b8..1a2440c2 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -34,6 +34,8 @@ import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor +import com.atlarge.opendc.core.failure.FailureInjector +import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -76,6 +78,10 @@ internal class SimpleBareMetalDriverTest { driver.setImage(image) driver.start() } + + + val injector = FailureInjector(listOf(driver)) + injector() } runBlocking { diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt new file mode 100644 index 00000000..456e18bb --- /dev/null +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt @@ -0,0 +1,60 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * 
The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.core.failure + +import kotlinx.coroutines.delay +import kotlinx.coroutines.isActive +import java.util.Random +import kotlin.coroutines.coroutineContext +import kotlin.math.ln +import kotlin.random.asKotlinRandom + +/** + * An entity that injects failures into a system. + * + * @param failureDomains The failure domains to be included. + */ +public class FailureInjector(private val failureDomains: List) { + /** + * The [Random] instance to generate the failures. + */ + private val random = Random() + + /** + * Start the failure injector process. 
+ */ + public suspend operator fun invoke() { + val targets = HashSet(failureDomains) + val mu = 20.0 + while (targets.isNotEmpty() && coroutineContext.isActive) { + delay(random.expovariate(mu)) + val target = targets.random(random.asKotlinRandom()) + targets -= target + target.fail() + } + } + + private fun Random.expovariate(mu: Double) = (-mu * ln(1 - nextDouble())).toLong() +} -- cgit v1.2.3 From c9cd6bb12eee73562ed9078f01aa041c7f5ed8ae Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 15 Mar 2020 17:20:08 +0100 Subject: feat: Make FaultInjector more generic --- .../compute/metal/driver/SimpleBareMetalDriver.kt | 5 +- .../metal/driver/SimpleBareMetalDriverTest.kt | 7 ++- .../atlarge/opendc/core/failure/FailureDomain.kt | 7 +++ .../atlarge/opendc/core/failure/FailureInjector.kt | 60 ---------------------- .../atlarge/opendc/core/failure/FaultInjector.kt | 35 +++++++++++++ .../core/failure/UncorrelatedFaultInjector.kt | 48 +++++++++++++++++ 6 files changed, 97 insertions(+), 65 deletions(-) delete mode 100644 opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt create mode 100644 opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FaultInjector.kt create mode 100644 opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index fbc5c0ce..4fe8d740 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -43,8 +43,8 @@ import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.compute.metal.power.ConstantPowerModel import 
com.atlarge.opendc.core.power.PowerModel import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Job -import kotlinx.coroutines.cancel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.launch @@ -275,6 +275,9 @@ public class SimpleBareMetalDriver( } } + override val scope: CoroutineScope + get() = domain + override suspend fun fail() { withContext(domain.coroutineContext) { serverContext?.cancel(fail = true) diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 1a2440c2..9378b5d7 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -34,8 +34,7 @@ import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor -import com.atlarge.opendc.core.failure.FailureInjector -import kotlinx.coroutines.channels.Channel +import com.atlarge.opendc.core.failure.FaultInjector import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -80,8 +79,8 @@ internal class SimpleBareMetalDriverTest { } - val injector = FailureInjector(listOf(driver)) - injector() + val injector = FaultInjector() + injector.enqueue(driver) } runBlocking { diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt index b1c7ccd3..91ca9b83 100644 --- 
a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt @@ -24,10 +24,17 @@ package com.atlarge.opendc.core.failure +import kotlinx.coroutines.CoroutineScope + /** * A logical or physical component in a computing environment which may fail. */ public interface FailureDomain { + /** + * The lifecycle of the failure domain to which a [FaultInjector] will attach. + */ + public val scope: CoroutineScope + /** * Fail the domain externally. */ diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt deleted file mode 100644 index 456e18bb..00000000 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureInjector.kt +++ /dev/null @@ -1,60 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.core.failure - -import kotlinx.coroutines.delay -import kotlinx.coroutines.isActive -import java.util.Random -import kotlin.coroutines.coroutineContext -import kotlin.math.ln -import kotlin.random.asKotlinRandom - -/** - * An entity that injects failures into a system. - * - * @param failureDomains The failure domains to be included. - */ -public class FailureInjector(private val failureDomains: List) { - /** - * The [Random] instance to generate the failures. - */ - private val random = Random() - - /** - * Start the failure injector process. - */ - public suspend operator fun invoke() { - val targets = HashSet(failureDomains) - val mu = 20.0 - while (targets.isNotEmpty() && coroutineContext.isActive) { - delay(random.expovariate(mu)) - val target = targets.random(random.asKotlinRandom()) - targets -= target - target.fail() - } - } - - private fun Random.expovariate(mu: Double) = (-mu * ln(1 - nextDouble())).toLong() -} diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FaultInjector.kt new file mode 100644 index 00000000..ac7a08de --- /dev/null +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FaultInjector.kt @@ -0,0 +1,35 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of 
the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.core.failure + +/** + * An interface for stochastically injecting faults into a running system. + */ +public interface FaultInjector { + /** + * Enqueue the specified [FailureDomain] into the queue as candidate for failure injection in the future. 
+ */ + public fun enqueue(domain: FailureDomain) +} diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt new file mode 100644 index 00000000..95127deb --- /dev/null +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt @@ -0,0 +1,48 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.core.failure + +import kotlinx.coroutines.delay +import kotlinx.coroutines.launch +import kotlin.math.ln +import kotlin.random.Random + +/** + * A [FaultInjector] that injects uncorrelated faults into the system, meaning that failures of the subsystems are + * independent. 
+ */ +public class UncorrelatedFaultInjector(private val mu: Double = 256.0, private val random: Random = Random.Default) : FaultInjector { + /** + * Enqueue the specified [FailureDomain] to fail some time in the future. + */ + override fun enqueue(domain: FailureDomain) { + domain.scope.launch { + delay(random.expovariate(mu)) + domain.fail() + } + } + + private fun Random.expovariate(mu: Double) = (-mu * ln(1 - nextDouble())).toLong() +} -- cgit v1.2.3 From bafbf78e9af83d4fab41e10f3d9168d2cbe71353 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 15 Mar 2020 17:35:20 +0100 Subject: feat: Add support for failing hypervisors in provisioner --- .../metal/service/SimpleProvisioningService.kt | 1 - .../opendc/compute/virt/service/HypervisorView.kt | 11 ++++++ .../opendc/compute/virt/service/NodeView.kt | 11 ------ .../virt/service/SimpleVirtProvisioningService.kt | 45 +++++++++++----------- .../virt/service/allocation/AllocationPolicy.kt | 4 +- .../allocation/AvailableMemoryAllocationPolicy.kt | 4 +- .../NumberOfActiveServersAllocationPolicy.kt | 4 +- .../metal/driver/SimpleBareMetalDriverTest.kt | 4 +- 8 files changed, 42 insertions(+), 42 deletions(-) create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/NodeView.kt diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt index 117e502c..d8fe0dd9 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt @@ -62,7 +62,6 @@ public class SimpleProvisioningService(val domain: Domain) : ProvisioningService 
override suspend fun deploy(node: Node, image: Image, monitor: ServerMonitor): Node = withContext(domain.coroutineContext) { val driver = nodes[node]!! - driver.setImage(image) val newNode = driver.reboot() monitors[newNode.server!!] = monitor diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt new file mode 100644 index 00000000..996bd8eb --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt @@ -0,0 +1,11 @@ +package com.atlarge.opendc.compute.virt.service + +import com.atlarge.opendc.compute.core.Server +import com.atlarge.opendc.compute.virt.driver.hypervisor.HypervisorImage + +class HypervisorView( + var server: Server, + val hypervisor: HypervisorImage, + var numberOfActiveServers: Int, + var availableMemory: Long +) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/NodeView.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/NodeView.kt deleted file mode 100644 index 41e67624..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/NodeView.kt +++ /dev/null @@ -1,11 +0,0 @@ -package com.atlarge.opendc.compute.virt.service - -import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.virt.driver.hypervisor.HypervisorImage - -class NodeView( - val node: Node, - val hypervisor: HypervisorImage, - var numberOfActiveServers: Int, - var availableMemory: Long -) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 17960186..f0bb4e25 100644 --- 
a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -6,7 +6,6 @@ import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.core.monitor.ServerMonitor -import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.driver.VirtDriver import com.atlarge.opendc.compute.virt.driver.VirtDriverMonitor @@ -24,24 +23,24 @@ class SimpleVirtProvisioningService( private val hypervisorMonitor: HypervisorMonitor ) : VirtProvisioningService, ServerMonitor { /** - * The nodes that are controlled by the service. + * The hypervisors that have been launched by the service. */ - internal lateinit var nodes: List + private val hypervisors: MutableMap = mutableMapOf() /** - * The available nodes. + * The available hypervisors. */ - internal val availableNodes: MutableSet = mutableSetOf() + private val availableHypervisors: MutableSet = mutableSetOf() /** * The incoming images to be processed by the provisioner. */ - internal val incomingImages: MutableSet = mutableSetOf() + private val incomingImages: MutableSet = mutableSetOf() /** * The active images in the system. */ - internal val activeImages: MutableSet = mutableSetOf() + private val activeImages: MutableSet = mutableSetOf() init { ctx.domain.launch { @@ -49,23 +48,23 @@ class SimpleVirtProvisioningService( val deployedNodes = provisionedNodes.map { node -> val hypervisorImage = HypervisorImage(hypervisorMonitor) val deployedNode = provisioningService.deploy(node, hypervisorImage, this@SimpleVirtProvisioningService) - val nodeView = NodeView( - deployedNode, + val server = deployedNode.server!! 
+ val hvView = HypervisorView( + server, hypervisorImage, 0, - deployedNode.server!!.flavor.memorySize + server.flavor.memorySize ) yield() - deployedNode.server.serviceRegistry[VirtDriver.Key].addMonitor(object : VirtDriverMonitor { + server.serviceRegistry[VirtDriver.Key].addMonitor(object : VirtDriverMonitor { override suspend fun onUpdate(numberOfActiveServers: Int, availableMemory: Long) { - nodeView.numberOfActiveServers = numberOfActiveServers - nodeView.availableMemory = availableMemory + hvView.numberOfActiveServers = numberOfActiveServers + hvView.availableMemory = availableMemory } }) - nodeView + server to hvView } - nodes = deployedNodes.map { it.node } - availableNodes.addAll(deployedNodes) + hypervisors.putAll(deployedNodes) } } @@ -86,11 +85,9 @@ class SimpleVirtProvisioningService( for (imageInstance in imagesToBeScheduled) { println("Spawning $imageInstance") - - val selectedNode = availableNodes.minWith(allocationPolicy().thenBy { it.node.uid }) - + val selectedNode = availableHypervisors.minWith(allocationPolicy().thenBy { it.server.uid }) ?: break try { - imageInstance.server = selectedNode?.node!!.server!!.serviceRegistry[VirtDriver.Key].spawn( + imageInstance.server = selectedNode.server.serviceRegistry[VirtDriver.Key].spawn( imageInstance.image, imageInstance.monitor, imageInstance.flavor @@ -107,10 +104,14 @@ class SimpleVirtProvisioningService( override suspend fun onUpdate(server: Server, previousState: ServerState) { when (server.state) { ServerState.ACTIVE -> { - // TODO handle hypervisor server becoming active + val hv = hypervisors[server] ?: return + availableHypervisors += hv + requestCycle() } ServerState.SHUTOFF, ServerState.ERROR -> { - // TODO handle hypervisor server shutting down or failing + val hv = hypervisors[server] ?: return + availableHypervisors -= hv + requestCycle() } else -> throw IllegalStateException() } diff --git 
a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AllocationPolicy.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AllocationPolicy.kt index a1c0ab9a..e2871cca 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AllocationPolicy.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AllocationPolicy.kt @@ -1,7 +1,7 @@ package com.atlarge.opendc.compute.virt.service.allocation import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.virt.service.NodeView +import com.atlarge.opendc.compute.virt.service.HypervisorView /** * A policy for selecting the [Node] an image should be deployed to, @@ -10,5 +10,5 @@ interface AllocationPolicy { /** * Builds the logic of the policy. */ - operator fun invoke(): Comparator + operator fun invoke(): Comparator } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AvailableMemoryAllocationPolicy.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AvailableMemoryAllocationPolicy.kt index b3e9d77e..f095849b 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AvailableMemoryAllocationPolicy.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/AvailableMemoryAllocationPolicy.kt @@ -1,12 +1,12 @@ package com.atlarge.opendc.compute.virt.service.allocation -import com.atlarge.opendc.compute.virt.service.NodeView +import com.atlarge.opendc.compute.virt.service.HypervisorView /** * Allocation policy that selects the node with the most available memory. 
*/ class AvailableMemoryAllocationPolicy : AllocationPolicy { - override fun invoke(): Comparator = Comparator { o1, o2 -> + override fun invoke(): Comparator = Comparator { o1, o2 -> compareValuesBy(o1, o2) { -it.availableMemory } } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/NumberOfActiveServersAllocationPolicy.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/NumberOfActiveServersAllocationPolicy.kt index 9d6582dd..59e48465 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/NumberOfActiveServersAllocationPolicy.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/NumberOfActiveServersAllocationPolicy.kt @@ -1,13 +1,13 @@ package com.atlarge.opendc.compute.virt.service.allocation -import com.atlarge.opendc.compute.virt.service.NodeView +import com.atlarge.opendc.compute.virt.service.HypervisorView import kotlinx.coroutines.runBlocking /** * Allocation policy that selects the node with the least amount of active servers. 
*/ class NumberOfActiveServersAllocationPolicy : AllocationPolicy { - override fun invoke(): Comparator = Comparator { o1, o2 -> + override fun invoke(): Comparator = Comparator { o1, o2 -> runBlocking { compareValuesBy(o1, o2) { it.numberOfActiveServers } } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 9378b5d7..1b7a47d5 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -35,6 +35,7 @@ import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.core.failure.FaultInjector +import com.atlarge.opendc.core.failure.UncorrelatedFaultInjector import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -78,8 +79,7 @@ internal class SimpleBareMetalDriverTest { driver.start() } - - val injector = FaultInjector() + val injector = UncorrelatedFaultInjector() injector.enqueue(driver) } -- cgit v1.2.3 From a5d22796a95b187bc07cbd55a2289185bd9092b8 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 16 Mar 2020 22:42:58 +0100 Subject: feat: Track VM failures in SC20 experiment --- .../metal/driver/SimpleBareMetalDriverTest.kt | 5 --- .../experiments/sc20/Sc20HypervisorMonitor.kt | 40 ------------------ .../atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 49 ++++++++++++++++++++++ .../opendc/experiments/sc20/TestExperiment.kt | 14 ++----- 4 files changed, 52 insertions(+), 56 deletions(-) delete mode 100644 opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20HypervisorMonitor.kt create mode 100644 
opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 1b7a47d5..166e93b8 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -34,8 +34,6 @@ import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor -import com.atlarge.opendc.core.failure.FaultInjector -import com.atlarge.opendc.core.failure.UncorrelatedFaultInjector import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -78,9 +76,6 @@ internal class SimpleBareMetalDriverTest { driver.setImage(image) driver.start() } - - val injector = UncorrelatedFaultInjector() - injector.enqueue(driver) } runBlocking { diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20HypervisorMonitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20HypervisorMonitor.kt deleted file mode 100644 index 9e8f0fa8..00000000 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20HypervisorMonitor.kt +++ /dev/null @@ -1,40 +0,0 @@ -package com.atlarge.opendc.experiments.sc20 - -import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.metal.driver.BareMetalDriver -import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor -import kotlinx.coroutines.flow.first -import java.io.BufferedWriter -import java.io.Closeable -import 
java.io.FileWriter - -class Sc20HypervisorMonitor( - destination: String -) : HypervisorMonitor, Closeable { - private val outputFile = BufferedWriter(FileWriter(destination)) - - init { - outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw\n") - } - - override suspend fun onSliceFinish( - time: Long, - requestedBurst: Long, - grantedBurst: Long, - numberOfDeployedImages: Int, - hostServer: Server - ) { - // Assume for now that the host is not virtualized and measure the current power draw - val driver = hostServer.serviceRegistry[BareMetalDriver.Key] - val usage = driver.usage.first() - val powerDraw = driver.powerDraw.first() - - outputFile.write("$time,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},$usage,$powerDraw") - outputFile.newLine() - } - - override fun close() { - outputFile.flush() - outputFile.close() - } -} diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt new file mode 100644 index 00000000..6ce9cefa --- /dev/null +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -0,0 +1,49 @@ +package com.atlarge.opendc.experiments.sc20 + +import com.atlarge.opendc.compute.core.Server +import com.atlarge.opendc.compute.core.ServerState +import com.atlarge.opendc.compute.core.monitor.ServerMonitor +import com.atlarge.opendc.compute.metal.driver.BareMetalDriver +import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor +import kotlinx.coroutines.flow.first +import java.io.BufferedWriter +import java.io.Closeable +import java.io.FileWriter + +class Sc20Monitor( + destination: String +) : HypervisorMonitor, ServerMonitor, Closeable { + private val outputFile = BufferedWriter(FileWriter(destination)) + private var failed: Int = 0 + + init { + 
outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw\n") + } + + override suspend fun onUpdate(server: Server, previousState: ServerState) { + if (server.state == ServerState.ERROR) { + failed++ + } + } + + override suspend fun onSliceFinish( + time: Long, + requestedBurst: Long, + grantedBurst: Long, + numberOfDeployedImages: Int, + hostServer: Server + ) { + // Assume for now that the host is not virtualized and measure the current power draw + val driver = hostServer.serviceRegistry[BareMetalDriver.Key] + val usage = driver.usage.first() + val powerDraw = driver.powerDraw.first() + + outputFile.write("$time,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},$usage,$powerDraw,$failed") + outputFile.newLine() + } + + override fun close() { + outputFile.flush() + outputFile.close() + } +} diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index f0d3fc8d..e47438f0 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -27,9 +27,6 @@ package com.atlarge.opendc.experiments.sc20 import com.atlarge.odcsim.SimulationEngineProvider import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor -import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.core.ServerState -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy @@ -85,12 +82,7 @@ class ExperimentParameters(parser: ArgParser) { 
*/ fun main(args: Array) { ArgParser(args).parseInto(::ExperimentParameters).run { - val hypervisorMonitor = Sc20HypervisorMonitor(outputFile) - val monitor = object : ServerMonitor { - override suspend fun onUpdate(server: Server, previousState: ServerState) { - println(server) - } - } + val monitor = Sc20Monitor(outputFile) val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() val system = provider("test") @@ -115,7 +107,7 @@ fun main(args: Array) { AvailableMemoryAllocationPolicy(), simulationContext, environment.platforms[0].zones[0].services[ProvisioningService.Key], - hypervisorMonitor + monitor ) val reader = Sc20TraceReader(File(traceDirectory), performanceInterferenceModel, getSelectedVmList()) @@ -134,6 +126,6 @@ fun main(args: Array) { } // Explicitly close the monitor to flush its buffer - hypervisorMonitor.close() + monitor.close() } } -- cgit v1.2.3 From 5ff443c799322836d532fffb3ff8f720806c32b6 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 16 Mar 2020 23:07:22 +0100 Subject: feat: Add failures to SC20 experiment --- .../kotlin/com/atlarge/opendc/compute/metal/Node.kt | 5 +++++ .../compute/metal/driver/SimpleBareMetalDriver.kt | 2 +- .../virt/service/SimpleVirtProvisioningService.kt | 13 ++++++------- .../core/failure/UncorrelatedFaultInjector.kt | 5 +++-- .../atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 1 + .../opendc/experiments/sc20/TestExperiment.kt | 21 ++++++++++++++++++--- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt index 7df5d99b..55948d3c 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt @@ -43,6 +43,11 @@ data class Node( */ public override val name: String, + /** + * Meta data of the node. 
+ */ + public val metadata: Map = emptyMap(), + /** * The last known state of the compute node. */ diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 4fe8d740..4b9a03a6 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -81,7 +81,7 @@ public class SimpleBareMetalDriver( /** * The machine state. */ - private var node: Node = Node(uid, name, NodeState.SHUTOFF, EmptyImage, null) + private var node: Node = Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null) set(value) { if (field.state != value.state) { domain.launch { diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index f0bb4e25..9fad2396 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -44,8 +44,8 @@ class SimpleVirtProvisioningService( init { ctx.domain.launch { - val provisionedNodes = provisioningService.nodes().toList() - val deployedNodes = provisionedNodes.map { node -> + val provisionedNodes = provisioningService.nodes() + provisionedNodes.forEach { node -> val hypervisorImage = HypervisorImage(hypervisorMonitor) val deployedNode = provisioningService.deploy(node, hypervisorImage, this@SimpleVirtProvisioningService) val server = deployedNode.server!! 
@@ -55,6 +55,7 @@ class SimpleVirtProvisioningService( 0, server.flavor.memorySize ) + hypervisors[server] = hvView yield() server.serviceRegistry[VirtDriver.Key].addMonitor(object : VirtDriverMonitor { override suspend fun onUpdate(numberOfActiveServers: Int, availableMemory: Long) { @@ -62,9 +63,7 @@ class SimpleVirtProvisioningService( hvView.availableMemory = availableMemory } }) - server to hvView } - hypervisors.putAll(deployedNodes) } } @@ -84,9 +83,10 @@ class SimpleVirtProvisioningService( val imagesToBeScheduled = incomingImages.toSet() for (imageInstance in imagesToBeScheduled) { - println("Spawning $imageInstance") val selectedNode = availableHypervisors.minWith(allocationPolicy().thenBy { it.server.uid }) ?: break try { + println("Spawning ${imageInstance.image}") + incomingImages -= imageInstance imageInstance.server = selectedNode.server.serviceRegistry[VirtDriver.Key].spawn( imageInstance.image, imageInstance.monitor, @@ -97,7 +97,6 @@ class SimpleVirtProvisioningService( println("Unable to deploy image due to insufficient memory") } - incomingImages -= imageInstance } } @@ -117,7 +116,7 @@ class SimpleVirtProvisioningService( } } - class ImageView( + data class ImageView( val image: Image, val monitor: ServerMonitor, val flavor: Flavor, diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt index 95127deb..5155a25a 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt @@ -33,13 +33,14 @@ import kotlin.random.Random * A [FaultInjector] that injects uncorrelated faults into the system, meaning that failures of the subsystems are * independent. 
*/ -public class UncorrelatedFaultInjector(private val mu: Double = 256.0, private val random: Random = Random.Default) : FaultInjector { +public class UncorrelatedFaultInjector(private val mu: Double = 1024.0, private val random: Random = Random.Default) : FaultInjector { /** * Enqueue the specified [FailureDomain] to fail some time in the future. */ override fun enqueue(domain: FailureDomain) { domain.scope.launch { - delay(random.expovariate(mu)) + val d = random.expovariate(mu) + delay(d) domain.fail() } } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 6ce9cefa..d3b2d5c6 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -1,5 +1,6 @@ package com.atlarge.opendc.experiments.sc20 +import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.monitor.ServerMonitor diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index e47438f0..a1619fe2 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -30,6 +30,8 @@ import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy +import 
com.atlarge.opendc.core.failure.FailureDomain +import com.atlarge.opendc.core.failure.UncorrelatedFaultInjector import com.atlarge.opendc.format.environment.sc20.Sc20ClusterEnvironmentReader import com.atlarge.opendc.format.trace.sc20.Sc20PerformanceInterferenceReader import com.atlarge.opendc.format.trace.sc20.Sc20TraceReader @@ -37,11 +39,11 @@ import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import com.fasterxml.jackson.module.kotlin.readValue import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default +import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import java.io.File -import java.io.FileInputStream import java.io.FileReader import java.util.ServiceLoader import kotlin.math.max @@ -88,12 +90,14 @@ fun main(args: Array) { val system = provider("test") val root = system.newDomain("root") + val chan = Channel(Channel.CONFLATED) + root.launch { val environment = Sc20ClusterEnvironmentReader(File(environmentFile)) .use { it.construct(root) } val performanceInterferenceStream = if (performanceInterferenceFile != null) { - FileInputStream(File(performanceInterferenceFile!!)) + File(performanceInterferenceFile!!).inputStream().buffered() } else { object {}.javaClass.getResourceAsStream("/env/performance-interference.json") } @@ -103,17 +107,28 @@ fun main(args: Array) { println(simulationContext.clock.instant()) + val bareMetalProvisioner = environment.platforms[0].zones[0].services[ProvisioningService.Key] + val scheduler = SimpleVirtProvisioningService( AvailableMemoryAllocationPolicy(), simulationContext, - environment.platforms[0].zones[0].services[ProvisioningService.Key], + bareMetalProvisioner, monitor ) + root.launch { + chan.receive() + val faultInjector = UncorrelatedFaultInjector(mu = 2e7) + for (node in bareMetalProvisioner.nodes()) { + faultInjector.enqueue(node.metadata["driver"] as FailureDomain) + } + } + val reader = 
Sc20TraceReader(File(traceDirectory), performanceInterferenceModel, getSelectedVmList()) while (reader.hasNext()) { val (time, workload) = reader.next() delay(max(0, time - simulationContext.clock.millis())) + chan.send(Unit) scheduler.deploy(workload.image, monitor, Flavor(workload.image.cores, workload.image.requiredMemory)) } -- cgit v1.2.3 From 43f1376a00342338f4d0affde5e1f2f540ab7e32 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Mar 2020 14:56:21 +0100 Subject: perf: Upgrade to kotlinx-coroutines 1.3.5 due to memory leak This change upgrades the dependency on kotlinx-coroutines due to a memory leak that was introduced in 1.3.2, causing simulations to run out of memory. --- buildSrc/src/main/kotlin/library.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildSrc/src/main/kotlin/library.kt b/buildSrc/src/main/kotlin/library.kt index 6333e351..3b05f3a4 100644 --- a/buildSrc/src/main/kotlin/library.kt +++ b/buildSrc/src/main/kotlin/library.kt @@ -45,5 +45,5 @@ object Library { /** * Kotlin coroutines support */ - val KOTLINX_COROUTINES = "1.3.4" + val KOTLINX_COROUTINES = "1.3.5" } -- cgit v1.2.3 From b3e8e3d196de8b8c1bb904bfb3c6641415cf72bb Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Mar 2020 15:52:10 +0100 Subject: feat: Use Weibull distribution for failures --- .../virt/service/SimpleVirtProvisioningService.kt | 1 - .../opendc/core/failure/UncorrelatedFaultInjector.kt | 19 ++++++++++++++----- .../atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 3 ++- .../atlarge/opendc/experiments/sc20/TestExperiment.kt | 4 ++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 9fad2396..0f38eca1 100644 --- 
a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -96,7 +96,6 @@ class SimpleVirtProvisioningService( } catch (e: InsufficientMemoryOnServerException) { println("Unable to deploy image due to insufficient memory") } - } } diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt index 5155a25a..56706824 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt @@ -24,26 +24,35 @@ package com.atlarge.opendc.core.failure +import com.atlarge.odcsim.simulationContext import kotlinx.coroutines.delay import kotlinx.coroutines.launch -import kotlin.math.ln +import kotlin.math.ln1p +import kotlin.math.pow import kotlin.random.Random /** * A [FaultInjector] that injects uncorrelated faults into the system, meaning that failures of the subsystems are * independent. */ -public class UncorrelatedFaultInjector(private val mu: Double = 1024.0, private val random: Random = Random.Default) : FaultInjector { +public class UncorrelatedFaultInjector(private val alpha: Double, private val beta: Double, private val random: Random = Random) : FaultInjector { /** * Enqueue the specified [FailureDomain] to fail some time in the future. 
*/ override fun enqueue(domain: FailureDomain) { domain.scope.launch { - val d = random.expovariate(mu) - delay(d) + val d = random.weibull(alpha, beta) * 1e3 // Make sure to convert delay to milliseconds + + // Handle long overflow + if (simulationContext.clock.millis() + d <= 0) { + return@launch + } + + delay(d.toLong()) domain.fail() } } - private fun Random.expovariate(mu: Double) = (-mu * ln(1 - nextDouble())).toLong() + // XXX We should extract this in some common package later on. + private fun Random.weibull(alpha: Double, beta: Double) = (beta * (-ln1p(-nextDouble())).pow(1.0 / alpha)) } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index d3b2d5c6..40cb9719 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -18,10 +18,11 @@ class Sc20Monitor( private var failed: Int = 0 init { - outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw\n") + outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw,failedVms\n") } override suspend fun onUpdate(server: Server, previousState: ServerState) { + println("${simulationContext.clock.instant()} ${server.uid} ${server.state}") if (server.state == ServerState.ERROR) { failed++ } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index a1619fe2..69174f0f 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ 
b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -89,7 +89,6 @@ fun main(args: Array) { val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() val system = provider("test") val root = system.newDomain("root") - val chan = Channel(Channel.CONFLATED) root.launch { @@ -118,7 +117,8 @@ fun main(args: Array) { root.launch { chan.receive() - val faultInjector = UncorrelatedFaultInjector(mu = 2e7) + // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 + val faultInjector = UncorrelatedFaultInjector(alpha = 9.66772, beta = 12.23796) for (node in bareMetalProvisioner.nodes()) { faultInjector.enqueue(node.metadata["driver"] as FailureDomain) } -- cgit v1.2.3 From 6b10881f123f5e6a8e7bce1045d02eba5e48c3a2 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Mar 2020 17:37:41 +0100 Subject: [ci skip] feat: Add support for correlated failures --- .../compute/metal/driver/SimpleBareMetalDriver.kt | 37 ++++--- .../virt/service/SimpleVirtProvisioningService.kt | 17 ++-- .../metal/service/SimpleProvisioningServiceTest.kt | 2 +- .../opendc/core/failure/CorrelatedFaultInjector.kt | 111 +++++++++++++++++++++ .../core/failure/UncorrelatedFaultInjector.kt | 2 +- .../opendc/core/services/ServiceRegistry.kt | 2 +- .../opendc/core/services/ServiceRegistryImpl.kt | 2 + .../opendc/experiments/sc20/TestExperiment.kt | 11 +- 8 files changed, 151 insertions(+), 33 deletions(-) create mode 100644 opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 4b9a03a6..a8f3d781 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ 
b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -45,6 +45,7 @@ import com.atlarge.opendc.core.power.PowerModel import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Job +import kotlinx.coroutines.cancel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.launch @@ -82,22 +83,20 @@ public class SimpleBareMetalDriver( * The machine state. */ private var node: Node = Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null) - set(value) { - if (field.state != value.state) { - domain.launch { - monitor.onUpdate(value, field.state) - } - } - if (field.server != null && value.server != null && field.server!!.state != value.server.state) { - domain.launch { - monitor.onUpdate(value.server, field.server!!.state) - } - } + private suspend fun setNode(value: Node) { + val field = node + if (field.state != value.state) { + monitor.onUpdate(value, field.state) + } - field = value + if (field.server != null && value.server != null && field.server.state != value.server.state) { + monitor.onUpdate(value.server, field.server.state) } + node = value + } + /** * The flavor that corresponds to this machine. 
*/ @@ -137,7 +136,7 @@ public class SimpleBareMetalDriver( ) server.serviceRegistry[BareMetalDriver.Key] = this@SimpleBareMetalDriver - node = node.copy(state = NodeState.BOOT, server = server) + setNode(node.copy(state = NodeState.BOOT, server = server)) serverContext = BareMetalServerContext() return@withContext node } @@ -151,7 +150,7 @@ public class SimpleBareMetalDriver( serverContext!!.cancel(fail = false) serverContext = null - node = node.copy(state = NodeState.SHUTOFF, server = null) + setNode(node.copy(state = NodeState.SHUTOFF, server = null)) return@withContext node } @@ -161,7 +160,7 @@ public class SimpleBareMetalDriver( } override suspend fun setImage(image: Image): Node = withContext(domain.coroutineContext) { - node = node.copy(image = image) + setNode(node.copy(image = image)) return@withContext node } @@ -190,9 +189,9 @@ public class SimpleBareMetalDriver( */ suspend fun cancel(fail: Boolean) { if (fail) - job.cancel(ShutdownException(cause = Exception("Random failure"))) + domain.cancel(ShutdownException(cause = Exception("Random failure"))) else - job.cancel(ShutdownException()) + domain.cancel(ShutdownException()) job.join() } @@ -200,7 +199,7 @@ public class SimpleBareMetalDriver( assert(!finalized) { "Machine is already finalized" } val server = server.copy(state = ServerState.ACTIVE) - node = node.copy(state = NodeState.ACTIVE, server = server) + setNode(node.copy(state = NodeState.ACTIVE, server = server)) } override suspend fun exit(cause: Throwable?) { @@ -217,7 +216,7 @@ public class SimpleBareMetalDriver( else NodeState.ERROR val server = server.copy(state = serverState) - node = node.copy(state = nodeState, server = server) + setNode(node.copy(state = nodeState, server = server)) } private var flush: Job? 
= null diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 0f38eca1..a50292a7 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -14,7 +14,6 @@ import com.atlarge.opendc.compute.virt.driver.hypervisor.InsufficientMemoryOnSer import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy import kotlinx.coroutines.launch -import kotlinx.coroutines.yield class SimpleVirtProvisioningService( public override val allocationPolicy: AllocationPolicy, @@ -56,13 +55,6 @@ class SimpleVirtProvisioningService( server.flavor.memorySize ) hypervisors[server] = hvView - yield() - server.serviceRegistry[VirtDriver.Key].addMonitor(object : VirtDriverMonitor { - override suspend fun onUpdate(numberOfActiveServers: Int, availableMemory: Long) { - hvView.numberOfActiveServers = numberOfActiveServers - hvView.availableMemory = availableMemory - } - }) } } } @@ -100,10 +92,19 @@ class SimpleVirtProvisioningService( } override suspend fun onUpdate(server: Server, previousState: ServerState) { + println("${server.uid} ${server.state} ${hypervisors[server]}") when (server.state) { ServerState.ACTIVE -> { val hv = hypervisors[server] ?: return availableHypervisors += hv + + server.serviceRegistry[VirtDriver.Key].addMonitor(object : VirtDriverMonitor { + override suspend fun onUpdate(numberOfActiveServers: Int, availableMemory: Long) { + hv.numberOfActiveServers = numberOfActiveServers + hv.availableMemory = availableMemory + } + }) + requestCycle() } ServerState.SHUTOFF, ServerState.ERROR -> { diff --git 
a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt index a837130d..ef19427e 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt @@ -63,7 +63,7 @@ internal class SimpleProvisioningServiceTest { val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 4) val cpus = List(4) { ProcessingUnit(cpuNode, it, 2400.0) } - val driver = SimpleBareMetalDriver(dom, UUID.randomUUID(), "test", cpus, emptyList()) + val driver = SimpleBareMetalDriver(dom.newDomain(), UUID.randomUUID(), "test", cpus, emptyList()) val provisioner = SimpleProvisioningService(dom) provisioner.create(driver) diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt new file mode 100644 index 00000000..41412195 --- /dev/null +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt @@ -0,0 +1,111 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.core.failure + +import com.atlarge.odcsim.Domain +import com.atlarge.odcsim.simulationContext +import kotlinx.coroutines.Job +import kotlinx.coroutines.delay +import kotlinx.coroutines.ensureActive +import kotlinx.coroutines.launch +import kotlin.math.exp +import kotlin.random.Random +import kotlin.random.asJavaRandom + +/** + * A [FaultInjector] that injects fault in the system which are correlated to each other. Failures do not occur in + * isolation, but will trigger other faults. + */ +public class CorrelatedFaultInjector( + private val domain: Domain, + private val iatScale: Double, + private val iatShape: Double, + private val sizeScale: Double, + private val sizeShape: Double, + random: Random = Random +) : FaultInjector { + /** + * The active failure domains that have been registered. + */ + private val active = mutableSetOf() + + /** + * The [Job] that awaits the nearest fault in the system. + */ + private var job: Job? = null + + /** + * The [Random] instance to use. + */ + private val random: java.util.Random = random.asJavaRandom() + + /** + * Enqueue the specified [FailureDomain] to fail some time in the future. 
+ */ + override fun enqueue(domain: FailureDomain) { + active += domain + + // Clean up the domain if it finishes + domain.scope.coroutineContext[Job]!!.invokeOnCompletion { + this@CorrelatedFaultInjector.domain.launch { + println("CANCELLED") + active -= domain + + if (active.isEmpty()) { + job?.cancel() + job = null + } + } + } + + if (job != null) { + return + } + + job = this.domain.launch { + while (true) { + ensureActive() + + val d = lognvariate(iatScale, iatShape) * 1e3 // Make sure to convert delay to milliseconds + + // Handle long overflow + if (simulationContext.clock.millis() + d <= 0) { + return@launch + } + + delay(d.toLong()) + + val n = lognvariate(sizeScale, sizeShape).toInt() + + for (failureDomain in active.shuffled(random).take(n)) { + failureDomain.fail() + } + } + } + } + + // XXX We should extract this in some common package later on. + private fun lognvariate(scale: Double, shape: Double) = exp(scale + shape * random.nextGaussian()) +} diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt index 56706824..3883eb11 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/UncorrelatedFaultInjector.kt @@ -42,7 +42,7 @@ public class UncorrelatedFaultInjector(private val alpha: Double, private val be override fun enqueue(domain: FailureDomain) { domain.scope.launch { val d = random.weibull(alpha, beta) * 1e3 // Make sure to convert delay to milliseconds - + // Handle long overflow if (simulationContext.clock.millis() + d <= 0) { return@launch diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt index d9a85231..a036a705 100644 --- 
a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt @@ -48,5 +48,5 @@ public interface ServiceRegistry { /** * Register the specified [ServiceKey] in this registry. */ - public operator fun set(key: ServiceKey, service: T) + public operator fun set(key: ServiceKey, service: T): ServiceRegistry } diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt index 91147839..e3fa171d 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt @@ -43,4 +43,6 @@ public class ServiceRegistryImpl : ServiceRegistry { @Suppress("UNCHECKED_CAST") return services[key] as T } + + override fun toString(): String = services.toString() } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 69174f0f..09b6592e 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -30,8 +30,8 @@ import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy +import com.atlarge.opendc.core.failure.CorrelatedFaultInjector import com.atlarge.opendc.core.failure.FailureDomain -import com.atlarge.opendc.core.failure.UncorrelatedFaultInjector import 
com.atlarge.opendc.format.environment.sc20.Sc20ClusterEnvironmentReader import com.atlarge.opendc.format.trace.sc20.Sc20PerformanceInterferenceReader import com.atlarge.opendc.format.trace.sc20.Sc20TraceReader @@ -39,6 +39,7 @@ import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import com.fasterxml.jackson.module.kotlin.readValue import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default +import kotlinx.coroutines.Job import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.launch @@ -115,10 +116,14 @@ fun main(args: Array) { monitor ) - root.launch { + val faultInjectorDomain = root.newDomain(name = "failures") + faultInjectorDomain.launch { chan.receive() // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 - val faultInjector = UncorrelatedFaultInjector(alpha = 9.66772, beta = 12.23796) + val faultInjector = CorrelatedFaultInjector(faultInjectorDomain, + iatScale = -1.39, iatShape = 1.03, + sizeScale = 1.88, sizeShape = 1.25 + ) for (node in bareMetalProvisioner.nodes()) { faultInjector.enqueue(node.metadata["driver"] as FailureDomain) } -- cgit v1.2.3 From b1cf9b2bd9559328c3c9d26e73123e67d2bfea05 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Mar 2020 22:26:15 +0100 Subject: refactor: Rework monitor interfaces --- .../com/atlarge/opendc/compute/core/Server.kt | 3 +- .../opendc/compute/core/execution/ServerContext.kt | 8 +-- .../opendc/compute/core/monitor/ServerMonitor.kt | 13 ++++- .../compute/metal/driver/SimpleBareMetalDriver.kt | 44 ++++++++------- .../opendc/compute/metal/monitor/NodeMonitor.kt | 4 +- .../metal/service/SimpleProvisioningService.kt | 14 ++++- .../virt/driver/hypervisor/HypervisorVirtDriver.kt | 26 +++++---- .../opendc/compute/virt/service/HypervisorView.kt | 7 ++- .../virt/service/SimpleVirtProvisioningService.kt | 56 ++++++++++--------- .../metal/driver/SimpleBareMetalDriverTest.kt | 6 +- 
.../metal/service/SimpleProvisioningServiceTest.kt | 2 +- .../virt/driver/hypervisor/HypervisorTest.kt | 7 +-- .../opendc/core/services/ServiceRegistry.kt | 19 +++++-- .../opendc/core/services/ServiceRegistryImpl.kt | 20 +++---- .../atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 7 +-- .../opendc/experiments/sc20/TestExperiment.kt | 1 - .../environment/sc18/Sc18EnvironmentReader.kt | 6 +- .../sc20/Sc20ClusterEnvironmentReader.kt | 5 +- .../environment/sc20/Sc20EnvironmentReader.kt | 5 +- .../workflows/service/StageWorkflowService.kt | 64 +++++++++++----------- 20 files changed, 174 insertions(+), 143 deletions(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt index 86ec9a5b..31b070a4 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt @@ -28,7 +28,6 @@ import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.core.resource.Resource import com.atlarge.opendc.core.resource.TagContainer import com.atlarge.opendc.core.services.ServiceRegistry -import com.atlarge.opendc.core.services.ServiceRegistryImpl import java.util.UUID /** @@ -68,7 +67,7 @@ public data class Server( /** * The services published by this server. 
*/ - public val serviceRegistry: ServiceRegistry = ServiceRegistryImpl() + public val services: ServiceRegistry = ServiceRegistry() ) : Resource { override fun hashCode(): Int = uid.hashCode() override fun equals(other: Any?): Boolean = other is Server && uid == other.uid diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt index b09a5a7d..c8caaca6 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt @@ -27,7 +27,7 @@ package com.atlarge.opendc.compute.core.execution import com.atlarge.opendc.compute.core.ProcessingUnit import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.core.services.AbstractServiceKey +import com.atlarge.opendc.core.services.ServiceKey /** * Represents the execution context in which an bootable [Image] runs on a [Server]. @@ -44,11 +44,9 @@ public interface ServerContext { public val cpus: List /** - * Publishes the given [service] with key [serviceKey] in the server's registry. + * Publish the specified [service] at the given [ServiceKey]. 
*/ - public suspend fun publishService(serviceKey: AbstractServiceKey, service: T) { - server.serviceRegistry[serviceKey] = service - } + public suspend fun publishService(key: ServiceKey, service: T) /** * Request the specified burst time from the processor cores and suspend execution until a processor core finishes diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt index 26b94ba5..c2b30b9d 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt @@ -26,16 +26,25 @@ package com.atlarge.opendc.compute.core.monitor import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState +import com.atlarge.opendc.core.services.ServiceKey /** * An interface for monitoring the state of a machine. */ public interface ServerMonitor { /** - * This method is invoked when the state of a machine updates. + * This method is synchronously invoked when the state of a machine updates. * * @param server The server which state was updated. * @param previousState The previous state of the server. */ - public suspend fun onUpdate(server: Server, previousState: ServerState) {} + public fun stateChanged(server: Server, previousState: ServerState) {} + + /** + * This method is synchronously invoked when the server publishes a service. + * + * @param server The server that published the service. + * @param key The key of the service that was published. 
+ */ + public fun servicePublished(server: Server, key: ServiceKey<*>) {} } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index a8f3d781..46b4c30c 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -42,6 +42,8 @@ import com.atlarge.opendc.compute.metal.NodeState import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.compute.metal.power.ConstantPowerModel import com.atlarge.opendc.core.power.PowerModel +import com.atlarge.opendc.core.services.ServiceKey +import com.atlarge.opendc.core.services.ServiceRegistry import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Job @@ -83,20 +85,18 @@ public class SimpleBareMetalDriver( * The machine state. */ private var node: Node = Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null) + set(value) { + if (field.state != value.state) { + monitor.stateChanged(value, field.state) + } - private suspend fun setNode(value: Node) { - val field = node - if (field.state != value.state) { - monitor.onUpdate(value, field.state) - } + if (field.server != null && value.server != null && field.server!!.state != value.server.state) { + monitor.stateChanged(value.server, field.server!!.state) + } - if (field.server != null && value.server != null && field.server.state != value.server.state) { - monitor.onUpdate(value.server, field.server.state) + field = value } - node = value - } - /** * The flavor that corresponds to this machine. 
*/ @@ -132,11 +132,11 @@ public class SimpleBareMetalDriver( emptyMap(), flavor, node.image, - ServerState.BUILD + ServerState.BUILD, + ServiceRegistry().put(BareMetalDriver, this@SimpleBareMetalDriver) ) - server.serviceRegistry[BareMetalDriver.Key] = this@SimpleBareMetalDriver - setNode(node.copy(state = NodeState.BOOT, server = server)) + node = node.copy(state = NodeState.BOOT, server = server) serverContext = BareMetalServerContext() return@withContext node } @@ -150,7 +150,7 @@ public class SimpleBareMetalDriver( serverContext!!.cancel(fail = false) serverContext = null - setNode(node.copy(state = NodeState.SHUTOFF, server = null)) + node = node.copy(state = NodeState.SHUTOFF, server = null) return@withContext node } @@ -160,7 +160,7 @@ public class SimpleBareMetalDriver( } override suspend fun setImage(image: Image): Node = withContext(domain.coroutineContext) { - setNode(node.copy(image = image)) + node = node.copy(image = image) return@withContext node } @@ -195,11 +195,17 @@ public class SimpleBareMetalDriver( job.join() } + override suspend fun publishService(key: ServiceKey, service: T) { + val server = server.copy(services = server.services.put(key, service)) + node = node.copy(server = server) + monitor.servicePublished(server, key) + } + override suspend fun init() { assert(!finalized) { "Machine is already finalized" } val server = server.copy(state = ServerState.ACTIVE) - setNode(node.copy(state = NodeState.ACTIVE, server = server)) + node = node.copy(state = NodeState.ACTIVE, server = server) } override suspend fun exit(cause: Throwable?) { @@ -216,7 +222,7 @@ public class SimpleBareMetalDriver( else NodeState.ERROR val server = server.copy(state = serverState) - setNode(node.copy(state = nodeState, server = server)) + node = node.copy(state = nodeState, server = server) } private var flush: Job? 
= null @@ -278,8 +284,6 @@ public class SimpleBareMetalDriver( get() = domain override suspend fun fail() { - withContext(domain.coroutineContext) { - serverContext?.cancel(fail = true) - } + serverContext?.cancel(fail = true) } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt index f35cf57b..bd4b40d8 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt @@ -33,10 +33,10 @@ import com.atlarge.opendc.compute.metal.NodeState */ public interface NodeMonitor : ServerMonitor { /** - * This method is invoked when the state of a bare metal machine updates. + * This method is synchronously invoked when the state of a bare metal machine updates. * * @param node The node for which state was updated. * @param previousState The previous state of the node. 
*/ - public suspend fun onUpdate(node: Node, previousState: NodeState) {} + public fun stateChanged(node: Node, previousState: NodeState) {} } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt index d8fe0dd9..e5cd0a77 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt @@ -32,6 +32,8 @@ import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.driver.BareMetalDriver import com.atlarge.opendc.compute.metal.monitor.NodeMonitor +import com.atlarge.opendc.core.services.ServiceKey +import kotlinx.coroutines.launch import kotlinx.coroutines.withContext /** @@ -68,9 +70,15 @@ public class SimpleProvisioningService(val domain: Domain) : ProvisioningService return@withContext newNode } - override suspend fun onUpdate(server: Server, previousState: ServerState) { - withContext(domain.coroutineContext) { - monitors[server]?.onUpdate(server, previousState) + override fun stateChanged(server: Server, previousState: ServerState) { + domain.launch { + monitors[server]?.stateChanged(server, previousState) + } + } + + override fun servicePublished(server: Server, key: ServiceKey<*>) { + domain.launch { + monitors[server]?.servicePublished(server, key) } } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt index 1ff33c0c..98d8092c 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt 
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt @@ -39,6 +39,7 @@ import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.virt.driver.VirtDriver import com.atlarge.opendc.compute.virt.driver.VirtDriverMonitor import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor +import com.atlarge.opendc.core.services.ServiceKey import com.atlarge.opendc.core.workload.IMAGE_PERF_INTERFERENCE_MODEL import com.atlarge.opendc.core.workload.PerformanceInterferenceModel import kotlinx.coroutines.CancellationException @@ -248,7 +249,7 @@ class HypervisorVirtDriver( } internal inner class VmServerContext( - override var server: Server, + server: Server, val monitor: ServerMonitor, val domain: Domain ) : ServerManagementContext { @@ -269,21 +270,26 @@ class HypervisorVirtDriver( } } - private suspend fun setServer(value: Server) { - val field = server - if (field.state != value.state) { - monitor.onUpdate(value, field.state) - } + override var server: Server = server + set(value) { + if (field.state != value.state) { + monitor.stateChanged(value, field.state) + } - server = value - } + field = value + } override val cpus: List = hostContext.cpus.take(server.flavor.cpuCount) + override suspend fun publishService(key: ServiceKey, service: T) { + server = server.copy(services = server.services.put(key, service)) + monitor.servicePublished(server, key) + } + override suspend fun init() { assert(!finalized) { "VM is already finalized" } - setServer(server.copy(state = ServerState.ACTIVE)) + server = server.copy(state = ServerState.ACTIVE) initialized = true } @@ -295,7 +301,7 @@ class HypervisorVirtDriver( ServerState.SHUTOFF else ServerState.ERROR - setServer(server.copy(state = serverState)) + server = server.copy(state = serverState) availableMemory += server.flavor.memorySize vms.remove(this) diff --git 
a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt index 996bd8eb..97842f18 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/HypervisorView.kt @@ -1,11 +1,12 @@ package com.atlarge.opendc.compute.virt.service import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.virt.driver.hypervisor.HypervisorImage +import com.atlarge.opendc.compute.virt.driver.VirtDriver class HypervisorView( var server: Server, - val hypervisor: HypervisorImage, var numberOfActiveServers: Int, var availableMemory: Long -) +) { + lateinit var driver: VirtDriver +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index a50292a7..6fb821d7 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -8,11 +8,12 @@ import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.compute.virt.driver.VirtDriverMonitor import com.atlarge.opendc.compute.virt.driver.hypervisor.HypervisorImage import com.atlarge.opendc.compute.virt.driver.hypervisor.InsufficientMemoryOnServerException import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy +import 
com.atlarge.opendc.core.services.ServiceKey +import kotlinx.coroutines.Job import kotlinx.coroutines.launch class SimpleVirtProvisioningService( @@ -46,15 +47,7 @@ class SimpleVirtProvisioningService( val provisionedNodes = provisioningService.nodes() provisionedNodes.forEach { node -> val hypervisorImage = HypervisorImage(hypervisorMonitor) - val deployedNode = provisioningService.deploy(node, hypervisorImage, this@SimpleVirtProvisioningService) - val server = deployedNode.server!! - val hvView = HypervisorView( - server, - hypervisorImage, - 0, - server.flavor.memorySize - ) - hypervisors[server] = hvView + provisioningService.deploy(node, hypervisorImage, this@SimpleVirtProvisioningService) } } } @@ -65,21 +58,29 @@ class SimpleVirtProvisioningService( requestCycle() } + private var call: Job? = null + private fun requestCycle() { - ctx.domain.launch { + if (call != null) { + return + } + + val call = ctx.domain.launch { schedule() } + call.invokeOnCompletion { this.call = null } + this.call = call } private suspend fun schedule() { val imagesToBeScheduled = incomingImages.toSet() for (imageInstance in imagesToBeScheduled) { - val selectedNode = availableHypervisors.minWith(allocationPolicy().thenBy { it.server.uid }) ?: break + val selectedHv = availableHypervisors.minWith(allocationPolicy().thenBy { it.server.uid }) ?: break try { println("Spawning ${imageInstance.image}") incomingImages -= imageInstance - imageInstance.server = selectedNode.server.serviceRegistry[VirtDriver.Key].spawn( + imageInstance.server = selectedHv.driver.spawn( imageInstance.image, imageInstance.monitor, imageInstance.flavor @@ -91,21 +92,15 @@ class SimpleVirtProvisioningService( } } - override suspend fun onUpdate(server: Server, previousState: ServerState) { - println("${server.uid} ${server.state} ${hypervisors[server]}") + override fun stateChanged(server: Server, previousState: ServerState) { when (server.state) { ServerState.ACTIVE -> { - val hv = hypervisors[server] ?: return - 
availableHypervisors += hv - - server.serviceRegistry[VirtDriver.Key].addMonitor(object : VirtDriverMonitor { - override suspend fun onUpdate(numberOfActiveServers: Int, availableMemory: Long) { - hv.numberOfActiveServers = numberOfActiveServers - hv.availableMemory = availableMemory - } - }) - - requestCycle() + val hvView = HypervisorView( + server, + 0, + server.flavor.memorySize + ) + hypervisors[server] = hvView } ServerState.SHUTOFF, ServerState.ERROR -> { val hv = hypervisors[server] ?: return @@ -116,6 +111,15 @@ class SimpleVirtProvisioningService( } } + override fun servicePublished(server: Server, key: ServiceKey<*>) { + if (key == VirtDriver.Key) { + val hv = hypervisors[server] ?: return + hv.driver = server.services[VirtDriver] + availableHypervisors += hv + requestCycle() + } + } + data class ImageView( val image: Image, val monitor: ServerMonitor, diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 166e93b8..c5c0441c 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -59,12 +59,12 @@ internal class SimpleBareMetalDriverTest { val driver = SimpleBareMetalDriver(dom, UUID.randomUUID(), "test", cpus, emptyList()) val monitor = object : NodeMonitor { - override suspend fun onUpdate(node: Node, previousState: NodeState) { + override fun stateChanged(node: Node, previousState: NodeState) { println(node) } - override suspend fun onUpdate(server: Server, previousState: ServerState) { - println("[${simulationContext.clock.millis()}] $server") + override fun stateChanged(server: Server, previousState: ServerState) { + println("$server") finalState = server.state } } diff --git 
a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt index ef19427e..9cbb9baa 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt @@ -54,7 +54,7 @@ internal class SimpleProvisioningServiceTest { root.launch { val image = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1000, 2) val monitor = object : ServerMonitor { - override suspend fun onUpdate(server: Server, previousState: ServerState) { + override fun stateChanged(server: Server, previousState: ServerState) { println(server) } } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt index 57a7150e..9ceaf704 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt @@ -25,7 +25,6 @@ package com.atlarge.opendc.compute.virt.driver.hypervisor import com.atlarge.odcsim.SimulationEngineProvider -import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.ProcessingUnit import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.Flavor @@ -71,8 +70,8 @@ internal class HypervisorTest { val workloadA = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 1) val workloadB = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 2_000, 1) val monitor = object : NodeMonitor { - override suspend fun onUpdate(server: Server, previousState: ServerState) { - 
println("[${simulationContext.clock.millis()}]: $server") + override fun stateChanged(server: Server, previousState: ServerState) { + println("$server") } } @@ -89,7 +88,7 @@ internal class HypervisorTest { delay(5) val flavor = Flavor(1, 0) - val vmDriver = metalDriver.refresh().server!!.serviceRegistry[VirtDriver] + val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] vmDriver.spawn(workloadA, monitor, flavor) vmDriver.spawn(workloadB, monitor, flavor) } diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt index a036a705..75aa778f 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistry.kt @@ -25,9 +25,14 @@ package com.atlarge.opendc.core.services /** - * A service registry for a datacenter zone. + * An immutable service registry interface. */ public interface ServiceRegistry { + /** + * The keys in this registry. + */ + public val keys: Collection> + /** * Determine if this map contains the service with the specified [ServiceKey]. * @@ -41,12 +46,18 @@ public interface ServiceRegistry { * * @param key The key of the service to obtain. * @return The references to the service. - * @throws IllegalArgumentException if the key does not exists in the map. + * @throws IllegalArgumentException if the key does not exist in the map. */ public operator fun get(key: ServiceKey): T /** - * Register the specified [ServiceKey] in this registry. + * Return the result of associating the specified [service] with the given [key] in this registry. */ - public operator fun set(key: ServiceKey, service: T): ServiceRegistry + public fun put(key: ServiceKey, service: T): ServiceRegistry } + +/** + * Construct an empty [ServiceRegistry]. 
+ */ +@Suppress("FunctionName") +public fun ServiceRegistry(): ServiceRegistry = ServiceRegistryImpl(emptyMap()) diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt index e3fa171d..0686ebaf 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/services/ServiceRegistryImpl.kt @@ -27,22 +27,18 @@ package com.atlarge.opendc.core.services /** * Default implementation of the [ServiceRegistry] interface. */ -public class ServiceRegistryImpl : ServiceRegistry { - /** - * The map containing the registered services. - */ - private val services: MutableMap, Any> = mutableMapOf() +internal class ServiceRegistryImpl(private val map: Map, Any>) : ServiceRegistry { + override val keys: Collection> + get() = map.keys - override fun set(key: ServiceKey, service: T) { - services[key] = service - } - - override fun contains(key: ServiceKey<*>): Boolean = key in services + override fun contains(key: ServiceKey<*>): Boolean = key in map override fun get(key: ServiceKey): T { @Suppress("UNCHECKED_CAST") - return services[key] as T + return map[key] as T } - override fun toString(): String = services.toString() + override fun put(key: ServiceKey, service: T): ServiceRegistry = ServiceRegistryImpl(map.plus(key to service)) + + override fun toString(): String = map.toString() } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 40cb9719..0f4d0c1b 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -1,6 +1,5 
@@ package com.atlarge.opendc.experiments.sc20 -import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.monitor.ServerMonitor @@ -21,8 +20,8 @@ class Sc20Monitor( outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw,failedVms\n") } - override suspend fun onUpdate(server: Server, previousState: ServerState) { - println("${simulationContext.clock.instant()} ${server.uid} ${server.state}") + override fun stateChanged(server: Server, previousState: ServerState) { + println("${server.uid} ${server.state}") if (server.state == ServerState.ERROR) { failed++ } @@ -36,7 +35,7 @@ class Sc20Monitor( hostServer: Server ) { // Assume for now that the host is not virtualized and measure the current power draw - val driver = hostServer.serviceRegistry[BareMetalDriver.Key] + val driver = hostServer.services[BareMetalDriver.Key] val usage = driver.usage.first() val powerDraw = driver.powerDraw.first() diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 09b6592e..efc85653 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -39,7 +39,6 @@ import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import com.fasterxml.jackson.module.kotlin.readValue import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default -import kotlinx.coroutines.Job import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.launch diff --git 
a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt index 0d4bd125..ab9f272f 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt @@ -34,7 +34,7 @@ import com.atlarge.opendc.compute.metal.service.SimpleProvisioningService import com.atlarge.opendc.core.Environment import com.atlarge.opendc.core.Platform import com.atlarge.opendc.core.Zone -import com.atlarge.opendc.core.services.ServiceRegistryImpl +import com.atlarge.opendc.core.services.ServiceRegistry import com.atlarge.opendc.format.environment.EnvironmentReader import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper @@ -89,9 +89,7 @@ class Sc18EnvironmentReader(input: InputStream, mapper: ObjectMapper = jacksonOb provisioningService.create(node) } - val serviceRegistry = ServiceRegistryImpl() - serviceRegistry[ProvisioningService.Key] = provisioningService - + val serviceRegistry = ServiceRegistry().put(ProvisioningService, provisioningService) val platform = Platform( UUID.randomUUID(), "sc18-platform", listOf( Zone(UUID.randomUUID(), "zone", serviceRegistry) diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt index ae0ba550..c6a393e1 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt @@ -35,7 +35,7 @@ import 
com.atlarge.opendc.compute.metal.service.SimpleProvisioningService import com.atlarge.opendc.core.Environment import com.atlarge.opendc.core.Platform import com.atlarge.opendc.core.Zone -import com.atlarge.opendc.core.services.ServiceRegistryImpl +import com.atlarge.opendc.core.services.ServiceRegistry import com.atlarge.opendc.format.environment.EnvironmentReader import java.io.BufferedReader import java.io.File @@ -119,8 +119,7 @@ class Sc20ClusterEnvironmentReader( provisioningService.create(node) } - val serviceRegistry = ServiceRegistryImpl() - serviceRegistry[ProvisioningService.Key] = provisioningService + val serviceRegistry = ServiceRegistry().put(ProvisioningService, provisioningService) val platform = Platform( UUID.randomUUID(), "sc20-platform", listOf( diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt index a954a308..07309341 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt @@ -35,7 +35,7 @@ import com.atlarge.opendc.compute.metal.service.SimpleProvisioningService import com.atlarge.opendc.core.Environment import com.atlarge.opendc.core.Platform import com.atlarge.opendc.core.Zone -import com.atlarge.opendc.core.services.ServiceRegistryImpl +import com.atlarge.opendc.core.services.ServiceRegistry import com.atlarge.opendc.format.environment.EnvironmentReader import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper @@ -103,8 +103,7 @@ class Sc20EnvironmentReader(input: InputStream, mapper: ObjectMapper = jacksonOb provisioningService.create(node) } - val serviceRegistry = ServiceRegistryImpl() - serviceRegistry[ProvisioningService.Key] = 
provisioningService + val serviceRegistry = ServiceRegistry().put(ProvisioningService, provisioningService) val platform = Platform( UUID.randomUUID(), "sc20-platform", listOf( diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt index 008cd1ee..a055a3fe 100644 --- a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt +++ b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt @@ -294,42 +294,44 @@ class StageWorkflowService( } } - override suspend fun onUpdate(server: Server, previousState: ServerState) = withContext(domain.coroutineContext) { - when (server.state) { - ServerState.ACTIVE -> { - val task = taskByServer.getValue(server) - task.startedAt = simulationContext.clock.millis() - task.job.monitor.onTaskStart(task.job.job, task.task, simulationContext.clock.millis()) - rootListener.taskStarted(task) - } - ServerState.SHUTOFF, ServerState.ERROR -> { - val task = taskByServer.remove(server) ?: throw IllegalStateException() - val job = task.job - task.state = TaskStatus.FINISHED - task.finishedAt = simulationContext.clock.millis() - job.tasks.remove(task) - available += task.host!! 
- activeTasks -= task - job.monitor.onTaskFinish(job.job, task.task, 0, simulationContext.clock.millis()) - rootListener.taskFinished(task) - - // Add job roots to the scheduling queue - for (dependent in task.dependents) { - if (dependent.state != TaskStatus.READY) { - continue + override fun stateChanged(server: Server, previousState: ServerState) { + domain.launch { + when (server.state) { + ServerState.ACTIVE -> { + val task = taskByServer.getValue(server) + task.startedAt = simulationContext.clock.millis() + task.job.monitor.onTaskStart(task.job.job, task.task, simulationContext.clock.millis()) + rootListener.taskStarted(task) + } + ServerState.SHUTOFF, ServerState.ERROR -> { + val task = taskByServer.remove(server) ?: throw IllegalStateException() + val job = task.job + task.state = TaskStatus.FINISHED + task.finishedAt = simulationContext.clock.millis() + job.tasks.remove(task) + available += task.host!! + activeTasks -= task + job.monitor.onTaskFinish(job.job, task.task, 0, simulationContext.clock.millis()) + rootListener.taskFinished(task) + + // Add job roots to the scheduling queue + for (dependent in task.dependents) { + if (dependent.state != TaskStatus.READY) { + continue + } + + incomingTasks += dependent + rootListener.taskReady(dependent) } - incomingTasks += dependent - rootListener.taskReady(dependent) - } + if (job.isFinished) { + finishJob(job) + } - if (job.isFinished) { - finishJob(job) + requestCycle() } - - requestCycle() + else -> throw IllegalStateException() } - else -> throw IllegalStateException() } } -- cgit v1.2.3 From bc64182612ad06f15bff5b48637ed7d241e293b2 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 18 Mar 2020 00:50:05 +0100 Subject: [ci skip] refactor: Refactor monitors into EventFlow --- .../kotlin/com/atlarge/odcsim/flow/EventFlow.kt | 99 ++++++++++++++++++++++ .../kotlin/com/atlarge/odcsim/flow/StateFlow.kt | 90 ++++++++++++++++++++ .../kotlin/com/atlarge/odcsim/signal/Signal.kt | 90 -------------------- 
.../com/atlarge/opendc/compute/core/Server.kt | 8 +- .../com/atlarge/opendc/compute/core/ServerEvent.kt | 53 ++++++++++++ .../compute/core/execution/ShutdownException.kt | 2 +- .../opendc/compute/core/monitor/ServerMonitor.kt | 50 ----------- .../com/atlarge/opendc/compute/metal/Node.kt | 10 ++- .../com/atlarge/opendc/compute/metal/NodeEvent.kt | 43 ++++++++++ .../opendc/compute/metal/driver/BareMetalDriver.kt | 3 +- .../compute/metal/driver/SimpleBareMetalDriver.kt | 62 +++++++------- .../opendc/compute/metal/monitor/NodeMonitor.kt | 42 --------- .../compute/metal/service/ProvisioningService.kt | 3 +- .../metal/service/SimpleProvisioningService.kt | 30 +------ .../opendc/compute/virt/driver/VirtDriver.kt | 24 ++---- .../opendc/compute/virt/driver/VirtDriverEvent.kt | 59 +++++++++++++ .../compute/virt/driver/VirtDriverMonitor.kt | 14 --- .../virt/driver/hypervisor/HypervisorImage.kt | 13 +-- .../virt/driver/hypervisor/HypervisorVirtDriver.kt | 67 ++++++++------- .../compute/virt/monitor/HypervisorMonitor.kt | 25 ------ .../virt/service/SimpleVirtProvisioningService.kt | 36 +++++--- .../virt/service/VirtProvisioningService.kt | 4 +- .../metal/driver/SimpleBareMetalDriverTest.kt | 27 ++---- .../metal/service/SimpleProvisioningServiceTest.kt | 13 +-- .../virt/driver/hypervisor/HypervisorTest.kt | 34 +++----- .../opendc/experiments/sc20/TestExperiment.kt | 3 +- 26 files changed, 496 insertions(+), 408 deletions(-) create mode 100644 odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/EventFlow.kt create mode 100644 odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt delete mode 100644 odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/signal/Signal.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/ServerEvent.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt create mode 100644 
opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeEvent.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverMonitor.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/monitor/HypervisorMonitor.kt diff --git a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/EventFlow.kt b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/EventFlow.kt new file mode 100644 index 00000000..5d9af9ec --- /dev/null +++ b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/EventFlow.kt @@ -0,0 +1,99 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.odcsim.flow + +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.FlowPreview +import kotlinx.coroutines.InternalCoroutinesApi +import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.channels.SendChannel +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.FlowCollector +import kotlinx.coroutines.flow.consumeAsFlow +import java.util.WeakHashMap + +/** + * A [Flow] that can be used to emit events. + */ +public interface EventFlow : Flow { + /** + * Emit the specified [event]. + */ + public fun emit(event: T) + + /** + * Close the flow. + */ + public fun close() +} + +/** + * Creates a new [EventFlow]. + */ +@Suppress("FunctionName") +public fun EventFlow(): EventFlow = EventFlowImpl() + +/** + * Internal implementation of the [EventFlow] class. 
+ */ +@OptIn(ExperimentalCoroutinesApi::class, FlowPreview::class) +private class EventFlowImpl : EventFlow { + private var closed: Boolean = false + private val subscribers = WeakHashMap, Unit>() + + override fun emit(event: T) { + synchronized(this) { + for ((chan, _) in subscribers) { + chan.offer(event) + } + } + } + + override fun close() { + synchronized(this) { + closed = true + + for ((chan, _) in subscribers) { + chan.close() + } + } + } + + @InternalCoroutinesApi + override suspend fun collect(collector: FlowCollector) { + val channel: Channel + synchronized(this) { + if (closed) { + return + } + + channel = Channel(Channel.UNLIMITED) + subscribers[channel] = Unit + } + channel.consumeAsFlow().collect(collector) + } + + override fun toString(): String = "EventFlow" +} diff --git a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt new file mode 100644 index 00000000..429d932b --- /dev/null +++ b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt @@ -0,0 +1,90 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.odcsim.flow + +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.FlowPreview +import kotlinx.coroutines.InternalCoroutinesApi +import kotlinx.coroutines.channels.BroadcastChannel +import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.FlowCollector +import kotlinx.coroutines.flow.asFlow + +/** + * A [Flow] that contains a single value that changes over time. + * + * This class exists to implement the DataFlow/StateFlow functionality that will be implemented in `kotlinx-coroutines` + * in the future, but is not available yet. + * See: https://github.com/Kotlin/kotlinx.coroutines/pull/1354 + */ +public interface StateFlow : Flow { + /** + * The current value of this flow. + * + * Setting a value that is [equal][Any.equals] to the previous one does nothing. + */ + public var value: T +} + +/** + * Creates a [StateFlow] with a given initial [value]. + */ +@Suppress("FunctionName") +public fun StateFlow(value: T): StateFlow = StateFlowImpl(value) + +/** + * Internal implementation of the [StateFlow] interface. + */ +private class StateFlowImpl(initialValue: T) : StateFlow { + /** + * The [BroadcastChannel] to back this flow. + */ + @OptIn(ExperimentalCoroutinesApi::class) + private val chan = BroadcastChannel(Channel.CONFLATED) + + /** + * The internal [Flow] backing this flow. 
+ */ + @OptIn(FlowPreview::class) + private val flow = chan.asFlow() + + init { + @OptIn(ExperimentalCoroutinesApi::class) + chan.offer(initialValue) + } + + @OptIn(ExperimentalCoroutinesApi::class) + public override var value: T = initialValue + set(value) { + if (field != value) { + chan.offer(value) + field = value + } + } + + @InternalCoroutinesApi + override suspend fun collect(collector: FlowCollector) = flow.collect(collector) +} diff --git a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/signal/Signal.kt b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/signal/Signal.kt deleted file mode 100644 index da6298a3..00000000 --- a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/signal/Signal.kt +++ /dev/null @@ -1,90 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -package com.atlarge.odcsim.signal - -import kotlinx.coroutines.ExperimentalCoroutinesApi -import kotlinx.coroutines.FlowPreview -import kotlinx.coroutines.InternalCoroutinesApi -import kotlinx.coroutines.channels.BroadcastChannel -import kotlinx.coroutines.channels.Channel -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.FlowCollector -import kotlinx.coroutines.flow.asFlow - -/** - * A [Flow] that contains a single value that changes over time. - * - * This class exists to implement the DataFlow/StateFlow functionality that will be implemented in `kotlinx-coroutines` - * in the future, but is not available yet. - * See: https://github.com/Kotlin/kotlinx.coroutines/pull/1354 - */ -public interface Signal : Flow { - /** - * The current value of this signal. - * - * Setting a value that is [equal][Any.equals] to the previous one does nothing. - */ - public var value: T -} - -/** - * Creates a [Signal] with a given initial [value]. - */ -@Suppress("FunctionName") -public fun Signal(value: T): Signal = SignalImpl(value) - -/** - * Internal implementation of the [Signal] interface. - */ -private class SignalImpl(initialValue: T) : Signal { - /** - * The [BroadcastChannel] to back this signal. - */ - @OptIn(ExperimentalCoroutinesApi::class) - private val chan = BroadcastChannel(Channel.CONFLATED) - - /** - * The internal [Flow] backing this signal. 
- */ - @OptIn(FlowPreview::class) - private val flow = chan.asFlow() - - init { - @OptIn(ExperimentalCoroutinesApi::class) - chan.offer(initialValue) - } - - @OptIn(ExperimentalCoroutinesApi::class) - public override var value: T = initialValue - set(value) { - if (field != value) { - chan.offer(value) - field = value - } - } - - @InternalCoroutinesApi - override suspend fun collect(collector: FlowCollector) = flow.collect(collector) -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt index 31b070a4..01968cd8 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/Server.kt @@ -28,6 +28,7 @@ import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.core.resource.Resource import com.atlarge.opendc.core.resource.TagContainer import com.atlarge.opendc.core.services.ServiceRegistry +import kotlinx.coroutines.flow.Flow import java.util.UUID /** @@ -67,7 +68,12 @@ public data class Server( /** * The services published by this server. */ - public val services: ServiceRegistry = ServiceRegistry() + public val services: ServiceRegistry, + + /** + * The events that are emitted by the server. 
+ */ + public val events: Flow ) : Resource { override fun hashCode(): Int = uid.hashCode() override fun equals(other: Any?): Boolean = other is Server && uid == other.uid diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/ServerEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/ServerEvent.kt new file mode 100644 index 00000000..1595937c --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/ServerEvent.kt @@ -0,0 +1,53 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.core + +import com.atlarge.opendc.core.services.ServiceKey + +/** + * An event that is emitted by a [Server]. + */ +public sealed class ServerEvent { + /** + * The server that emitted the event. 
+ */ + public abstract val server: Server + + /** + * This event is emitted when the state of [server] changes. + * + * @property server The server of which the state changed. + * @property previousState The previous state of the server. + */ + public data class StateChanged(override val server: Server, val previousState: ServerState) : ServerEvent() + + /** + * This event is emitted when a server publishes a service. + * + * @property server The server that published the service. + * @property key The service key of the service that was published. + */ + public data class ServicePublished(override val server: Server, val key: ServiceKey<*>) : ServerEvent() +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt index abf6f8db..e4da557b 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ShutdownException.kt @@ -27,7 +27,7 @@ package com.atlarge.opendc.compute.core.execution import kotlinx.coroutines.CancellationException /** - * This exception is thrown by the underlying [ServerContext] to indicate that a shutdown signal + * This exception is thrown by the underlying [ServerContext] to indicate that a shutdown flow * has been sent to the server. */ public class ShutdownException(message: String? = null, override val cause: Throwable? 
= null) : CancellationException(message) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt deleted file mode 100644 index c2b30b9d..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/monitor/ServerMonitor.kt +++ /dev/null @@ -1,50 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.compute.core.monitor - -import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.core.ServerState -import com.atlarge.opendc.core.services.ServiceKey - -/** - * An interface for monitoring the state of a machine. - */ -public interface ServerMonitor { - /** - * This method is synchronously invoked when the state of a machine updates. 
- * - * @param server The server which state was updated. - * @param previousState The previous state of the server. - */ - public fun stateChanged(server: Server, previousState: ServerState) {} - - /** - * This method is synchronously invoked when the server publishes a service. - * - * @param server The server that published the service. - * @param key The key of the service that was published. - */ - public fun servicePublished(server: Server, key: ServiceKey<*>) {} -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt index 55948d3c..8b8d1596 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt @@ -27,6 +27,7 @@ package com.atlarge.opendc.compute.metal import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.core.Identity +import kotlinx.coroutines.flow.Flow import java.util.UUID /** @@ -46,7 +47,7 @@ data class Node( /** * Meta data of the node. */ - public val metadata: Map = emptyMap(), + public val metadata: Map, /** * The last known state of the compute node. @@ -61,7 +62,12 @@ data class Node( /** * The server instance that is running on the node or `null` if no server is running. */ - public val server: Server? + public val server: Server?, + + /** + * The events that are emitted by the node. 
+ */ + public val events: Flow ) : Identity { override fun hashCode(): Int = uid.hashCode() override fun equals(other: Any?): Boolean = other is Node && uid == other.uid diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeEvent.kt new file mode 100644 index 00000000..7719db24 --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/NodeEvent.kt @@ -0,0 +1,43 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.metal + +/** + * An event that is emitted by a [Node]. + */ +public sealed class NodeEvent { + /** + * The node that emitted the event. + */ + public abstract val node: Node + + /** + * This event is emitted when the state of [node] changes. 
+ * + * @property node The node of which the state changed. + * @property previousState The previous state of the node. + */ + public data class StateChanged(override val node: Node, val previousState: NodeState) : NodeEvent() +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt index 3956338b..5d1db378 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt @@ -27,7 +27,6 @@ package com.atlarge.opendc.compute.metal.driver import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.core.failure.FailureDomain import com.atlarge.opendc.core.power.Powerable import com.atlarge.opendc.core.services.AbstractServiceKey @@ -47,7 +46,7 @@ public interface BareMetalDriver : Powerable, FailureDomain { /** * Initialize the driver. */ - public suspend fun init(monitor: NodeMonitor): Node + public suspend fun init(): Node /** * Start the bare metal node with the specified boot disk image. 
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 46b4c30c..49c3fa2e 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -25,12 +25,14 @@ package com.atlarge.opendc.compute.metal.driver import com.atlarge.odcsim.Domain -import com.atlarge.odcsim.signal.Signal +import com.atlarge.odcsim.flow.EventFlow +import com.atlarge.odcsim.flow.StateFlow import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.ProcessingUnit import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.MemoryUnit +import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.execution.ServerManagementContext import com.atlarge.opendc.compute.core.execution.ShutdownException @@ -38,8 +40,8 @@ import com.atlarge.opendc.compute.core.execution.assertFailure import com.atlarge.opendc.compute.core.image.EmptyImage import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.metal.Node +import com.atlarge.opendc.compute.metal.NodeEvent import com.atlarge.opendc.compute.metal.NodeState -import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.compute.metal.power.ConstantPowerModel import com.atlarge.opendc.core.power.PowerModel import com.atlarge.opendc.core.services.ServiceKey @@ -77,48 +79,47 @@ public class SimpleBareMetalDriver( powerModel: PowerModel = ConstantPowerModel(0.0) ) : BareMetalDriver { /** - * The monitor to use. + * The flavor that corresponds to this machine. 
+ */ + private val flavor = Flavor(cpus.size, memoryUnits.map { it.size }.sum()) + + /** + * The current active server context. + */ + private var serverContext: BareMetalServerContext? = null + + /** + * The events of the machine. */ - private lateinit var monitor: NodeMonitor + private val events = EventFlow() + + /** + * The flow containing the load of the server. + */ + private val usageSignal = StateFlow(0.0) /** * The machine state. */ - private var node: Node = Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null) + private var node: Node = Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null, events) set(value) { if (field.state != value.state) { - monitor.stateChanged(value, field.state) + events.emit(NodeEvent.StateChanged(value, field.state)) } if (field.server != null && value.server != null && field.server!!.state != value.server.state) { - monitor.stateChanged(value.server, field.server!!.state) + serverContext!!.events.emit(ServerEvent.StateChanged(value.server, field.server!!.state)) } field = value } - /** - * The flavor that corresponds to this machine. - */ - private val flavor = Flavor(cpus.size, memoryUnits.map { it.size }.sum()) - - /** - * The current active server context. - */ - private var serverContext: BareMetalServerContext? = null - - /** - * The signal containing the load of the server. 
- */ - private val usageSignal = Signal(0.0) - override val usage: Flow = usageSignal override val powerDraw: Flow = powerModel(this) - override suspend fun init(monitor: NodeMonitor): Node = withContext(domain.coroutineContext) { - this@SimpleBareMetalDriver.monitor = monitor - return@withContext node + override suspend fun init(): Node = withContext(domain.coroutineContext) { + node } override suspend fun start(): Node = withContext(domain.coroutineContext) { @@ -126,6 +127,7 @@ public class SimpleBareMetalDriver( return@withContext node } + val events = EventFlow() val server = Server( UUID.randomUUID(), node.name, @@ -133,11 +135,12 @@ public class SimpleBareMetalDriver( flavor, node.image, ServerState.BUILD, - ServiceRegistry().put(BareMetalDriver, this@SimpleBareMetalDriver) + ServiceRegistry().put(BareMetalDriver, this@SimpleBareMetalDriver), + events ) node = node.copy(state = NodeState.BOOT, server = server) - serverContext = BareMetalServerContext() + serverContext = BareMetalServerContext(events) return@withContext node } @@ -166,7 +169,7 @@ public class SimpleBareMetalDriver( override suspend fun refresh(): Node = withContext(domain.coroutineContext) { node } - private inner class BareMetalServerContext : ServerManagementContext { + private inner class BareMetalServerContext(val events: EventFlow) : ServerManagementContext { private var finalized: Boolean = false override val cpus: List = this@SimpleBareMetalDriver.cpus @@ -175,6 +178,7 @@ public class SimpleBareMetalDriver( get() = node.server!! 
private val job = domain.launch { + delay(1) // TODO Introduce boot time init() try { server.image(this@BareMetalServerContext) @@ -198,7 +202,7 @@ public class SimpleBareMetalDriver( override suspend fun publishService(key: ServiceKey, service: T) { val server = server.copy(services = server.services.put(key, service)) node = node.copy(server = server) - monitor.servicePublished(server, key) + events.emit(ServerEvent.ServicePublished(server, key)) } override suspend fun init() { diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt deleted file mode 100644 index bd4b40d8..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/monitor/NodeMonitor.kt +++ /dev/null @@ -1,42 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.compute.metal.monitor - -import com.atlarge.opendc.compute.core.monitor.ServerMonitor -import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.metal.NodeState - -/** - * An interface for monitoring bare-metal nodes. - */ -public interface NodeMonitor : ServerMonitor { - /** - * This method is synchronously invoked when the state of a bare metal machine updates. - * - * @param node The node for which state was updated. - * @param previousState The previous state of the node. - */ - public fun stateChanged(node: Node, previousState: NodeState) {} -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/ProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/ProvisioningService.kt index 24ade799..105505f2 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/ProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/ProvisioningService.kt @@ -25,7 +25,6 @@ package com.atlarge.opendc.compute.metal.service import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.driver.BareMetalDriver import com.atlarge.opendc.core.services.AbstractServiceKey @@ -53,7 +52,7 @@ public interface ProvisioningService { /** * Deploy the specified [Image] on a compute node. */ - public suspend fun deploy(node: Node, image: Image, monitor: ServerMonitor): Node + public suspend fun deploy(node: Node, image: Image): Node /** * The service key of this service. 
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt index e5cd0a77..a7e143aa 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningService.kt @@ -25,33 +25,22 @@ package com.atlarge.opendc.compute.metal.service import com.atlarge.odcsim.Domain -import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.driver.BareMetalDriver -import com.atlarge.opendc.compute.metal.monitor.NodeMonitor -import com.atlarge.opendc.core.services.ServiceKey -import kotlinx.coroutines.launch import kotlinx.coroutines.withContext /** * A very basic implementation of the [ProvisioningService]. */ -public class SimpleProvisioningService(val domain: Domain) : ProvisioningService, NodeMonitor { +public class SimpleProvisioningService(val domain: Domain) : ProvisioningService { /** * The active nodes in this service. */ private val nodes: MutableMap = mutableMapOf() - /** - * The installed monitors. 
- */ - private val monitors: MutableMap = mutableMapOf() - override suspend fun create(driver: BareMetalDriver): Node = withContext(domain.coroutineContext) { - val node = driver.init(this@SimpleProvisioningService) + val node = driver.init() nodes[node] = driver return@withContext node } @@ -62,23 +51,10 @@ public class SimpleProvisioningService(val domain: Domain) : ProvisioningService return@withContext nodes[node]!!.refresh() } - override suspend fun deploy(node: Node, image: Image, monitor: ServerMonitor): Node = withContext(domain.coroutineContext) { + override suspend fun deploy(node: Node, image: Image): Node = withContext(domain.coroutineContext) { val driver = nodes[node]!! driver.setImage(image) val newNode = driver.reboot() - monitors[newNode.server!!] = monitor return@withContext newNode } - - override fun stateChanged(server: Server, previousState: ServerState) { - domain.launch { - monitors[server]?.stateChanged(server, previousState) - } - } - - override fun servicePublished(server: Server, key: ServiceKey<*>) { - domain.launch { - monitors[server]?.servicePublished(server, key) - } - } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt index d889d0f9..296f170e 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt @@ -27,8 +27,8 @@ package com.atlarge.opendc.compute.virt.driver import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.core.services.AbstractServiceKey +import kotlinx.coroutines.flow.Flow import java.util.UUID /** @@ -36,29 +36,19 @@ import java.util.UUID * provide 
virtualization for that particular resource. */ public interface VirtDriver { + /** + * The events emitted by the driver. + */ + public val events: Flow + /** * Spawn the given [Image] on the compute resource of this driver. * * @param image The image to deploy. - * @param monitor The monitor to use for the deployment of this particular image. * @param flavor The flavor of the server which this driver is controlling. * @return The virtual server spawned by this method. */ - public suspend fun spawn(image: Image, monitor: ServerMonitor, flavor: Flavor): Server - - /** - * Adds the given [VirtDriverMonitor] to the list of monitors to keep informed on the state of this driver. - * - * @param monitor The monitor to keep informed. - */ - public suspend fun addMonitor(monitor: VirtDriverMonitor) - - /** - * Removes the given [VirtDriverMonitor] from the list of monitors. - * - * @param monitor The monitor to unsubscribe - */ - public suspend fun removeMonitor(monitor: VirtDriverMonitor) + public suspend fun spawn(image: Image, flavor: Flavor): Server companion object Key : AbstractServiceKey(UUID.randomUUID(), "virtual-driver") } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt new file mode 100644 index 00000000..ccbe8b3c --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt @@ -0,0 +1,59 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.virt.driver + +/** + * An event that is emitted by a [VirtDriver]. + */ +public sealed class VirtDriverEvent { + /** + * The driver that emitted the event. + */ + public abstract val driver: VirtDriver + + /** + * This event is emitted when the number of active servers on the server managed by this driver is updated. + * + * @property driver The driver that emitted the event. + * @property numberOfActiveServers The number of active servers. + * @property availableMemory The available memory, in MB. + */ + public data class VmsUpdated(override val driver: VirtDriver, public val numberOfActiveServers: Int, public val availableMemory: Long) : VirtDriverEvent() + + /** + * This event is emitted when a slice is finished. + * + * @property driver The driver that emitted the event. + * @property requestedBurst The total requested CPU time (can be above capacity). + * @property grantedBurst The actual total granted capacity. + * @property numberOfDeployedImages The number of images deployed on this hypervisor. 
+ */ + public data class SliceFinished( + override val driver: VirtDriver, + public val requestedBurst: Long, + public val grantedBurst: Long, + public val numberOfDeployedImages: Int + ) : VirtDriverEvent() +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverMonitor.kt deleted file mode 100644 index cf2f4619..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverMonitor.kt +++ /dev/null @@ -1,14 +0,0 @@ -package com.atlarge.opendc.compute.virt.driver - -/** - * Monitor for entities interested in the state of a [VirtDriver]. - */ -interface VirtDriverMonitor { - /** - * Called when the number of active servers on the server managed by this driver is updated. - * - * @param numberOfActiveServers The number of active servers. - * @param availableMemory The available memory, in MB. - */ - public suspend fun onUpdate(numberOfActiveServers: Int, availableMemory: Long) -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt index 0f4d3c15..1eb0e0ff 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt @@ -27,7 +27,6 @@ package com.atlarge.opendc.compute.virt.driver.hypervisor import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import com.atlarge.opendc.core.resource.TagContainer import kotlinx.coroutines.coroutineScope import 
kotlinx.coroutines.suspendCancellableCoroutine @@ -36,20 +35,22 @@ import java.util.UUID /** * A hypervisor managing the VMs of a node. */ -class HypervisorImage( - private val hypervisorMonitor: HypervisorMonitor -) : Image { +object HypervisorImage : Image { override val uid: UUID = UUID.randomUUID() override val name: String = "vmm" override val tags: TagContainer = emptyMap() override suspend fun invoke(ctx: ServerContext) { coroutineScope { - val driver = HypervisorVirtDriver(ctx, hypervisorMonitor, this) + val driver = HypervisorVirtDriver(ctx, this) ctx.publishService(VirtDriver.Key, driver) // Suspend image until it is cancelled - suspendCancellableCoroutine {} + try { + suspendCancellableCoroutine {} + } finally { + driver.eventFlow.close() + } } } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt index 98d8092c..0b4a7109 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt @@ -25,28 +25,33 @@ package com.atlarge.opendc.compute.virt.driver.hypervisor import com.atlarge.odcsim.Domain +import com.atlarge.odcsim.flow.EventFlow import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ProcessingUnit import com.atlarge.opendc.compute.core.Server +import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.compute.core.execution.ServerManagementContext import com.atlarge.opendc.compute.core.execution.ShutdownException import com.atlarge.opendc.compute.core.execution.assertFailure import 
com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.compute.virt.driver.VirtDriverMonitor -import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor +import com.atlarge.opendc.compute.virt.driver.VirtDriverEvent import com.atlarge.opendc.core.services.ServiceKey +import com.atlarge.opendc.core.services.ServiceRegistry import com.atlarge.opendc.core.workload.IMAGE_PERF_INTERFERENCE_MODEL import com.atlarge.opendc.core.workload.PerformanceInterferenceModel import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.FlowPreview import kotlinx.coroutines.Job import kotlinx.coroutines.cancelAndJoin import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.launch import java.util.UUID import kotlin.math.ceil @@ -56,11 +61,17 @@ import kotlin.math.min /** * A [VirtDriver] that is backed by a simple hypervisor implementation. */ +@OptIn(ExperimentalCoroutinesApi::class, FlowPreview::class) class HypervisorVirtDriver( private val hostContext: ServerContext, - private val monitor: HypervisorMonitor, private val coroutineScope: CoroutineScope ) : VirtDriver { + /** + * The [Server] on which this hypervisor runs. + */ + public val server: Server + get() = hostContext.server + /** * A set for tracking the VM context objects. */ @@ -72,32 +83,33 @@ class HypervisorVirtDriver( private var availableMemory: Long = hostContext.server.flavor.memorySize /** - * Monitors to keep informed. + * The [EventFlow] to emit the events. 
*/ - private val monitors: MutableSet = mutableSetOf() + internal val eventFlow = EventFlow() - override suspend fun spawn(image: Image, monitor: ServerMonitor, flavor: Flavor): Server { + override val events: Flow = eventFlow + + override suspend fun spawn( + image: Image, + flavor: Flavor + ): Server { val requiredMemory = flavor.memorySize if (availableMemory - requiredMemory < 0) { throw InsufficientMemoryOnServerException() } require(flavor.cpuCount <= hostContext.server.flavor.cpuCount) { "Machine does not fit" } - val server = Server(UUID.randomUUID(), "", emptyMap(), flavor, image, ServerState.BUILD) + val events = EventFlow() + val server = Server( + UUID.randomUUID(), "", emptyMap(), flavor, image, ServerState.BUILD, + ServiceRegistry(), events + ) availableMemory -= requiredMemory - vms.add(VmServerContext(server, monitor, simulationContext.domain)) - monitors.forEach { it.onUpdate(vms.size, availableMemory) } + vms.add(VmServerContext(server, events, simulationContext.domain)) + eventFlow.emit(VirtDriverEvent.VmsUpdated(this, vms.size, availableMemory)) return server } - override suspend fun addMonitor(monitor: VirtDriverMonitor) { - monitors.add(monitor) - } - - override suspend fun removeMonitor(monitor: VirtDriverMonitor) { - monitors.remove(monitor) - } - /** * A flag to indicate the driver is stopped. 
*/ @@ -211,13 +223,7 @@ class HypervisorVirtDriver( } } - monitor.onSliceFinish( - end, - totalBurst, - totalBurst - totalRemainder, - vms.size, - hostContext.server - ) + eventFlow.emit(VirtDriverEvent.SliceFinished(this@HypervisorVirtDriver, totalBurst, totalBurst - totalRemainder, vms.size)) } this.call = call } @@ -250,7 +256,7 @@ class HypervisorVirtDriver( internal inner class VmServerContext( server: Server, - val monitor: ServerMonitor, + val events: EventFlow, val domain: Domain ) : ServerManagementContext { private var finalized: Boolean = false @@ -261,6 +267,7 @@ class HypervisorVirtDriver( private var initialized: Boolean = false internal val job: Job = coroutineScope.launch { + delay(1) // TODO Introduce boot time init() try { server.image(this@VmServerContext) @@ -273,7 +280,7 @@ class HypervisorVirtDriver( override var server: Server = server set(value) { if (field.state != value.state) { - monitor.stateChanged(value, field.state) + events.emit(ServerEvent.StateChanged(value, field.state)) } field = value @@ -283,7 +290,7 @@ class HypervisorVirtDriver( override suspend fun publishService(key: ServiceKey, service: T) { server = server.copy(services = server.services.put(key, service)) - monitor.servicePublished(server, key) + events.emit(ServerEvent.ServicePublished(server, key)) } override suspend fun init() { @@ -304,8 +311,8 @@ class HypervisorVirtDriver( server = server.copy(state = serverState) availableMemory += server.flavor.memorySize vms.remove(this) - - monitors.forEach { it.onUpdate(vms.size, availableMemory) } + events.close() + eventFlow.emit(VirtDriverEvent.VmsUpdated(this@HypervisorVirtDriver, vms.size, availableMemory)) } override suspend fun run(burst: LongArray, limit: DoubleArray, deadline: Long) { diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/monitor/HypervisorMonitor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/monitor/HypervisorMonitor.kt deleted file mode 100644 
index 1e3981f6..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/monitor/HypervisorMonitor.kt +++ /dev/null @@ -1,25 +0,0 @@ -package com.atlarge.opendc.compute.virt.monitor - -import com.atlarge.opendc.compute.core.Server - -/** - * Monitoring interface for hypervisor-specific events. - */ -interface HypervisorMonitor { - /** - * Invoked after a scheduling slice has finished processed. - * - * @param time The current time (in ms). - * @param requestedBurst The total requested CPU time (can be above capacity). - * @param grantedBurst The actual total granted capacity. - * @param numberOfDeployedImages The number of images deployed on this hypervisor. - * @param hostServer The server hosting this hypervisor. - */ - suspend fun onSliceFinish( - time: Long, - requestedBurst: Long, - grantedBurst: Long, - numberOfDeployedImages: Int, - hostServer: Server - ) -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 6fb821d7..8365f8c9 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -3,25 +3,27 @@ package com.atlarge.opendc.compute.virt.service import com.atlarge.odcsim.SimulationContext import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.Server +import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.driver.VirtDriver import 
com.atlarge.opendc.compute.virt.driver.hypervisor.HypervisorImage import com.atlarge.opendc.compute.virt.driver.hypervisor.InsufficientMemoryOnServerException -import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy import com.atlarge.opendc.core.services.ServiceKey +import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.Job +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch +@OptIn(ExperimentalCoroutinesApi::class) class SimpleVirtProvisioningService( public override val allocationPolicy: AllocationPolicy, private val ctx: SimulationContext, - private val provisioningService: ProvisioningService, - private val hypervisorMonitor: HypervisorMonitor -) : VirtProvisioningService, ServerMonitor { + private val provisioningService: ProvisioningService +) : VirtProvisioningService { /** * The hypervisors that have been launched by the service. 
*/ @@ -46,14 +48,24 @@ class SimpleVirtProvisioningService( ctx.domain.launch { val provisionedNodes = provisioningService.nodes() provisionedNodes.forEach { node -> - val hypervisorImage = HypervisorImage(hypervisorMonitor) - provisioningService.deploy(node, hypervisorImage, this@SimpleVirtProvisioningService) + val hypervisorImage = HypervisorImage + val node = provisioningService.deploy(node, hypervisorImage) + node.server!!.events.onEach { event -> + when (event) { + is ServerEvent.StateChanged -> stateChanged(event.server, event.previousState) + is ServerEvent.ServicePublished -> servicePublished(event.server, event.key) + } + } + .launchIn(ctx.domain) } } } - override suspend fun deploy(image: Image, monitor: ServerMonitor, flavor: Flavor) { - val vmInstance = ImageView(image, monitor, flavor) + override suspend fun deploy( + image: Image, + flavor: Flavor + ) { + val vmInstance = ImageView(image, flavor) incomingImages += vmInstance requestCycle() } @@ -82,7 +94,6 @@ class SimpleVirtProvisioningService( incomingImages -= imageInstance imageInstance.server = selectedHv.driver.spawn( imageInstance.image, - imageInstance.monitor, imageInstance.flavor ) activeImages += imageInstance @@ -92,7 +103,7 @@ class SimpleVirtProvisioningService( } } - override fun stateChanged(server: Server, previousState: ServerState) { + private fun stateChanged(server: Server, previousState: ServerState) { when (server.state) { ServerState.ACTIVE -> { val hvView = HypervisorView( @@ -111,7 +122,7 @@ class SimpleVirtProvisioningService( } } - override fun servicePublished(server: Server, key: ServiceKey<*>) { + private fun servicePublished(server: Server, key: ServiceKey<*>) { if (key == VirtDriver.Key) { val hv = hypervisors[server] ?: return hv.driver = server.services[VirtDriver] @@ -122,7 +133,6 @@ class SimpleVirtProvisioningService( data class ImageView( val image: Image, - val monitor: ServerMonitor, val flavor: Flavor, var server: Server? 
= null ) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt index 7770ec50..da72d742 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt @@ -2,7 +2,6 @@ package com.atlarge.opendc.compute.virt.service import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy /** @@ -15,8 +14,7 @@ interface VirtProvisioningService { * Submit the specified [Image] to the provisioning service. * * @param image The image to be deployed. - * @param monitor The monitor to inform on events. * @param flavor The flavor of the machine instance to run this [image] on. 
*/ - public suspend fun deploy(image: Image, monitor: ServerMonitor, flavor: Flavor) + public suspend fun deploy(image: Image, flavor: Flavor) } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index c5c0441c..e0d8799f 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -25,15 +25,12 @@ package com.atlarge.opendc.compute.metal.driver import com.atlarge.odcsim.SimulationEngineProvider -import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.ProcessingNode import com.atlarge.opendc.compute.core.ProcessingUnit -import com.atlarge.opendc.compute.core.Server +import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.FlopsApplicationImage -import com.atlarge.opendc.compute.metal.Node -import com.atlarge.opendc.compute.metal.NodeState -import com.atlarge.opendc.compute.metal.monitor.NodeMonitor +import kotlinx.coroutines.flow.collect import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext @@ -57,24 +54,18 @@ internal class SimpleBareMetalDriverTest { val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 4) val cpus = List(4) { ProcessingUnit(cpuNode, it, 2400.0) } val driver = SimpleBareMetalDriver(dom, UUID.randomUUID(), "test", cpus, emptyList()) - - val monitor = object : NodeMonitor { - override fun stateChanged(node: Node, previousState: NodeState) { - println(node) - } - - override fun stateChanged(server: Server, previousState: ServerState) { - println("$server") - finalState = server.state - } - } val image = 
FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 2) // Batch driver commands withContext(dom.coroutineContext) { - driver.init(monitor) + driver.init() driver.setImage(image) - driver.start() + val server = driver.start().server!! + server.events.collect { event -> + when (event) { + is ServerEvent.StateChanged -> finalState = event.server.state + } + } } } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt index 9cbb9baa..8e07c09c 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt @@ -27,12 +27,10 @@ package com.atlarge.opendc.compute.metal.service import com.atlarge.odcsim.SimulationEngineProvider import com.atlarge.opendc.compute.core.ProcessingNode import com.atlarge.opendc.compute.core.ProcessingUnit -import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.FlopsApplicationImage -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.collect import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Test @@ -53,12 +51,6 @@ internal class SimpleProvisioningServiceTest { val root = system.newDomain(name = "root") root.launch { val image = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1000, 2) - val monitor = object : ServerMonitor { - override fun stateChanged(server: Server, previousState: ServerState) { - println(server) - } - } - val dom = root.newDomain("provisioner") val cpuNode = 
ProcessingNode("Intel", "Xeon", "amd64", 4) @@ -69,7 +61,8 @@ internal class SimpleProvisioningServiceTest { provisioner.create(driver) delay(5) val nodes = provisioner.nodes() - provisioner.deploy(nodes.first(), image, monitor) + val node = provisioner.deploy(nodes.first(), image) + node.server!!.events.collect { println(it) } } runBlocking { diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt index 9ceaf704..d86045c0 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt @@ -26,16 +26,15 @@ package com.atlarge.opendc.compute.virt.driver.hypervisor import com.atlarge.odcsim.SimulationEngineProvider import com.atlarge.opendc.compute.core.ProcessingUnit -import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ProcessingNode -import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver -import com.atlarge.opendc.compute.metal.monitor.NodeMonitor import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor +import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Test @@ -49,6 +48,7 @@ internal class HypervisorTest { /** * A smoke test for the bare-metal driver. 
*/ + @OptIn(ExperimentalCoroutinesApi::class) @Test fun smoke() { val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() @@ -56,24 +56,9 @@ internal class HypervisorTest { val root = system.newDomain("root") root.launch { - val vmm = HypervisorImage(object : HypervisorMonitor { - override suspend fun onSliceFinish( - time: Long, - requestedBurst: Long, - grantedBurst: Long, - numberOfDeployedImages: Int, - hostServer: Server - ) { - println("Hello World!") - } - }) + val vmm = HypervisorImage val workloadA = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 1) val workloadB = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 2_000, 1) - val monitor = object : NodeMonitor { - override fun stateChanged(server: Server, previousState: ServerState) { - println("$server") - } - } val driverDom = root.newDomain("driver") @@ -81,16 +66,17 @@ internal class HypervisorTest { val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) } val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", cpus, emptyList()) - metalDriver.init(monitor) + metalDriver.init() metalDriver.setImage(vmm) - metalDriver.start() + val node = metalDriver.start() + node.server?.events?.onEach { println(it) }?.launchIn(this) delay(5) val flavor = Flavor(1, 0) val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] - vmDriver.spawn(workloadA, monitor, flavor) - vmDriver.spawn(workloadB, monitor, flavor) + vmDriver.spawn(workloadA, flavor).events.onEach { println(it) }.launchIn(this) + vmDriver.spawn(workloadB, flavor) } runBlocking { diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index efc85653..4273c39e 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ 
b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -112,7 +112,6 @@ fun main(args: Array<String>) { AvailableMemoryAllocationPolicy(), simulationContext, bareMetalProvisioner, - monitor ) val faultInjectorDomain = root.newDomain(name = "failures") @@ -133,7 +132,7 @@ fun main(args: Array<String>) { val (time, workload) = reader.next() delay(max(0, time - simulationContext.clock.millis())) chan.send(Unit) - scheduler.deploy(workload.image, monitor, Flavor(workload.image.cores, workload.image.requiredMemory)) + scheduler.deploy(workload.image, Flavor(workload.image.cores, workload.image.requiredMemory)) } println(simulationContext.clock.instant()) -- cgit v1.2.3 From 76bfeb44c5a02be143c152c52bc1029cff360744 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sat, 21 Mar 2020 22:04:31 +0100 Subject: refactor: Migrate to Flow for event listeners --- .../kotlin/com/atlarge/odcsim/flow/StateFlow.kt | 21 +- .../opendc/compute/core/execution/ServerContext.kt | 2 +- .../com/atlarge/opendc/compute/metal/Node.kt | 4 +- .../opendc/compute/metal/driver/BareMetalDriver.kt | 5 + .../compute/metal/driver/SimpleBareMetalDriver.kt | 72 +++-- .../com/atlarge/opendc/compute/virt/Hypervisor.kt | 58 ++++ .../atlarge/opendc/compute/virt/HypervisorEvent.kt | 65 ++++ .../atlarge/opendc/compute/virt/HypervisorImage.kt | 57 ++++ .../driver/InsufficientMemoryOnServerException.kt | 3 + .../opendc/compute/virt/driver/SimpleVirtDriver.kt | 348 +++++++++++++++++++++ .../opendc/compute/virt/driver/VirtDriver.kt | 3 +- .../opendc/compute/virt/driver/VirtDriverEvent.kt | 59 ---- .../virt/driver/hypervisor/HypervisorImage.kt | 56 ---- .../virt/driver/hypervisor/HypervisorVirtDriver.kt | 342 -------------------- .../InsufficientMemoryOnServerException.kt | 3 - .../virt/service/SimpleVirtProvisioningService.kt | 36 ++- .../virt/service/VirtProvisioningService.kt | 3 +- .../virt/driver/hypervisor/HypervisorTest.kt | 7 +-
.../opendc/experiments/sc18/TestExperiment.kt | 51 ++- .../atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 8 +- .../opendc/experiments/sc20/TestExperiment.kt | 13 +- .../opendc/workflows/monitor/WorkflowMonitor.kt | 53 ---- .../atlarge/opendc/workflows/service/JobState.kt | 3 +- .../workflows/service/StageWorkflowService.kt | 96 +++--- .../opendc/workflows/service/WorkflowEvent.kt | 76 +++++ .../opendc/workflows/service/WorkflowService.kt | 9 +- .../StageWorkflowSchedulerIntegrationTest.kt | 40 ++- 27 files changed, 809 insertions(+), 684 deletions(-) create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/Hypervisor.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorImage.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/InsufficientMemoryOnServerException.kt create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt delete mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/InsufficientMemoryOnServerException.kt delete mode 100644 opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/monitor/WorkflowMonitor.kt create mode 100644 opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowEvent.kt diff --git a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt 
b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt index 429d932b..0410bd95 100644 --- a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt +++ b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt @@ -28,7 +28,7 @@ import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.FlowPreview import kotlinx.coroutines.InternalCoroutinesApi import kotlinx.coroutines.channels.BroadcastChannel -import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.channels.ConflatedBroadcastChannel import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.FlowCollector import kotlinx.coroutines.flow.asFlow @@ -58,31 +58,22 @@ public fun <T> StateFlow(value: T): StateFlow<T> = StateFlowImpl(value) /** * Internal implementation of the [StateFlow] interface. */ +@OptIn(ExperimentalCoroutinesApi::class, FlowPreview::class) private class StateFlowImpl<T>(initialValue: T) : StateFlow<T> { /** * The [BroadcastChannel] to back this flow. */ - @OptIn(ExperimentalCoroutinesApi::class) - private val chan = BroadcastChannel<T>(Channel.CONFLATED) + private val chan = ConflatedBroadcastChannel(initialValue) /** * The internal [Flow] backing this flow.
*/ - @OptIn(FlowPreview::class) private val flow = chan.asFlow() - init { - @OptIn(ExperimentalCoroutinesApi::class) - chan.offer(initialValue) - } - - @OptIn(ExperimentalCoroutinesApi::class) - public override var value: T = initialValue + public override var value: T + get() = chan.value set(value) { - if (field != value) { - chan.offer(value) - field = value - } + chan.offer(value) } @InternalCoroutinesApi diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt index c8caaca6..e0a491c8 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/execution/ServerContext.kt @@ -30,7 +30,7 @@ import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.core.services.ServiceKey /** - * Represents the execution context in which an bootable [Image] runs on a [Server]. + * Represents the execution context in which a bootable [Image] runs on a [Server]. */ public interface ServerContext { /** diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt index 8b8d1596..7cb4c0c5 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Node.kt @@ -33,7 +33,7 @@ import java.util.UUID /** * A bare-metal compute node. */ -data class Node( +public data class Node( /** * The unique identifier of the node. */ @@ -45,7 +45,7 @@ data class Node( public override val name: String, /** - * Meta data of the node. + * Metadata of the node. 
*/ public val metadata: Map<String, Any>, diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt index 5d1db378..41cec291 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/BareMetalDriver.kt @@ -37,6 +37,11 @@ import java.util.UUID * A driver interface for the management interface of a bare-metal compute node. */ public interface BareMetalDriver : Powerable, FailureDomain { + /** + * The [Node] that is controlled by this driver. + */ + public val node: Flow<Node> + /** * The amount of work done by the machine in percentage with respect to the total amount of processing power * available. diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 49c3fa2e..67069c03 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -48,10 +48,13 @@ import com.atlarge.opendc.core.services.ServiceKey import com.atlarge.opendc.core.services.ServiceRegistry import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.Job import kotlinx.coroutines.cancel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.scanReduce import kotlinx.coroutines.launch import java.util.UUID import kotlin.math.ceil @@ -96,33 +99,40 @@ public class SimpleBareMetalDriver( /** * The flow containing the load
of the server. */ - private val usageSignal = StateFlow(0.0) + private val usageState = StateFlow(0.0) /** * The machine state. */ - private var node: Node = Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null, events) - set(value) { + private val nodeState = StateFlow(Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null, events)) + + override val node: Flow = nodeState + + override val usage: Flow = usageState + + override val powerDraw: Flow = powerModel(this) + + init { + @OptIn(ExperimentalCoroutinesApi::class) + nodeState.scanReduce { field, value -> if (field.state != value.state) { events.emit(NodeEvent.StateChanged(value, field.state)) } - if (field.server != null && value.server != null && field.server!!.state != value.server.state) { - serverContext!!.events.emit(ServerEvent.StateChanged(value.server, field.server!!.state)) + if (field.server != null && value.server != null && field.server.state != value.server.state) { + serverContext!!.events.emit(ServerEvent.StateChanged(value.server, field.server.state)) } - field = value - } - - override val usage: Flow = usageSignal - - override val powerDraw: Flow = powerModel(this) + value + }.launchIn(domain) + } override suspend fun init(): Node = withContext(domain.coroutineContext) { - node + nodeState.value } override suspend fun start(): Node = withContext(domain.coroutineContext) { + val node = nodeState.value if (node.state != NodeState.SHUTOFF) { return@withContext node } @@ -139,12 +149,13 @@ public class SimpleBareMetalDriver( events ) - node = node.copy(state = NodeState.BOOT, server = server) + nodeState.value = node.copy(state = NodeState.BOOT, server = server) serverContext = BareMetalServerContext(events) - return@withContext node + return@withContext nodeState.value } override suspend fun stop(): Node = withContext(domain.coroutineContext) { + val node = nodeState.value if (node.state == NodeState.SHUTOFF) { return@withContext node } @@ -153,7 
+164,7 @@ public class SimpleBareMetalDriver( serverContext!!.cancel(fail = false) serverContext = null - node = node.copy(state = NodeState.SHUTOFF, server = null) + nodeState.value = node.copy(state = NodeState.SHUTOFF, server = null) return@withContext node } @@ -163,11 +174,11 @@ public class SimpleBareMetalDriver( } override suspend fun setImage(image: Image): Node = withContext(domain.coroutineContext) { - node = node.copy(image = image) - return@withContext node + nodeState.value = nodeState.value.copy(image = image) + return@withContext nodeState.value } - override suspend fun refresh(): Node = withContext(domain.coroutineContext) { node } + override suspend fun refresh(): Node = withContext(domain.coroutineContext) { nodeState.value } private inner class BareMetalServerContext(val events: EventFlow) : ServerManagementContext { private var finalized: Boolean = false @@ -175,7 +186,7 @@ public class SimpleBareMetalDriver( override val cpus: List = this@SimpleBareMetalDriver.cpus override val server: Server - get() = node.server!! + get() = nodeState.value.server!! 
private val job = domain.launch { delay(1) // TODO Introduce boot time @@ -193,15 +204,15 @@ public class SimpleBareMetalDriver( */ suspend fun cancel(fail: Boolean) { if (fail) - domain.cancel(ShutdownException(cause = Exception("Random failure"))) + job.cancel(ShutdownException(cause = Exception("Random failure"))) else - domain.cancel(ShutdownException()) + job.cancel(ShutdownException()) job.join() } override suspend fun publishService(key: ServiceKey, service: T) { val server = server.copy(services = server.services.put(key, service)) - node = node.copy(server = server) + nodeState.value = nodeState.value.copy(server = server) events.emit(ServerEvent.ServicePublished(server, key)) } @@ -209,24 +220,24 @@ public class SimpleBareMetalDriver( assert(!finalized) { "Machine is already finalized" } val server = server.copy(state = ServerState.ACTIVE) - node = node.copy(state = NodeState.ACTIVE, server = server) + nodeState.value = nodeState.value.copy(state = NodeState.ACTIVE, server = server) } override suspend fun exit(cause: Throwable?) { finalized = true - val serverState = + val newServerState = if (cause == null || (cause is ShutdownException && cause.cause == null)) ServerState.SHUTOFF else ServerState.ERROR - val nodeState = + val newNodeState = if (cause == null || (cause is ShutdownException && cause.cause != null)) - node.state + nodeState.value.state else NodeState.ERROR - val server = server.copy(state = serverState) - node = node.copy(state = nodeState, server = server) + val server = server.copy(state = newServerState) + nodeState.value = nodeState.value.copy(state = newNodeState, server = server) } private var flush: Job? 
= null @@ -256,7 +267,7 @@ public class SimpleBareMetalDriver( } } - usageSignal.value = totalUsage / cpus.size + usageState.value = totalUsage / cpus.size try { delay(duration) @@ -269,7 +280,7 @@ public class SimpleBareMetalDriver( // Flush the load if the do not receive a new run call for the same timestamp flush = domain.launch(job) { delay(1) - usageSignal.value = 0.0 + usageState.value = 0.0 } flush!!.invokeOnCompletion { flush = null @@ -289,5 +300,6 @@ public class SimpleBareMetalDriver( override suspend fun fail() { serverContext?.cancel(fail = true) + domain.cancel() } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/Hypervisor.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/Hypervisor.kt new file mode 100644 index 00000000..69b0124d --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/Hypervisor.kt @@ -0,0 +1,58 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.virt + +import com.atlarge.opendc.core.Identity +import kotlinx.coroutines.flow.Flow +import java.util.UUID + +/** + * A hypervisor (or virtual machine monitor) is software or firmware that virtualizes the host compute environment + * into several virtual guest machines. + */ +public class Hypervisor( + /** + * The unique identifier of the hypervisor. + */ + override val uid: UUID, + + /** + * The optional name of the hypervisor. + */ + override val name: String, + + /** + * Metadata of the hypervisor. + */ + public val metadata: Map<String, Any>, + + /** + * The events that are emitted by the hypervisor. + */ + public val events: Flow<HypervisorEvent> +) : Identity { + override fun hashCode(): Int = uid.hashCode() + override fun equals(other: Any?): Boolean = other is Hypervisor && uid == other.uid +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt new file mode 100644 index 00000000..3230c2ba --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt @@ -0,0 +1,65 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + *
The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.virt + +import com.atlarge.opendc.compute.virt.driver.VirtDriver + +/** + * An event that is emitted by a [VirtDriver]. + */ +public sealed class HypervisorEvent { + /** + * The driver that emitted the event. + */ + public abstract val driver: VirtDriver + + /** + * This event is emitted when the number of active servers on the server managed by this driver is updated. + * + * @property driver The driver that emitted the event. + * @property numberOfActiveServers The number of active servers. + * @property availableMemory The available memory, in MB. + */ + public data class VmsUpdated( + override val driver: VirtDriver, + public val numberOfActiveServers: Int, + public val availableMemory: Long + ) : HypervisorEvent() + + /** + * This event is emitted when a slice is finished. + * + * @property driver The driver that emitted the event. + * @property requestedBurst The total requested CPU time (can be above capacity). + * @property grantedBurst The actual total granted capacity. + * @property numberOfDeployedImages The number of images deployed on this hypervisor. 
+ */ + public data class SliceFinished( + override val driver: VirtDriver, + public val requestedBurst: Long, + public val grantedBurst: Long, + public val numberOfDeployedImages: Int + ) : HypervisorEvent() +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorImage.kt new file mode 100644 index 00000000..c21b002d --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorImage.kt @@ -0,0 +1,57 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package com.atlarge.opendc.compute.virt + +import com.atlarge.opendc.compute.core.execution.ServerContext +import com.atlarge.opendc.compute.core.image.Image +import com.atlarge.opendc.compute.virt.driver.SimpleVirtDriver +import com.atlarge.opendc.compute.virt.driver.VirtDriver +import com.atlarge.opendc.core.resource.TagContainer +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.suspendCancellableCoroutine +import java.util.UUID + +/** + * A hypervisor managing the VMs of a node. + */ +object HypervisorImage : Image { + override val uid: UUID = UUID.randomUUID() + override val name: String = "vmm" + override val tags: TagContainer = emptyMap() + + override suspend fun invoke(ctx: ServerContext) { + coroutineScope { + val driver = SimpleVirtDriver(ctx, this) + ctx.publishService(VirtDriver.Key, driver) + + // Suspend image until it is cancelled + try { + suspendCancellableCoroutine {} + } finally { + driver.eventFlow.close() + } + } + } +} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/InsufficientMemoryOnServerException.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/InsufficientMemoryOnServerException.kt new file mode 100644 index 00000000..0586ae00 --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/InsufficientMemoryOnServerException.kt @@ -0,0 +1,3 @@ +package com.atlarge.opendc.compute.virt.driver + +public class InsufficientMemoryOnServerException : IllegalStateException("Insufficient memory left on server.") diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt new file mode 100644 index 00000000..fc4c7634 --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -0,0 +1,348 @@ +/* + * MIT License 
+ *
+ * Copyright (c) 2020 atlarge-research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package com.atlarge.opendc.compute.virt.driver
+
+import com.atlarge.odcsim.Domain
+import com.atlarge.odcsim.flow.EventFlow
+import com.atlarge.odcsim.simulationContext
+import com.atlarge.opendc.compute.core.Flavor
+import com.atlarge.opendc.compute.core.ProcessingUnit
+import com.atlarge.opendc.compute.core.Server
+import com.atlarge.opendc.compute.core.ServerEvent
+import com.atlarge.opendc.compute.core.ServerState
+import com.atlarge.opendc.compute.core.execution.ServerContext
+import com.atlarge.opendc.compute.core.execution.ServerManagementContext
+import com.atlarge.opendc.compute.core.execution.ShutdownException
+import com.atlarge.opendc.compute.core.execution.assertFailure
+import com.atlarge.opendc.compute.core.image.Image
+import com.atlarge.opendc.compute.virt.HypervisorEvent
+import com.atlarge.opendc.core.services.ServiceKey
+import com.atlarge.opendc.core.services.ServiceRegistry
+import com.atlarge.opendc.core.workload.IMAGE_PERF_INTERFERENCE_MODEL
+import com.atlarge.opendc.core.workload.PerformanceInterferenceModel
+import kotlinx.coroutines.CancellationException
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.ExperimentalCoroutinesApi
+import kotlinx.coroutines.FlowPreview
+import kotlinx.coroutines.Job
+import kotlinx.coroutines.cancelAndJoin
+import kotlinx.coroutines.channels.Channel
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.launch
+import java.util.UUID
+import kotlin.math.ceil
+import kotlin.math.max
+import kotlin.math.min
+
+/**
+ * A [VirtDriver] backed by a simple hypervisor that shares the host's pCPUs among the vCPUs of its VMs (max-min fair).
+ */
+@OptIn(ExperimentalCoroutinesApi::class, FlowPreview::class)
+class SimpleVirtDriver(
+    private val hostContext: ServerContext,
+    private val coroutineScope: CoroutineScope
+) : VirtDriver {
+    /**
+     * The [Server] on which this hypervisor runs.
+     */
+    public val server: Server
+        get() = hostContext.server
+
+    /**
+     * A set for tracking the VM context objects; entries are added by [spawn] and removed when a VM context exits.
+     */
+    internal val vms: MutableSet<VmServerContext> = mutableSetOf()
+
+    /**
+     * Host memory that is still unallocated: starts at the host's memory size, shrinks in [spawn], grows on VM exit.
+     */
+    private var availableMemory: Long = hostContext.server.flavor.memorySize
+
+    /**
+     * The [EventFlow] to emit the events.
+     */
+    internal val eventFlow = EventFlow<HypervisorEvent>()
+
+    override val events: Flow<HypervisorEvent> = eventFlow
+
+    override suspend fun spawn(
+        image: Image,
+        flavor: Flavor
+    ): Server {
+        val requiredMemory = flavor.memorySize
+        if (availableMemory - requiredMemory < 0) {
+            throw InsufficientMemoryOnServerException()
+        }
+        require(flavor.cpuCount <= hostContext.server.flavor.cpuCount) { "Machine does not fit" }
+
+        val events = EventFlow<ServerEvent>()
+        val server = Server(
+            UUID.randomUUID(), "", emptyMap(), flavor, image, ServerState.BUILD,
+            ServiceRegistry(), events
+        )
+        availableMemory -= requiredMemory
+        vms.add(VmServerContext(server, events, simulationContext.domain))
+        eventFlow.emit(HypervisorEvent.VmsUpdated(this, vms.size, availableMemory))
+        return server
+    }
+
+    /**
+     * A flag to indicate the driver is stopped. NOTE(review): nothing in this class sets it to true yet.
+     */
+    private var stopped: Boolean = false
+
+    /**
+     * The set of [VmServerContext] instances that is being scheduled at the moment.
+     */
+    private val activeVms = mutableSetOf<VmServerContext>()
+
+    /**
+     * The deferred run call.
+     */
+    private var call: Job? = null
+
+    /**
+     * Schedule the vCPUs on the physical CPUs: [flush] the running slice, then launch a new one for [activeVms].
+     */
+    private suspend fun reschedule() {
+        flush()
+
+        // Do not schedule a call if there is no work to schedule or the driver stopped.
+        if (stopped || activeVms.isEmpty()) {
+            return
+        }
+
+        val call = coroutineScope.launch {
+            val start = simulationContext.clock.millis()
+            val vms = activeVms.toSet()
+
+            var duration: Double = Double.POSITIVE_INFINITY
+            var deadline: Long = Long.MAX_VALUE
+
+            val maxUsage = hostContext.cpus.sumByDouble { it.frequency }
+            var availableUsage = maxUsage
+            val requests = vms.asSequence()
+                .flatMap { it.requests.asSequence() }
+                .sortedBy { it.limit }
+                .toList()
+
+            // Divide the available host capacity fairly across the vCPUs using max-min fair sharing
+            for ((i, req) in requests.withIndex()) {
+                val remaining = requests.size - i
+                val availableShare = availableUsage / remaining
+                val grantedUsage = min(req.limit, availableShare)
+
+                req.allocatedUsage = grantedUsage
+                availableUsage -= grantedUsage
+
+                // The duration that we want to run is that of the shortest request from a vCPU
+                duration = min(duration, req.burst / req.allocatedUsage)
+                deadline = min(deadline, req.vm.deadline)
+            }
+
+            val usage = DoubleArray(hostContext.cpus.size)
+            val burst = LongArray(hostContext.cpus.size)
+            val totalUsage = maxUsage - availableUsage
+            availableUsage = totalUsage
+            val serverLoad = totalUsage / maxUsage
+
+            // Divide the requests over the pCPUs fairly, slowest first; `rank` = pCPUs already served, `i` = pCPU index
+            for ((rank, i) in hostContext.cpus.indices.sortedBy { hostContext.cpus[it].frequency }.withIndex()) {
+                val remaining = hostContext.cpus.size - rank
+                val availableShare = availableUsage / remaining
+                val grantedUsage = min(hostContext.cpus[i].frequency, availableShare)
+
+                usage[i] = grantedUsage
+                burst[i] = (duration * grantedUsage).toLong()
+                availableUsage -= grantedUsage
+            }
+
+            val remainder = burst.clone()
+            // We run the total burst on the host processor. Note that this call may be cancelled at any moment in
+            // time, so not all of the burst may be executed.
+            hostContext.run(remainder, usage, deadline)
+            val end = simulationContext.clock.millis()
+
+            // No work was performed
+            if ((end - start) <= 0) {
+                return@launch
+            }
+
+            val totalRemainder = remainder.sum()
+            val totalBurst = burst.sum()
+            val imagesRunning = vms.map { it.server.image }.toSet()
+
+            for (vm in vms) {
+                // Apply performance interference model
+                val performanceModel =
+                    vm.server.image.tags[IMAGE_PERF_INTERFERENCE_MODEL] as? PerformanceInterferenceModel?
+                val performanceScore = performanceModel?.apply(imagesRunning, serverLoad) ?: 1.0
+
+                for ((i, req) in vm.requests.withIndex()) {
+                    // Compute the fraction of compute time allocated to the VM
+                    val fraction = req.allocatedUsage / totalUsage
+
+                    // Derive the burst that was allocated to this vCPU
+                    val allocatedBurst = ceil(duration * req.allocatedUsage).toLong()
+
+                    // Compute the burst time that the VM was actually granted
+                    val grantedBurst = (performanceScore * (allocatedBurst - ceil(totalRemainder * fraction))).toLong()
+
+                    // Compute remaining burst time to be executed for the request
+                    req.burst = max(0, vm.burst[i] - grantedBurst)
+                    vm.burst[i] = req.burst
+                }
+
+                if (vm.burst.any { it == 0L } || vm.deadline <= end) {
+                    // Return vCPU `run` call: the requested burst was completed or deadline was exceeded
+                    vm.chan.send(Unit)
+                }
+            }
+
+            eventFlow.emit(HypervisorEvent.SliceFinished(this@SimpleVirtDriver, totalBurst, totalBurst - totalRemainder, vms.size))
+        }
+        this.call = call
+    }
+
+    /**
+     * Flush the progress of the current active VMs.
+     */
+    private suspend fun flush() {
+        val call = call ?: return // If there is no active call, there is nothing to flush
+        // The progress is actually flushed in the coroutine when it notices: we cancel it and wait for its
+        // completion.
+        call.cancelAndJoin()
+        this.call = null
+    }
+
+    /**
+     * A request to schedule a virtual CPU on the host cpu; [burst] is the work left, [limit] caps the granted usage.
+     */
+    internal data class CpuRequest(
+        val vm: VmServerContext,
+        val vcpu: ProcessingUnit,
+        var burst: Long,
+        val limit: Double
+    ) {
+        /**
+         * The usage that was actually granted.
+         */
+        var allocatedUsage: Double = 0.0
+    }
+
+    internal inner class VmServerContext(
+        server: Server,
+        val events: EventFlow<ServerEvent>,
+        val domain: Domain
+    ) : ServerManagementContext {
+        private var finalized: Boolean = false
+        lateinit var requests: List<CpuRequest>
+        lateinit var burst: LongArray
+        var deadline: Long = 0L
+        var chan = Channel<Unit>(Channel.RENDEZVOUS)
+        private var initialized: Boolean = false
+
+        internal val job: Job = coroutineScope.launch {
+            delay(1) // TODO Introduce boot time
+            init()
+            try {
+                server.image(this@VmServerContext)
+                exit()
+            } catch (cause: Throwable) {
+                exit(cause)
+            }
+        }
+
+        override var server: Server = server
+            set(value) {
+                if (field.state != value.state) {
+                    events.emit(ServerEvent.StateChanged(value, field.state))
+                }
+
+                field = value
+            }
+
+        override val cpus: List<ProcessingUnit> = hostContext.cpus.take(server.flavor.cpuCount)
+
+        override suspend fun <T : Any> publishService(key: ServiceKey<T>, service: T) {
+            server = server.copy(services = server.services.put(key, service))
+            events.emit(ServerEvent.ServicePublished(server, key))
+        }
+
+        override suspend fun init() {
+            assert(!finalized) { "VM is already finalized" }
+
+            server = server.copy(state = ServerState.ACTIVE)
+            initialized = true
+        }
+
+        override suspend fun exit(cause: Throwable?) {
+            finalized = true
+
+            val serverState =
+                if (cause == null || (cause is ShutdownException && cause.cause == null))
+                    ServerState.SHUTOFF
+                else
+                    ServerState.ERROR
+            server = server.copy(state = serverState)
+            availableMemory += server.flavor.memorySize
+            vms.remove(this)
+            events.close()
+            eventFlow.emit(HypervisorEvent.VmsUpdated(this@SimpleVirtDriver, vms.size, availableMemory))
+        }
+
+        override suspend fun run(burst: LongArray, limit: DoubleArray, deadline: Long) {
+            require(burst.size == limit.size) { "Array dimensions do not match" }
+
+            this.deadline = deadline
+            this.burst = burst
+            requests = cpus.asSequence()
+                .take(burst.size)
+                .mapIndexed { i, cpu ->
+                    CpuRequest(
+                        this,
+                        cpu,
+                        burst[i],
+                        limit[i]
+                    )
+                }
+                .toList()
+
+            // Wait until the burst has been run or the coroutine is cancelled
+            try {
+                activeVms += this
+                reschedule()
+                chan.receive()
+            } catch (e: CancellationException) {
+                // On cancellation, we compute and return the remaining burst
+                e.assertFailure()
+            } finally {
+                activeVms -= this
+                reschedule()
+            }
+        }
+    }
+}
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt
index 296f170e..d7ae0c12 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt
@@ -27,6 +27,7 @@ package com.atlarge.opendc.compute.virt.driver
 import com.atlarge.opendc.compute.core.Flavor
 import com.atlarge.opendc.compute.core.Server
 import com.atlarge.opendc.compute.core.image.Image
+import com.atlarge.opendc.compute.virt.HypervisorEvent
 import com.atlarge.opendc.core.services.AbstractServiceKey
 import kotlinx.coroutines.flow.Flow
 import java.util.UUID
@@ -39,7 +40,7 @@ public interface VirtDriver {
     /**
      * The events emitted by the driver.
*/ - public val events: Flow + public val events: Flow /** * Spawn the given [Image] on the compute resource of this driver. diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt deleted file mode 100644 index ccbe8b3c..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriverEvent.kt +++ /dev/null @@ -1,59 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.compute.virt.driver - -/** - * An event that is emitted by a [VirtDriver]. - */ -public sealed class VirtDriverEvent { - /** - * The driver that emitted the event. 
- */ - public abstract val driver: VirtDriver - - /** - * This event is emitted when the number of active servers on the server managed by this driver is updated. - * - * @property driver The driver that emitted the event. - * @property numberOfActiveServers The number of active servers. - * @property availableMemory The available memory, in MB. - */ - public data class VmsUpdated(override val driver: VirtDriver, public val numberOfActiveServers: Int, public val availableMemory: Long) : VirtDriverEvent() - - /** - * This event is emitted when a slice is finished. - * - * @property driver The driver that emitted the event. - * @property requestedBurst The total requested CPU time (can be above capacity). - * @property grantedBurst The actual total granted capacity. - * @property numberOfDeployedImages The number of images deployed on this hypervisor. - */ - public data class SliceFinished( - override val driver: VirtDriver, - public val requestedBurst: Long, - public val grantedBurst: Long, - public val numberOfDeployedImages: Int - ) : VirtDriverEvent() -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt deleted file mode 100644 index 1eb0e0ff..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorImage.kt +++ /dev/null @@ -1,56 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.compute.virt.driver.hypervisor - -import com.atlarge.opendc.compute.core.execution.ServerContext -import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.core.resource.TagContainer -import kotlinx.coroutines.coroutineScope -import kotlinx.coroutines.suspendCancellableCoroutine -import java.util.UUID - -/** - * A hypervisor managing the VMs of a node. 
- */ -object HypervisorImage : Image { - override val uid: UUID = UUID.randomUUID() - override val name: String = "vmm" - override val tags: TagContainer = emptyMap() - - override suspend fun invoke(ctx: ServerContext) { - coroutineScope { - val driver = HypervisorVirtDriver(ctx, this) - ctx.publishService(VirtDriver.Key, driver) - - // Suspend image until it is cancelled - try { - suspendCancellableCoroutine {} - } finally { - driver.eventFlow.close() - } - } - } -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt deleted file mode 100644 index 0b4a7109..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorVirtDriver.kt +++ /dev/null @@ -1,342 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.compute.virt.driver.hypervisor - -import com.atlarge.odcsim.Domain -import com.atlarge.odcsim.flow.EventFlow -import com.atlarge.odcsim.simulationContext -import com.atlarge.opendc.compute.core.Flavor -import com.atlarge.opendc.compute.core.ProcessingUnit -import com.atlarge.opendc.compute.core.Server -import com.atlarge.opendc.compute.core.ServerEvent -import com.atlarge.opendc.compute.core.ServerState -import com.atlarge.opendc.compute.core.execution.ServerContext -import com.atlarge.opendc.compute.core.execution.ServerManagementContext -import com.atlarge.opendc.compute.core.execution.ShutdownException -import com.atlarge.opendc.compute.core.execution.assertFailure -import com.atlarge.opendc.compute.core.image.Image -import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.compute.virt.driver.VirtDriverEvent -import com.atlarge.opendc.core.services.ServiceKey -import com.atlarge.opendc.core.services.ServiceRegistry -import com.atlarge.opendc.core.workload.IMAGE_PERF_INTERFERENCE_MODEL -import com.atlarge.opendc.core.workload.PerformanceInterferenceModel -import kotlinx.coroutines.CancellationException -import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.ExperimentalCoroutinesApi -import kotlinx.coroutines.FlowPreview -import kotlinx.coroutines.Job -import kotlinx.coroutines.cancelAndJoin -import kotlinx.coroutines.channels.Channel -import kotlinx.coroutines.delay -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.launch -import java.util.UUID -import kotlin.math.ceil -import kotlin.math.max -import kotlin.math.min - -/** - * A [VirtDriver] that is backed by a simple hypervisor implementation. 
- */ -@OptIn(ExperimentalCoroutinesApi::class, FlowPreview::class) -class HypervisorVirtDriver( - private val hostContext: ServerContext, - private val coroutineScope: CoroutineScope -) : VirtDriver { - /** - * The [Server] on which this hypervisor runs. - */ - public val server: Server - get() = hostContext.server - - /** - * A set for tracking the VM context objects. - */ - internal val vms: MutableSet = mutableSetOf() - - /** - * Current total memory use of the images on this hypervisor. - */ - private var availableMemory: Long = hostContext.server.flavor.memorySize - - /** - * The [EventFlow] to emit the events. - */ - internal val eventFlow = EventFlow() - - override val events: Flow = eventFlow - - override suspend fun spawn( - image: Image, - flavor: Flavor - ): Server { - val requiredMemory = flavor.memorySize - if (availableMemory - requiredMemory < 0) { - throw InsufficientMemoryOnServerException() - } - require(flavor.cpuCount <= hostContext.server.flavor.cpuCount) { "Machine does not fit" } - - val events = EventFlow() - val server = Server( - UUID.randomUUID(), "", emptyMap(), flavor, image, ServerState.BUILD, - ServiceRegistry(), events - ) - availableMemory -= requiredMemory - vms.add(VmServerContext(server, events, simulationContext.domain)) - eventFlow.emit(VirtDriverEvent.VmsUpdated(this, vms.size, availableMemory)) - return server - } - - /** - * A flag to indicate the driver is stopped. - */ - private var stopped: Boolean = false - - /** - * The set of [VmServerContext] instances that is being scheduled at the moment. - */ - private val activeVms = mutableSetOf() - - /** - * The deferred run call. - */ - private var call: Job? = null - - /** - * Schedule the vCPUs on the physical CPUs. - */ - private suspend fun reschedule() { - flush() - - // Do not schedule a call if there is no work to schedule or the driver stopped. 
- if (stopped || activeVms.isEmpty()) { - return - } - - val call = coroutineScope.launch { - val start = simulationContext.clock.millis() - val vms = activeVms.toSet() - - var duration: Double = Double.POSITIVE_INFINITY - var deadline: Long = Long.MAX_VALUE - - val maxUsage = hostContext.cpus.sumByDouble { it.frequency } - var availableUsage = maxUsage - val requests = vms.asSequence() - .flatMap { it.requests.asSequence() } - .sortedBy { it.limit } - .toList() - - // Divide the available host capacity fairly across the vCPUs using max-min fair sharing - for ((i, req) in requests.withIndex()) { - val remaining = requests.size - i - val availableShare = availableUsage / remaining - val grantedUsage = min(req.limit, availableShare) - - req.allocatedUsage = grantedUsage - availableUsage -= grantedUsage - - // The duration that we want to run is that of the shortest request from a vCPU - duration = min(duration, req.burst / req.allocatedUsage) - deadline = min(deadline, req.vm.deadline) - } - - val usage = DoubleArray(hostContext.cpus.size) - val burst = LongArray(hostContext.cpus.size) - val totalUsage = maxUsage - availableUsage - availableUsage = totalUsage - val serverLoad = totalUsage / maxUsage - - // Divide the requests over the available capacity of the pCPUs fairly - for (i in hostContext.cpus.indices.sortedBy { hostContext.cpus[it].frequency }) { - val remaining = hostContext.cpus.size - i - val availableShare = availableUsage / remaining - val grantedUsage = min(hostContext.cpus[i].frequency, availableShare) - - usage[i] = grantedUsage - burst[i] = (duration * grantedUsage).toLong() - availableUsage -= grantedUsage - } - - val remainder = burst.clone() - // We run the total burst on the host processor. Note that this call may be cancelled at any moment in - // time, so not all of the burst may be executed. 
- hostContext.run(remainder, usage, deadline) - val end = simulationContext.clock.millis() - - // No work was performed - if ((end - start) <= 0) { - return@launch - } - - val totalRemainder = remainder.sum() - val totalBurst = burst.sum() - val imagesRunning = vms.map { it.server.image }.toSet() - - for (vm in vms) { - // Apply performance interference model - val performanceModel = - vm.server.image.tags[IMAGE_PERF_INTERFERENCE_MODEL] as? PerformanceInterferenceModel? - val performanceScore = performanceModel?.apply(imagesRunning, serverLoad) ?: 1.0 - - for ((i, req) in vm.requests.withIndex()) { - // Compute the fraction of compute time allocated to the VM - val fraction = req.allocatedUsage / totalUsage - - // Derive the burst that was allocated to this vCPU - val allocatedBurst = ceil(duration * req.allocatedUsage).toLong() - - // Compute the burst time that the VM was actually granted - val grantedBurst = (performanceScore * (allocatedBurst - ceil(totalRemainder * fraction))).toLong() - - // Compute remaining burst time to be executed for the request - req.burst = max(0, vm.burst[i] - grantedBurst) - vm.burst[i] = req.burst - } - - if (vm.burst.any { it == 0L } || vm.deadline <= end) { - // Return vCPU `run` call: the requested burst was completed or deadline was exceeded - vm.chan.send(Unit) - } - } - - eventFlow.emit(VirtDriverEvent.SliceFinished(this@HypervisorVirtDriver, totalBurst, totalBurst - totalRemainder, vms.size)) - } - this.call = call - } - - /** - * Flush the progress of the current active VMs. - */ - private suspend fun flush() { - val call = call ?: return // If there is no active call, there is nothing to flush - // The progress is actually flushed in the coroutine when it notices: we cancel it and wait for its - // completion. - call.cancelAndJoin() - this.call = null - } - - /** - * A request to schedule a virtual CPU on the host cpu. 
- */ - internal data class CpuRequest( - val vm: VmServerContext, - val vcpu: ProcessingUnit, - var burst: Long, - val limit: Double - ) { - /** - * The usage that was actually granted. - */ - var allocatedUsage: Double = 0.0 - } - - internal inner class VmServerContext( - server: Server, - val events: EventFlow, - val domain: Domain - ) : ServerManagementContext { - private var finalized: Boolean = false - lateinit var requests: List - lateinit var burst: LongArray - var deadline: Long = 0L - var chan = Channel(Channel.RENDEZVOUS) - private var initialized: Boolean = false - - internal val job: Job = coroutineScope.launch { - delay(1) // TODO Introduce boot time - init() - try { - server.image(this@VmServerContext) - exit() - } catch (cause: Throwable) { - exit(cause) - } - } - - override var server: Server = server - set(value) { - if (field.state != value.state) { - events.emit(ServerEvent.StateChanged(value, field.state)) - } - - field = value - } - - override val cpus: List = hostContext.cpus.take(server.flavor.cpuCount) - - override suspend fun publishService(key: ServiceKey, service: T) { - server = server.copy(services = server.services.put(key, service)) - events.emit(ServerEvent.ServicePublished(server, key)) - } - - override suspend fun init() { - assert(!finalized) { "VM is already finalized" } - - server = server.copy(state = ServerState.ACTIVE) - initialized = true - } - - override suspend fun exit(cause: Throwable?) 
{ - finalized = true - - val serverState = - if (cause == null || (cause is ShutdownException && cause.cause == null)) - ServerState.SHUTOFF - else - ServerState.ERROR - server = server.copy(state = serverState) - availableMemory += server.flavor.memorySize - vms.remove(this) - events.close() - eventFlow.emit(VirtDriverEvent.VmsUpdated(this@HypervisorVirtDriver, vms.size, availableMemory)) - } - - override suspend fun run(burst: LongArray, limit: DoubleArray, deadline: Long) { - require(burst.size == limit.size) { "Array dimensions do not match" } - - this.deadline = deadline - this.burst = burst - requests = cpus.asSequence() - .take(burst.size) - .mapIndexed { i, cpu -> CpuRequest(this, cpu, burst[i], limit[i]) } - .toList() - - // Wait until the burst has been run or the coroutine is cancelled - try { - activeVms += this - reschedule() - chan.receive() - } catch (e: CancellationException) { - // On cancellation, we compute and return the remaining burst - e.assertFailure() - } finally { - activeVms -= this - reschedule() - } - } - } -} diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/InsufficientMemoryOnServerException.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/InsufficientMemoryOnServerException.kt deleted file mode 100644 index 926234b5..00000000 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/InsufficientMemoryOnServerException.kt +++ /dev/null @@ -1,3 +0,0 @@ -package com.atlarge.opendc.compute.virt.driver.hypervisor - -public class InsufficientMemoryOnServerException : IllegalStateException("Insufficient memory left on server.") diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 8365f8c9..8393dfa9 100644 --- 
a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -8,22 +8,26 @@ import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.driver.VirtDriver -import com.atlarge.opendc.compute.virt.driver.hypervisor.HypervisorImage -import com.atlarge.opendc.compute.virt.driver.hypervisor.InsufficientMemoryOnServerException +import com.atlarge.opendc.compute.virt.HypervisorImage +import com.atlarge.opendc.compute.virt.driver.InsufficientMemoryOnServerException import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy import com.atlarge.opendc.core.services.ServiceKey +import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.Job -import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.collect import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch +import kotlinx.coroutines.suspendCancellableCoroutine +import kotlin.coroutines.Continuation +import kotlin.coroutines.resume @OptIn(ExperimentalCoroutinesApi::class) class SimpleVirtProvisioningService( public override val allocationPolicy: AllocationPolicy, private val ctx: SimulationContext, private val provisioningService: ProvisioningService -) : VirtProvisioningService { +) : VirtProvisioningService, CoroutineScope by ctx.domain { /** * The hypervisors that have been launched by the service. 
*/ @@ -45,18 +49,17 @@ class SimpleVirtProvisioningService( private val activeImages: MutableSet = mutableSetOf() init { - ctx.domain.launch { + launch { val provisionedNodes = provisioningService.nodes() provisionedNodes.forEach { node -> val hypervisorImage = HypervisorImage val node = provisioningService.deploy(node, hypervisorImage) node.server!!.events.onEach { event -> - when (event) { - is ServerEvent.StateChanged -> stateChanged(event.server, event.previousState) - is ServerEvent.ServicePublished -> servicePublished(event.server, event.key) - } + when (event) { + is ServerEvent.StateChanged -> stateChanged(event.server) + is ServerEvent.ServicePublished -> servicePublished(event.server, event.key) } - .launchIn(ctx.domain) + }.collect() } } } @@ -64,8 +67,8 @@ class SimpleVirtProvisioningService( override suspend fun deploy( image: Image, flavor: Flavor - ) { - val vmInstance = ImageView(image, flavor) + ): Server = suspendCancellableCoroutine { cont -> + val vmInstance = ImageView(image, flavor, cont) incomingImages += vmInstance requestCycle() } @@ -77,7 +80,7 @@ class SimpleVirtProvisioningService( return } - val call = ctx.domain.launch { + val call = launch { schedule() } call.invokeOnCompletion { this.call = null } @@ -92,10 +95,12 @@ class SimpleVirtProvisioningService( try { println("Spawning ${imageInstance.image}") incomingImages -= imageInstance - imageInstance.server = selectedHv.driver.spawn( + val server = selectedHv.driver.spawn( imageInstance.image, imageInstance.flavor ) + imageInstance.server = server + imageInstance.continuation.resume(server) activeImages += imageInstance } catch (e: InsufficientMemoryOnServerException) { println("Unable to deploy image due to insufficient memory") @@ -103,7 +108,7 @@ class SimpleVirtProvisioningService( } } - private fun stateChanged(server: Server, previousState: ServerState) { + private fun stateChanged(server: Server) { when (server.state) { ServerState.ACTIVE -> { val hvView = HypervisorView( @@ 
-134,6 +139,7 @@ class SimpleVirtProvisioningService( data class ImageView( val image: Image, val flavor: Flavor, + val continuation: Continuation, var server: Server? = null ) } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt index da72d742..12543ce3 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt @@ -1,6 +1,7 @@ package com.atlarge.opendc.compute.virt.service import com.atlarge.opendc.compute.core.Flavor +import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy @@ -16,5 +17,5 @@ interface VirtProvisioningService { * @param image The image to be deployed. * @param flavor The flavor of the machine instance to run this [image] on. 
*/ - public suspend fun deploy(image: Image, flavor: Flavor) + public suspend fun deploy(image: Image, flavor: Flavor): Server } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt index d86045c0..bcaafb59 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt @@ -30,6 +30,7 @@ import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ProcessingNode import com.atlarge.opendc.compute.core.image.FlopsApplicationImage import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver +import com.atlarge.opendc.compute.virt.HypervisorImage import com.atlarge.opendc.compute.virt.driver.VirtDriver import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.delay @@ -75,8 +76,10 @@ internal class HypervisorTest { val flavor = Flavor(1, 0) val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] - vmDriver.spawn(workloadA, flavor).events.onEach { println(it) }.launchIn(this) - vmDriver.spawn(workloadB, flavor) + val vmA = vmDriver.spawn(workloadA, flavor) + vmA.events.onEach { println(it) }.launchIn(this) + val vmB = vmDriver.spawn(workloadB, flavor) + vmB.events.onEach { println(it) }.launchIn(this) } runBlocking { diff --git a/opendc/opendc-experiments-sc18/src/main/kotlin/com/atlarge/opendc/experiments/sc18/TestExperiment.kt b/opendc/opendc-experiments-sc18/src/main/kotlin/com/atlarge/opendc/experiments/sc18/TestExperiment.kt index d5e1404a..b0182ab3 100644 --- a/opendc/opendc-experiments-sc18/src/main/kotlin/com/atlarge/opendc/experiments/sc18/TestExperiment.kt +++ b/opendc/opendc-experiments-sc18/src/main/kotlin/com/atlarge/opendc/experiments/sc18/TestExperiment.kt @@ 
-29,8 +29,8 @@ import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.format.environment.sc18.Sc18EnvironmentReader import com.atlarge.opendc.format.trace.gwf.GwfTraceReader -import com.atlarge.opendc.workflows.monitor.WorkflowMonitor import com.atlarge.opendc.workflows.service.StageWorkflowService +import com.atlarge.opendc.workflows.service.WorkflowEvent import com.atlarge.opendc.workflows.service.WorkflowSchedulerMode import com.atlarge.opendc.workflows.service.stage.job.NullJobAdmissionPolicy import com.atlarge.opendc.workflows.service.stage.job.SubmissionTimeJobOrderPolicy @@ -38,12 +38,12 @@ import com.atlarge.opendc.workflows.service.stage.resource.FirstFitResourceSelec import com.atlarge.opendc.workflows.service.stage.resource.FunctionalResourceFilterPolicy import com.atlarge.opendc.workflows.service.stage.task.NullTaskEligibilityPolicy import com.atlarge.opendc.workflows.service.stage.task.SubmissionTimeTaskOrderPolicy -import com.atlarge.opendc.workflows.workload.Job -import com.atlarge.opendc.workflows.workload.Task import kotlin.math.max import kotlinx.coroutines.async import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import java.io.File @@ -62,28 +62,6 @@ fun main(args: Array) { var finished = 0 val token = Channel() - - val monitor = object : WorkflowMonitor { - override suspend fun onJobStart(job: Job, time: Long) { - println("Job ${job.uid} started") - } - - override suspend fun onJobFinish(job: Job, time: Long) { - finished += 1 - println("Jobs $finished/$total finished (${job.tasks.size} tasks)") - - if (finished == total) { - token.send(true) - } - } - - override suspend fun onTaskStart(job: Job, task: Task, time: Long) { - } - - override suspend fun onTaskFinish(job: Job, task: Task, status: 
Int, time: Long) { - } - } - val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() val system = provider(name = "sim") @@ -106,6 +84,27 @@ fun main(args: Array) { } val broker = system.newDomain(name = "broker") + + broker.launch { + val scheduler = schedulerAsync.await() + scheduler.events + .onEach { event -> + when (event) { + is WorkflowEvent.JobStarted -> { + println("Job ${event.job.uid} started") + } + is WorkflowEvent.JobFinished -> { + finished += 1 + println("Jobs $finished/$total finished (${event.job.tasks.size} tasks)") + + if (finished == total) { + token.send(true) + } + } + } + } + .collect() + } broker.launch { val ctx = simulationContext val reader = GwfTraceReader(File(args[0])) @@ -115,7 +114,7 @@ fun main(args: Array) { val (time, job) = reader.next() total += 1 delay(max(0, time * 1000 - ctx.clock.millis())) - scheduler.submit(job, monitor) + scheduler.submit(job) } } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 0f4d0c1b..e18bbe30 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -2,9 +2,7 @@ package com.atlarge.opendc.experiments.sc20 import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.driver.BareMetalDriver -import com.atlarge.opendc.compute.virt.monitor.HypervisorMonitor import kotlinx.coroutines.flow.first import java.io.BufferedWriter import java.io.Closeable @@ -12,7 +10,7 @@ import java.io.FileWriter class Sc20Monitor( destination: String -) : HypervisorMonitor, ServerMonitor, Closeable { +) : Closeable { private val outputFile = 
BufferedWriter(FileWriter(destination)) private var failed: Int = 0 @@ -20,14 +18,14 @@ class Sc20Monitor( outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw,failedVms\n") } - override fun stateChanged(server: Server, previousState: ServerState) { + fun stateChanged(server: Server) { println("${server.uid} ${server.state}") if (server.state == ServerState.ERROR) { failed++ } } - override suspend fun onSliceFinish( + suspend fun onSliceFinish( time: Long, requestedBurst: Long, grantedBurst: Long, diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 4273c39e..96033ea7 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -27,6 +27,7 @@ package com.atlarge.opendc.experiments.sc20 import com.atlarge.odcsim.SimulationEngineProvider import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor +import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy @@ -41,6 +42,8 @@ import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import java.io.File @@ -107,11 +110,10 @@ fun main(args: Array) { println(simulationContext.clock.instant()) val bareMetalProvisioner = 
environment.platforms[0].zones[0].services[ProvisioningService.Key] - val scheduler = SimpleVirtProvisioningService( AvailableMemoryAllocationPolicy(), simulationContext, - bareMetalProvisioner, + bareMetalProvisioner ) val faultInjectorDomain = root.newDomain(name = "failures") @@ -131,8 +133,11 @@ fun main(args: Array) { while (reader.hasNext()) { val (time, workload) = reader.next() delay(max(0, time - simulationContext.clock.millis())) - chan.send(Unit) - scheduler.deploy(workload.image, Flavor(workload.image.cores, workload.image.requiredMemory)) + launch { + chan.send(Unit) + val server = scheduler.deploy(workload.image, Flavor(workload.image.cores, workload.image.requiredMemory)) + server.events.onEach { if (it is ServerEvent.StateChanged) monitor.stateChanged(it.server) }.collect() + } } println(simulationContext.clock.instant()) diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/monitor/WorkflowMonitor.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/monitor/WorkflowMonitor.kt deleted file mode 100644 index 3c77d57a..00000000 --- a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/monitor/WorkflowMonitor.kt +++ /dev/null @@ -1,53 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package com.atlarge.opendc.workflows.monitor - -import com.atlarge.opendc.workflows.workload.Job -import com.atlarge.opendc.workflows.workload.Task - -/** - * An interface for monitoring the progression of workflows. - */ -public interface WorkflowMonitor { - /** - * This method is invoked when a job has become active. - */ - public suspend fun onJobStart(job: Job, time: Long) - - /** - * This method is invoked when a job has finished processing. - */ - public suspend fun onJobFinish(job: Job, time: Long) - - /** - * This method is invoked when a task of a job has started processing. - */ - public suspend fun onTaskStart(job: Job, task: Task, time: Long) - - /** - * This method is invoked when a task has finished processing. 
- */ - public suspend fun onTaskFinish(job: Job, task: Task, status: Int, time: Long) -} diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/JobState.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/JobState.kt index b444f91c..1cb2de97 100644 --- a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/JobState.kt +++ b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/JobState.kt @@ -24,10 +24,9 @@ package com.atlarge.opendc.workflows.service -import com.atlarge.opendc.workflows.monitor.WorkflowMonitor import com.atlarge.opendc.workflows.workload.Job -class JobState(val job: Job, val monitor: WorkflowMonitor, val submittedAt: Long) { +class JobState(val job: Job, val submittedAt: Long) { /** * A flag to indicate whether this job is finished. */ diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt index a055a3fe..7a20363c 100644 --- a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt +++ b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowService.kt @@ -25,13 +25,13 @@ package com.atlarge.opendc.workflows.service import com.atlarge.odcsim.Domain +import com.atlarge.odcsim.flow.EventFlow import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Server +import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState -import com.atlarge.opendc.compute.core.monitor.ServerMonitor import com.atlarge.opendc.compute.metal.Node import com.atlarge.opendc.compute.metal.service.ProvisioningService -import com.atlarge.opendc.workflows.monitor.WorkflowMonitor import com.atlarge.opendc.workflows.service.stage.job.JobAdmissionPolicy import 
com.atlarge.opendc.workflows.service.stage.job.JobOrderPolicy import com.atlarge.opendc.workflows.service.stage.resource.ResourceFilterPolicy @@ -39,6 +39,11 @@ import com.atlarge.opendc.workflows.service.stage.resource.ResourceSelectionPoli import com.atlarge.opendc.workflows.service.stage.task.TaskEligibilityPolicy import com.atlarge.opendc.workflows.service.stage.task.TaskOrderPolicy import com.atlarge.opendc.workflows.workload.Job +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onEach import java.util.PriorityQueue import java.util.Queue import kotlinx.coroutines.launch @@ -58,7 +63,7 @@ class StageWorkflowService( taskOrderPolicy: TaskOrderPolicy, resourceFilterPolicy: ResourceFilterPolicy, resourceSelectionPolicy: ResourceSelectionPolicy -) : WorkflowService, ServerMonitor { +) : WorkflowService, CoroutineScope by domain { /** * The incoming jobs ready to be processed by the scheduler. 
@@ -167,6 +172,7 @@ class StageWorkflowService( private val taskEligibilityPolicy: TaskEligibilityPolicy.Logic private val resourceFilterPolicy: ResourceFilterPolicy.Logic private val resourceSelectionPolicy: Comparator + private val eventFlow = EventFlow() init { domain.launch { @@ -183,9 +189,11 @@ class StageWorkflowService( this.resourceSelectionPolicy = resourceSelectionPolicy(this) } - override suspend fun submit(job: Job, monitor: WorkflowMonitor) = withContext(domain.coroutineContext) { + override val events: Flow = eventFlow + + override suspend fun submit(job: Job) = withContext(domain.coroutineContext) { // J1 Incoming Jobs - val jobInstance = JobState(job, monitor, simulationContext.clock.millis()) + val jobInstance = JobState(job, simulationContext.clock.millis()) val instances = job.tasks.associateWith { TaskState(jobInstance, it) } @@ -217,6 +225,7 @@ class StageWorkflowService( /** * Perform a scheduling cycle immediately. */ + @OptIn(ExperimentalCoroutinesApi::class) internal suspend fun schedule() { // J2 Create list of eligible jobs val iterator = incomingJobs.iterator() @@ -232,7 +241,7 @@ class StageWorkflowService( iterator.remove() jobQueue.add(jobInstance) activeJobs += jobInstance - jobInstance.monitor.onJobStart(jobInstance.job, simulationContext.clock.millis()) + eventFlow.emit(WorkflowEvent.JobStarted(this, jobInstance.job, simulationContext.clock.millis())) rootListener.jobStarted(jobInstance) } @@ -280,10 +289,13 @@ class StageWorkflowService( // T4 Submit task to machine available -= host instance.state = TaskStatus.ACTIVE - - val newHost = provisioningService.deploy(host, instance.task.image, this) + val newHost = provisioningService.deploy(host, instance.task.image) + val server = newHost.server!! instance.host = newHost - taskByServer[newHost.server!!] 
= instance + taskByServer[server] = instance + server.events + .onEach { event -> if (event is ServerEvent.StateChanged) stateChanged(event.server) } + .launchIn(this) activeTasks += instance taskQueue.poll() @@ -294,50 +306,48 @@ class StageWorkflowService( } } - override fun stateChanged(server: Server, previousState: ServerState) { - domain.launch { - when (server.state) { - ServerState.ACTIVE -> { - val task = taskByServer.getValue(server) - task.startedAt = simulationContext.clock.millis() - task.job.monitor.onTaskStart(task.job.job, task.task, simulationContext.clock.millis()) - rootListener.taskStarted(task) - } - ServerState.SHUTOFF, ServerState.ERROR -> { - val task = taskByServer.remove(server) ?: throw IllegalStateException() - val job = task.job - task.state = TaskStatus.FINISHED - task.finishedAt = simulationContext.clock.millis() - job.tasks.remove(task) - available += task.host!! - activeTasks -= task - job.monitor.onTaskFinish(job.job, task.task, 0, simulationContext.clock.millis()) - rootListener.taskFinished(task) - - // Add job roots to the scheduling queue - for (dependent in task.dependents) { - if (dependent.state != TaskStatus.READY) { - continue - } - - incomingTasks += dependent - rootListener.taskReady(dependent) + private suspend fun stateChanged(server: Server) { + when (server.state) { + ServerState.ACTIVE -> { + val task = taskByServer.getValue(server) + task.startedAt = simulationContext.clock.millis() + eventFlow.emit(WorkflowEvent.TaskStarted(this@StageWorkflowService, task.job.job, task.task, simulationContext.clock.millis())) + rootListener.taskStarted(task) + } + ServerState.SHUTOFF, ServerState.ERROR -> { + val task = taskByServer.remove(server) ?: throw IllegalStateException() + val job = task.job + task.state = TaskStatus.FINISHED + task.finishedAt = simulationContext.clock.millis() + job.tasks.remove(task) + available += task.host!! 
+ activeTasks -= task + eventFlow.emit(WorkflowEvent.TaskFinished(this@StageWorkflowService, task.job.job, task.task, simulationContext.clock.millis())) + rootListener.taskFinished(task) + + // Add job roots to the scheduling queue + for (dependent in task.dependents) { + if (dependent.state != TaskStatus.READY) { + continue } - if (job.isFinished) { - finishJob(job) - } + incomingTasks += dependent + rootListener.taskReady(dependent) + } - requestCycle() + if (job.isFinished) { + finishJob(job) } - else -> throw IllegalStateException() + + requestCycle() } + else -> throw IllegalStateException() } } private suspend fun finishJob(job: JobState) { activeJobs -= job - job.monitor.onJobFinish(job.job, simulationContext.clock.millis()) + eventFlow.emit(WorkflowEvent.JobFinished(this, job.job, simulationContext.clock.millis())) rootListener.jobFinished(job) } diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowEvent.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowEvent.kt new file mode 100644 index 00000000..2ca5a19d --- /dev/null +++ b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowEvent.kt @@ -0,0 +1,76 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.workflows.service + +import com.atlarge.opendc.workflows.workload.Job +import com.atlarge.opendc.workflows.workload.Task + +/** + * An event emitted by the [WorkflowService]. + */ +public sealed class WorkflowEvent { + /** + * The [WorkflowService] that emitted the event. + */ + public abstract val service: WorkflowService + + /** + * This event is emitted when a job has become active. + */ + public data class JobStarted( + override val service: WorkflowService, + public val job: Job, + public val time: Long + ) : WorkflowEvent() + + /** + * This event is emitted when a job has finished processing. + */ + public data class JobFinished( + override val service: WorkflowService, + public val job: Job, + public val time: Long + ) : WorkflowEvent() + + /** + * This event is emitted when a task of a job has started processing. + */ + public data class TaskStarted( + override val service: WorkflowService, + public val job: Job, + public val task: Task, + public val time: Long + ) : WorkflowEvent() + + /** + * This event is emitted when a task of a job has finished processing. 
+ */ + public data class TaskFinished( + override val service: WorkflowService, + public val job: Job, + public val task: Task, + public val time: Long + ) : WorkflowEvent() +} diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowService.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowService.kt index 524f4f9e..38ea49c4 100644 --- a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowService.kt +++ b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/service/WorkflowService.kt @@ -25,8 +25,8 @@ package com.atlarge.opendc.workflows.service import com.atlarge.opendc.core.services.AbstractServiceKey -import com.atlarge.opendc.workflows.monitor.WorkflowMonitor import com.atlarge.opendc.workflows.workload.Job +import kotlinx.coroutines.flow.Flow import java.util.UUID /** @@ -35,10 +35,15 @@ import java.util.UUID * The workflow scheduler is modelled after the Reference Architecture for Datacenter Scheduling by Andreadis et al. */ public interface WorkflowService { + /** + * The events emitted by the workflow scheduler. + */ + public val events: Flow + /** * Submit the specified [Job] to the workflow service for scheduling. */ - public suspend fun submit(job: Job, monitor: WorkflowMonitor) + public suspend fun submit(job: Job) /** * The service key for the workflow scheduler. 
diff --git a/opendc/opendc-workflows/src/test/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowSchedulerIntegrationTest.kt b/opendc/opendc-workflows/src/test/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowSchedulerIntegrationTest.kt index 19e56482..5ee6d5e6 100644 --- a/opendc/opendc-workflows/src/test/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowSchedulerIntegrationTest.kt +++ b/opendc/opendc-workflows/src/test/kotlin/com/atlarge/opendc/workflows/service/StageWorkflowSchedulerIntegrationTest.kt @@ -29,17 +29,16 @@ import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.format.environment.sc18.Sc18EnvironmentReader import com.atlarge.opendc.format.trace.gwf.GwfTraceReader -import com.atlarge.opendc.workflows.monitor.WorkflowMonitor import com.atlarge.opendc.workflows.service.stage.job.NullJobAdmissionPolicy import com.atlarge.opendc.workflows.service.stage.job.SubmissionTimeJobOrderPolicy import com.atlarge.opendc.workflows.service.stage.resource.FirstFitResourceSelectionPolicy import com.atlarge.opendc.workflows.service.stage.resource.FunctionalResourceFilterPolicy import com.atlarge.opendc.workflows.service.stage.task.NullTaskEligibilityPolicy import com.atlarge.opendc.workflows.service.stage.task.SubmissionTimeTaskOrderPolicy -import com.atlarge.opendc.workflows.workload.Job -import com.atlarge.opendc.workflows.workload.Task import kotlinx.coroutines.async import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Assertions.assertEquals @@ -64,24 +63,6 @@ internal class StageWorkflowSchedulerIntegrationTest { var tasksStarted = 0L var tasksFinished = 0L - val monitor = object : WorkflowMonitor { - override suspend fun onJobStart(job: Job, time: Long) { - jobsStarted++ - } - - override suspend fun 
onJobFinish(job: Job, time: Long) { - jobsFinished++ - } - - override suspend fun onTaskStart(job: Job, task: Task, time: Long) { - tasksStarted++ - } - - override suspend fun onTaskFinish(job: Job, task: Task, status: Int, time: Long) { - tasksFinished++ - } - } - val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() val system = provider(name = "sim") @@ -104,6 +85,21 @@ internal class StageWorkflowSchedulerIntegrationTest { } val broker = system.newDomain(name = "broker") + + broker.launch { + val scheduler = schedulerAsync.await() + scheduler.events + .onEach { event -> + when (event) { + is WorkflowEvent.JobStarted -> jobsStarted++ + is WorkflowEvent.JobFinished -> jobsFinished++ + is WorkflowEvent.TaskStarted -> tasksStarted++ + is WorkflowEvent.TaskFinished -> tasksFinished++ + } + } + .collect() + } + broker.launch { val ctx = simulationContext val reader = GwfTraceReader(object {}.javaClass.getResourceAsStream("/trace.gwf")) @@ -113,7 +109,7 @@ internal class StageWorkflowSchedulerIntegrationTest { val (time, job) = reader.next() jobsSubmitted++ delay(max(0, time * 1000 - ctx.clock.millis())) - scheduler.submit(job, monitor) + scheduler.submit(job) } } -- cgit v1.2.3 From edce7993772182bac0d0c74d22189137b35872aa Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Mar 2020 20:36:54 +0100 Subject: refactor: Add support for specifying VM name --- .../opendc/compute/virt/driver/SimpleVirtDriver.kt | 3 +- .../opendc/compute/virt/driver/VirtDriver.kt | 3 +- .../virt/service/SimpleVirtProvisioningService.kt | 5 +- .../virt/service/VirtProvisioningService.kt | 3 +- .../atlarge/opendc/compute/virt/HypervisorTest.kt | 89 +++++++++++++++++++++ .../virt/driver/hypervisor/HypervisorTest.kt | 90 ---------------------- .../opendc/experiments/sc20/TestExperiment.kt | 5 +- 7 files changed, 103 insertions(+), 95 deletions(-) create mode 100644 opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt delete 
mode 100644 opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index fc4c7634..e7b06329 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -89,6 +89,7 @@ class SimpleVirtDriver( override val events: Flow = eventFlow override suspend fun spawn( + name: String, image: Image, flavor: Flavor ): Server { @@ -100,7 +101,7 @@ class SimpleVirtDriver( val events = EventFlow() val server = Server( - UUID.randomUUID(), "", emptyMap(), flavor, image, ServerState.BUILD, + UUID.randomUUID(), name, emptyMap(), flavor, image, ServerState.BUILD, ServiceRegistry(), events ) availableMemory -= requiredMemory diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt index d7ae0c12..1002d382 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/VirtDriver.kt @@ -45,11 +45,12 @@ public interface VirtDriver { /** * Spawn the given [Image] on the compute resource of this driver. * + * @param name The name of the server to spawn. * @param image The image to deploy. * @param flavor The flavor of the server which this driver is controlling. * @return The virtual server spawned by this method. 
*/ - public suspend fun spawn(image: Image, flavor: Flavor): Server + public suspend fun spawn(name: String, image: Image, flavor: Flavor): Server companion object Key : AbstractServiceKey(UUID.randomUUID(), "virtual-driver") } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 8393dfa9..a16c0793 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -65,10 +65,11 @@ class SimpleVirtProvisioningService( } override suspend fun deploy( + name: String, image: Image, flavor: Flavor ): Server = suspendCancellableCoroutine { cont -> - val vmInstance = ImageView(image, flavor, cont) + val vmInstance = ImageView(name, image, flavor, cont) incomingImages += vmInstance requestCycle() } @@ -96,6 +97,7 @@ class SimpleVirtProvisioningService( println("Spawning ${imageInstance.image}") incomingImages -= imageInstance val server = selectedHv.driver.spawn( + imageInstance.name, imageInstance.image, imageInstance.flavor ) @@ -137,6 +139,7 @@ class SimpleVirtProvisioningService( } data class ImageView( + val name: String, val image: Image, val flavor: Flavor, val continuation: Continuation, diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt index 12543ce3..6f0c22f6 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt @@ -14,8 +14,9 @@ interface 
VirtProvisioningService { /** * Submit the specified [Image] to the provisioning service. * + * @param name The name of the server to deploy. * @param image The image to be deployed. * @param flavor The flavor of the machine instance to run this [image] on. */ - public suspend fun deploy(image: Image, flavor: Flavor): Server + public suspend fun deploy(name: String, image: Image, flavor: Flavor): Server } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt new file mode 100644 index 00000000..4bd8d4e2 --- /dev/null +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt @@ -0,0 +1,89 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package com.atlarge.opendc.compute.virt + +import com.atlarge.odcsim.SimulationEngineProvider +import com.atlarge.opendc.compute.core.ProcessingUnit +import com.atlarge.opendc.compute.core.Flavor +import com.atlarge.opendc.compute.core.ProcessingNode +import com.atlarge.opendc.compute.core.image.FlopsApplicationImage +import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver +import com.atlarge.opendc.compute.virt.driver.VirtDriver +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onEach +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Test +import java.util.ServiceLoader +import java.util.UUID + +/** + * Basic test-suite for the hypervisor. + */ +internal class HypervisorTest { + /** + * A smoke test for the bare-metal driver. + */ + @OptIn(ExperimentalCoroutinesApi::class) + @Test + fun smoke() { + val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() + val system = provider("test") + val root = system.newDomain("root") + + root.launch { + val vmm = HypervisorImage + val workloadA = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 1) + val workloadB = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 2_000, 1) + + val driverDom = root.newDomain("driver") + + val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2) + val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) } + val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", cpus, emptyList()) + + metalDriver.init() + metalDriver.setImage(vmm) + val node = metalDriver.start() + node.server?.events?.onEach { println(it) }?.launchIn(this) + + delay(5) + + val flavor = Flavor(1, 0) + val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] + val vmA = vmDriver.spawn("a", workloadA, flavor) + vmA.events.onEach { println(it) }.launchIn(this) + val vmB = 
vmDriver.spawn("b", workloadB, flavor) + vmB.events.onEach { println(it) }.launchIn(this) + } + + runBlocking { + system.run() + system.terminate() + } + } +} diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt deleted file mode 100644 index bcaafb59..00000000 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/driver/hypervisor/HypervisorTest.kt +++ /dev/null @@ -1,90 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -package com.atlarge.opendc.compute.virt.driver.hypervisor - -import com.atlarge.odcsim.SimulationEngineProvider -import com.atlarge.opendc.compute.core.ProcessingUnit -import com.atlarge.opendc.compute.core.Flavor -import com.atlarge.opendc.compute.core.ProcessingNode -import com.atlarge.opendc.compute.core.image.FlopsApplicationImage -import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver -import com.atlarge.opendc.compute.virt.HypervisorImage -import com.atlarge.opendc.compute.virt.driver.VirtDriver -import kotlinx.coroutines.ExperimentalCoroutinesApi -import kotlinx.coroutines.delay -import kotlinx.coroutines.flow.launchIn -import kotlinx.coroutines.flow.onEach -import kotlinx.coroutines.launch -import kotlinx.coroutines.runBlocking -import org.junit.jupiter.api.Test -import java.util.ServiceLoader -import java.util.UUID - -/** - * Basic test-suite for the hypervisor. - */ -internal class HypervisorTest { - /** - * A smoke test for the bare-metal driver. - */ - @OptIn(ExperimentalCoroutinesApi::class) - @Test - fun smoke() { - val provider = ServiceLoader.load(SimulationEngineProvider::class.java).first() - val system = provider("test") - val root = system.newDomain("root") - - root.launch { - val vmm = HypervisorImage - val workloadA = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 1) - val workloadB = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 2_000, 1) - - val driverDom = root.newDomain("driver") - - val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2) - val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) } - val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", cpus, emptyList()) - - metalDriver.init() - metalDriver.setImage(vmm) - val node = metalDriver.start() - node.server?.events?.onEach { println(it) }?.launchIn(this) - - delay(5) - - val flavor = Flavor(1, 0) - val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] - val vmA = vmDriver.spawn(workloadA, 
flavor) - vmA.events.onEach { println(it) }.launchIn(this) - val vmB = vmDriver.spawn(workloadB, flavor) - vmB.events.onEach { println(it) }.launchIn(this) - } - - runBlocking { - system.run() - system.terminate() - } - } -} diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 96033ea7..6d832ee4 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -135,7 +135,10 @@ fun main(args: Array) { delay(max(0, time - simulationContext.clock.millis())) launch { chan.send(Unit) - val server = scheduler.deploy(workload.image, Flavor(workload.image.cores, workload.image.requiredMemory)) + val server = scheduler.deploy( + workload.image.name, workload.image, + Flavor(workload.image.cores, workload.image.requiredMemory) + ) server.events.onEach { if (it is ServerEvent.StateChanged) monitor.stateChanged(it.server) }.collect() } } -- cgit v1.2.3 From 225a9dd042870b1320681104aa022120611cc92b Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Mar 2020 22:04:06 +0100 Subject: feat: Record hypervisor events during experiment --- .../kotlin/com/atlarge/odcsim/flow/StateFlow.kt | 4 +- .../odcsim/engine/omega/OmegaSimulationEngine.kt | 7 +++- .../compute/metal/driver/SimpleBareMetalDriver.kt | 43 ++++++++++------------ .../atlarge/opendc/compute/virt/HypervisorEvent.kt | 4 +- .../opendc/compute/virt/driver/SimpleVirtDriver.kt | 9 ++--- .../virt/service/SimpleVirtProvisioningService.kt | 17 ++++++--- .../virt/service/VirtProvisioningService.kt | 6 +++ .../metal/driver/SimpleBareMetalDriverTest.kt | 2 +- .../atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 5 ++- .../opendc/experiments/sc20/TestExperiment.kt | 21 ++++++++++- 
.../com/atlarge/opendc/workflows/workload/Job.kt | 2 + 11 files changed, 77 insertions(+), 43 deletions(-) diff --git a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt index 0410bd95..50add0ad 100644 --- a/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt +++ b/odcsim/odcsim-api/src/main/kotlin/com/atlarge/odcsim/flow/StateFlow.kt @@ -70,10 +70,10 @@ private class StateFlowImpl(initialValue: T) : StateFlow { */ private val flow = chan.asFlow() - public override var value: T - get() = chan.value + public override var value: T = initialValue set(value) { chan.offer(value) + field = value } @InternalCoroutinesApi diff --git a/odcsim/odcsim-engine-omega/src/main/kotlin/com/atlarge/odcsim/engine/omega/OmegaSimulationEngine.kt b/odcsim/odcsim-engine-omega/src/main/kotlin/com/atlarge/odcsim/engine/omega/OmegaSimulationEngine.kt index 4edf94d2..934af293 100644 --- a/odcsim/odcsim-engine-omega/src/main/kotlin/com/atlarge/odcsim/engine/omega/OmegaSimulationEngine.kt +++ b/odcsim/odcsim-engine-omega/src/main/kotlin/com/atlarge/odcsim/engine/omega/OmegaSimulationEngine.kt @@ -37,6 +37,7 @@ import kotlin.coroutines.CoroutineContext import kotlin.coroutines.coroutineContext import kotlinx.coroutines.CancellableContinuation import kotlinx.coroutines.CoroutineDispatcher +import kotlinx.coroutines.CoroutineExceptionHandler import kotlinx.coroutines.CoroutineName import kotlinx.coroutines.Delay import kotlinx.coroutines.DisposableHandle @@ -174,6 +175,10 @@ public class OmegaSimulationEngine(override val name: String) : SimulationEngine } } + private val exceptionHandler = CoroutineExceptionHandler { _, exception -> + log.error("Uncaught exception", exception) + } + // SimulationContext override val key: CoroutineContext.Key<*> = SimulationContext.Key @@ -192,7 +197,7 @@ public class OmegaSimulationEngine(override val name: String) : SimulationEngine override val 
parent: Domain = parent ?: this @InternalCoroutinesApi - override val coroutineContext: CoroutineContext = this + CoroutineName(name) + dispatcher + job + override val coroutineContext: CoroutineContext = this + CoroutineName(name) + dispatcher + job + exceptionHandler override fun toString(): String = path } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 67069c03..9ab6fbc5 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -48,13 +48,10 @@ import com.atlarge.opendc.core.services.ServiceKey import com.atlarge.opendc.core.services.ServiceRegistry import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.Job import kotlinx.coroutines.cancel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.launchIn -import kotlinx.coroutines.flow.scanReduce import kotlinx.coroutines.launch import java.util.UUID import kotlin.math.ceil @@ -112,21 +109,6 @@ public class SimpleBareMetalDriver( override val powerDraw: Flow = powerModel(this) - init { - @OptIn(ExperimentalCoroutinesApi::class) - nodeState.scanReduce { field, value -> - if (field.state != value.state) { - events.emit(NodeEvent.StateChanged(value, field.state)) - } - - if (field.server != null && value.server != null && field.server.state != value.server.state) { - serverContext!!.events.emit(ServerEvent.StateChanged(value.server, field.server.state)) - } - - value - }.launchIn(domain) - } - override suspend fun init(): Node = withContext(domain.coroutineContext) { nodeState.value } @@ -149,7 +131,7 @@ public class 
SimpleBareMetalDriver( events ) - nodeState.value = node.copy(state = NodeState.BOOT, server = server) + setNode(node.copy(state = NodeState.BOOT, server = server)) serverContext = BareMetalServerContext(events) return@withContext nodeState.value } @@ -164,7 +146,7 @@ public class SimpleBareMetalDriver( serverContext!!.cancel(fail = false) serverContext = null - nodeState.value = node.copy(state = NodeState.SHUTOFF, server = null) + setNode(node.copy(state = NodeState.SHUTOFF, server = null)) return@withContext node } @@ -174,12 +156,25 @@ public class SimpleBareMetalDriver( } override suspend fun setImage(image: Image): Node = withContext(domain.coroutineContext) { - nodeState.value = nodeState.value.copy(image = image) + setNode(nodeState.value.copy(image = image)) return@withContext nodeState.value } override suspend fun refresh(): Node = withContext(domain.coroutineContext) { nodeState.value } + private fun setNode(value: Node) { + val field = nodeState.value + if (field.state != value.state) { + events.emit(NodeEvent.StateChanged(value, field.state)) + } + + if (field.server != null && value.server != null && field.server.state != value.server.state) { + serverContext!!.events.emit(ServerEvent.StateChanged(value.server, field.server.state)) + } + + nodeState.value = value + } + private inner class BareMetalServerContext(val events: EventFlow) : ServerManagementContext { private var finalized: Boolean = false @@ -212,7 +207,7 @@ public class SimpleBareMetalDriver( override suspend fun publishService(key: ServiceKey, service: T) { val server = server.copy(services = server.services.put(key, service)) - nodeState.value = nodeState.value.copy(server = server) + setNode(nodeState.value.copy(server = server)) events.emit(ServerEvent.ServicePublished(server, key)) } @@ -220,7 +215,7 @@ public class SimpleBareMetalDriver( assert(!finalized) { "Machine is already finalized" } val server = server.copy(state = ServerState.ACTIVE) - nodeState.value = 
nodeState.value.copy(state = NodeState.ACTIVE, server = server) + setNode(nodeState.value.copy(state = NodeState.ACTIVE, server = server)) } override suspend fun exit(cause: Throwable?) { @@ -237,7 +232,7 @@ public class SimpleBareMetalDriver( else NodeState.ERROR val server = server.copy(state = newServerState) - nodeState.value = nodeState.value.copy(state = newNodeState, server = server) + setNode(nodeState.value.copy(state = newNodeState, server = server)) } private var flush: Job? = null diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt index 3230c2ba..5c19b00d 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt @@ -24,6 +24,7 @@ package com.atlarge.opendc.compute.virt +import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.virt.driver.VirtDriver /** @@ -60,6 +61,7 @@ public sealed class HypervisorEvent { override val driver: VirtDriver, public val requestedBurst: Long, public val grantedBurst: Long, - public val numberOfDeployedImages: Int + public val numberOfDeployedImages: Int, + public val hostServer: Server ) : HypervisorEvent() } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index e7b06329..76368080 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -47,7 +47,6 @@ import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.FlowPreview import kotlinx.coroutines.Job 
-import kotlinx.coroutines.cancelAndJoin import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow @@ -68,7 +67,7 @@ class SimpleVirtDriver( /** * The [Server] on which this hypervisor runs. */ - public val server: Server + private val server: Server get() = hostContext.server /** @@ -223,7 +222,7 @@ class SimpleVirtDriver( } } - eventFlow.emit(HypervisorEvent.SliceFinished(this@SimpleVirtDriver, totalBurst, totalBurst - totalRemainder, vms.size)) + eventFlow.emit(HypervisorEvent.SliceFinished(this@SimpleVirtDriver, totalBurst, totalBurst - totalRemainder, vms.size, server)) } this.call = call } @@ -231,11 +230,11 @@ class SimpleVirtDriver( /** * Flush the progress of the current active VMs. */ - private suspend fun flush() { + private fun flush() { val call = call ?: return // If there is no active call, there is nothing to flush // The progress is actually flushed in the coroutine when it notices: we cancel it and wait for its // completion. 
- call.cancelAndJoin() + call.cancel() this.call = null } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index a16c0793..fb874e22 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -19,6 +19,7 @@ import kotlinx.coroutines.flow.collect import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.suspendCancellableCoroutine +import kotlinx.coroutines.withContext import kotlin.coroutines.Continuation import kotlin.coroutines.resume @@ -64,14 +65,20 @@ class SimpleVirtProvisioningService( } } + override suspend fun drivers(): Set = withContext(coroutineContext) { + availableHypervisors.map { it.driver }.toSet() + } + override suspend fun deploy( name: String, image: Image, flavor: Flavor - ): Server = suspendCancellableCoroutine { cont -> - val vmInstance = ImageView(name, image, flavor, cont) - incomingImages += vmInstance - requestCycle() + ): Server = withContext(coroutineContext) { + suspendCancellableCoroutine { cont -> + val vmInstance = ImageView(name, image, flavor, cont) + incomingImages += vmInstance + requestCycle() + } } private var call: Job? 
= null @@ -82,9 +89,9 @@ class SimpleVirtProvisioningService( } val call = launch { + this@SimpleVirtProvisioningService.call = null schedule() } - call.invokeOnCompletion { this.call = null } this.call = call } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt index 6f0c22f6..a3ade2fb 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/VirtProvisioningService.kt @@ -3,6 +3,7 @@ package com.atlarge.opendc.compute.virt.service import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.image.Image +import com.atlarge.opendc.compute.virt.driver.VirtDriver import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy /** @@ -11,6 +12,11 @@ import com.atlarge.opendc.compute.virt.service.allocation.AllocationPolicy interface VirtProvisioningService { val allocationPolicy: AllocationPolicy + /** + * Obtain the active hypervisors for this provisioner. + */ + public suspend fun drivers(): Set + /** * Submit the specified [Image] to the provisioning service. * diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index e0d8799f..3eb6a12c 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -63,7 +63,7 @@ internal class SimpleBareMetalDriverTest { val server = driver.start().server!! 
server.events.collect { event -> when (event) { - is ServerEvent.StateChanged -> finalState = event.server.state + is ServerEvent.StateChanged -> { println(event); finalState = event.server.state } } } } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index e18bbe30..36da7703 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -1,5 +1,6 @@ package com.atlarge.opendc.experiments.sc20 +import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.metal.driver.BareMetalDriver @@ -18,8 +19,8 @@ class Sc20Monitor( outputFile.write("time,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw,failedVms\n") } - fun stateChanged(server: Server) { - println("${server.uid} ${server.state}") + suspend fun stateChanged(server: Server) { + println("[${simulationContext.clock.millis()}] ${server.uid} ${server.state}") if (server.state == ServerState.ERROR) { failed++ } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 6d832ee4..0fafc118 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -29,10 +29,10 @@ import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ServerEvent import 
com.atlarge.opendc.compute.metal.service.ProvisioningService +import com.atlarge.opendc.compute.virt.HypervisorEvent import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy import com.atlarge.opendc.core.failure.CorrelatedFaultInjector -import com.atlarge.opendc.core.failure.FailureDomain import com.atlarge.opendc.format.environment.sc20.Sc20ClusterEnvironmentReader import com.atlarge.opendc.format.trace.sc20.Sc20PerformanceInterferenceReader import com.atlarge.opendc.format.trace.sc20.Sc20TraceReader @@ -43,6 +43,7 @@ import com.xenomachina.argparser.default import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.flow.launchIn import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking @@ -116,6 +117,21 @@ fun main(args: Array) { bareMetalProvisioner ) + // Wait for the hypervisors to be spawned + delay(10) + + // Monitor hypervisor events + for (hypervisor in scheduler.drivers()) { + hypervisor.events + .onEach { event -> + when (event) { + is HypervisorEvent.SliceFinished -> monitor.onSliceFinish(simulationContext.clock.millis(), event.requestedBurst, event.grantedBurst, event.numberOfDeployedImages, event.hostServer) + else -> println(event) + } + } + .launchIn(this) + } + val faultInjectorDomain = root.newDomain(name = "failures") faultInjectorDomain.launch { chan.receive() @@ -125,7 +141,7 @@ fun main(args: Array) { sizeScale = 1.88, sizeShape = 1.25 ) for (node in bareMetalProvisioner.nodes()) { - faultInjector.enqueue(node.metadata["driver"] as FailureDomain) + // faultInjector.enqueue(node.metadata["driver"] as FailureDomain) } } @@ -139,6 +155,7 @@ fun main(args: Array) { workload.image.name, workload.image, Flavor(workload.image.cores, workload.image.requiredMemory) ) + // Monitor server events 
server.events.onEach { if (it is ServerEvent.StateChanged) monitor.stateChanged(it.server) }.collect() } } diff --git a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/workload/Job.kt b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/workload/Job.kt index 40389ce2..02969d8a 100644 --- a/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/workload/Job.kt +++ b/opendc/opendc-workflows/src/main/kotlin/com/atlarge/opendc/workflows/workload/Job.kt @@ -47,4 +47,6 @@ data class Job( override fun equals(other: Any?): Boolean = other is Job && uid == other.uid override fun hashCode(): Int = uid.hashCode() + + override fun toString(): String = "Job(uid=$uid, name=$name, tasks=${tasks.size}, metadata=$metadata)" } -- cgit v1.2.3 From 27a8f2312bf9207314abb201ed74f021b818f8af Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Mar 2020 22:58:27 +0100 Subject: bug: Fix race condition in VM provisioner This change fixes a race condition in the VM provisioner where VMs were scheduled based on stale information. 
--- .../virt/service/SimpleVirtProvisioningService.kt | 26 ++++++++++++++++++++-- .../opendc/experiments/sc20/TestExperiment.kt | 10 +++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index fb874e22..156521db 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -6,7 +6,9 @@ import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.core.image.Image +import com.atlarge.opendc.compute.core.image.VmImage import com.atlarge.opendc.compute.metal.service.ProvisioningService +import com.atlarge.opendc.compute.virt.HypervisorEvent import com.atlarge.opendc.compute.virt.driver.VirtDriver import com.atlarge.opendc.compute.virt.HypervisorImage import com.atlarge.opendc.compute.virt.driver.InsufficientMemoryOnServerException @@ -15,7 +17,8 @@ import com.atlarge.opendc.core.services.ServiceKey import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.Job -import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.launchIn import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.suspendCancellableCoroutine @@ -60,7 +63,7 @@ class SimpleVirtProvisioningService( is ServerEvent.StateChanged -> stateChanged(event.server) is ServerEvent.ServicePublished -> servicePublished(event.server, event.key) } - }.collect() + }.launchIn(this) } } } @@ -89,6 +92,7 @@ class SimpleVirtProvisioningService( 
} val call = launch { + delay(1) this@SimpleVirtProvisioningService.call = null schedule() } @@ -103,6 +107,12 @@ class SimpleVirtProvisioningService( try { println("Spawning ${imageInstance.image}") incomingImages -= imageInstance + + // Speculatively update the hypervisor view information to prevent other images in the queue from + // deciding on stale values. + selectedHv.numberOfActiveServers++ + selectedHv.availableMemory -= (imageInstance.image as VmImage).requiredMemory // XXX Temporary hack + val server = selectedHv.driver.spawn( imageInstance.name, imageInstance.image, @@ -113,6 +123,9 @@ class SimpleVirtProvisioningService( activeImages += imageInstance } catch (e: InsufficientMemoryOnServerException) { println("Unable to deploy image due to insufficient memory") + + selectedHv.numberOfActiveServers-- + selectedHv.availableMemory += (imageInstance.image as VmImage).requiredMemory } } } @@ -141,6 +154,15 @@ class SimpleVirtProvisioningService( val hv = hypervisors[server] ?: return hv.driver = server.services[VirtDriver] availableHypervisors += hv + + hv.driver.events + .onEach { event -> + if (event is HypervisorEvent.VmsUpdated) { + hv.numberOfActiveServers = event.numberOfActiveServers + hv.availableMemory = event.availableMemory + } + }.launchIn(this) + requestCycle() } } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 0fafc118..66b20bff 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -40,6 +40,7 @@ import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import com.fasterxml.jackson.module.kotlin.readValue import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default +import 
kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.collect @@ -86,6 +87,7 @@ class ExperimentParameters(parser: ArgParser) { /** * Main entry point of the experiment. */ +@OptIn(ExperimentalCoroutinesApi::class) fun main(args: Array) { ArgParser(args).parseInto(::ExperimentParameters).run { val monitor = Sc20Monitor(outputFile) @@ -111,6 +113,10 @@ fun main(args: Array) { println(simulationContext.clock.instant()) val bareMetalProvisioner = environment.platforms[0].zones[0].services[ProvisioningService.Key] + + // Wait for the bare metal nodes to be spawned + delay(10) + val scheduler = SimpleVirtProvisioningService( AvailableMemoryAllocationPolicy(), simulationContext, @@ -140,9 +146,9 @@ fun main(args: Array) { iatScale = -1.39, iatShape = 1.03, sizeScale = 1.88, sizeShape = 1.25 ) - for (node in bareMetalProvisioner.nodes()) { + // for (node in bareMetalProvisioner.nodes()) { // faultInjector.enqueue(node.metadata["driver"] as FailureDomain) - } + // } } val reader = Sc20TraceReader(File(traceDirectory), performanceInterferenceModel, getSelectedVmList()) -- cgit v1.2.3 From b3d11a0740f9a925f9cebd524863668fb9b07000 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Mar 2020 12:10:47 +0100 Subject: feat: Add cluster metadata to bare metal nodes --- .../com/atlarge/opendc/compute/metal/Metadata.kt | 34 ++++++++++++++++++++++ .../compute/metal/driver/SimpleBareMetalDriver.kt | 8 +++-- .../metal/driver/SimpleBareMetalDriverTest.kt | 2 +- .../metal/service/SimpleProvisioningServiceTest.kt | 2 +- .../atlarge/opendc/compute/virt/HypervisorTest.kt | 2 +- .../environment/sc18/Sc18EnvironmentReader.kt | 9 +++++- .../sc20/Sc20ClusterEnvironmentReader.kt | 4 ++- .../environment/sc20/Sc20EnvironmentReader.kt | 3 +- 8 files changed, 56 insertions(+), 8 deletions(-) create mode 100644 opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Metadata.kt 
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Metadata.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Metadata.kt new file mode 100644 index 00000000..a3a851fe --- /dev/null +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/Metadata.kt @@ -0,0 +1,34 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package com.atlarge.opendc.compute.metal + +/* + * Common metadata keys for bare-metal nodes. + */ + +/** + * The cluster to which the node belongs. 
+ */ +const val NODE_CLUSTER = "bare-metal:cluster" diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 9ab6fbc5..834e683d 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -66,6 +66,7 @@ import java.lang.Exception * @param domain The simulation domain the driver runs in. * @param uid The unique identifier of the machine. * @param name An optional name of the machine. + * @param metadata The initial metadata of the node. * @param cpus The CPUs available to the bare metal machine. * @param memoryUnits The memory units in this machine. * @param powerModel The power model of this machine. @@ -74,9 +75,12 @@ public class SimpleBareMetalDriver( private val domain: Domain, uid: UUID, name: String, + metadata: Map, val cpus: List, val memoryUnits: List, - powerModel: PowerModel = ConstantPowerModel(0.0) + powerModel: PowerModel = ConstantPowerModel( + 0.0 + ) ) : BareMetalDriver { /** * The flavor that corresponds to this machine. @@ -101,7 +105,7 @@ public class SimpleBareMetalDriver( /** * The machine state. 
*/ - private val nodeState = StateFlow(Node(uid, name, mapOf("driver" to this), NodeState.SHUTOFF, EmptyImage, null, events)) + private val nodeState = StateFlow(Node(uid, name, metadata + ("driver" to this), NodeState.SHUTOFF, EmptyImage, null, events)) override val node: Flow = nodeState diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt index 3eb6a12c..0fc64373 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriverTest.kt @@ -53,7 +53,7 @@ internal class SimpleBareMetalDriverTest { val dom = root.newDomain(name = "driver") val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 4) val cpus = List(4) { ProcessingUnit(cpuNode, it, 2400.0) } - val driver = SimpleBareMetalDriver(dom, UUID.randomUUID(), "test", cpus, emptyList()) + val driver = SimpleBareMetalDriver(dom, UUID.randomUUID(), "test", emptyMap(), cpus, emptyList()) val image = FlopsApplicationImage(UUID.randomUUID(), "", emptyMap(), 1_000, 2) // Batch driver commands diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt index 8e07c09c..f8bd786e 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/metal/service/SimpleProvisioningServiceTest.kt @@ -55,7 +55,7 @@ internal class SimpleProvisioningServiceTest { val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 4) val cpus = List(4) { ProcessingUnit(cpuNode, it, 2400.0) } - val driver = 
SimpleBareMetalDriver(dom.newDomain(), UUID.randomUUID(), "test", cpus, emptyList()) + val driver = SimpleBareMetalDriver(dom.newDomain(), UUID.randomUUID(), "test", emptyMap(), cpus, emptyList()) val provisioner = SimpleProvisioningService(dom) provisioner.create(driver) diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt index 4bd8d4e2..58d784b0 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt @@ -64,7 +64,7 @@ internal class HypervisorTest { val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2) val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) } - val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", cpus, emptyList()) + val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", emptyMap(), cpus, emptyList()) metalDriver.init() metalDriver.setImage(vmm) diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt index ab9f272f..6f6aa616 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc18/Sc18EnvironmentReader.kt @@ -77,7 +77,14 @@ class Sc18EnvironmentReader(input: InputStream, mapper: ObjectMapper = jacksonOb else -> throw IllegalArgumentException("The cpu id $id is not recognized") } } - SimpleBareMetalDriver(dom.newDomain("node-$counter"), UUID.randomUUID(), "node-${counter++}", cores, listOf(MemoryUnit("", "", 2300.0, 16000))) + SimpleBareMetalDriver( + dom.newDomain("node-$counter"), + UUID.randomUUID(), + 
"node-${counter++}", + emptyMap(), + cores, + listOf(MemoryUnit("", "", 2300.0, 16000)) + ) } } } diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt index c6a393e1..708e27bf 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20ClusterEnvironmentReader.kt @@ -28,6 +28,7 @@ import com.atlarge.odcsim.Domain import com.atlarge.opendc.compute.core.MemoryUnit import com.atlarge.opendc.compute.core.ProcessingNode import com.atlarge.opendc.compute.core.ProcessingUnit +import com.atlarge.opendc.compute.metal.NODE_CLUSTER import com.atlarge.opendc.compute.metal.driver.SimpleBareMetalDriver import com.atlarge.opendc.compute.metal.power.LinearLoadPowerModel import com.atlarge.opendc.compute.metal.service.ProvisioningService @@ -100,13 +101,14 @@ class Sc20ClusterEnvironmentReader( dom.newDomain("node-$clusterId-$it"), UUID.randomUUID(), "node-$clusterId-$it", + mapOf(NODE_CLUSTER to clusterId), List(coresPerHost) { coreId -> ProcessingUnit(unknownProcessingNode, coreId, speed) }, - listOf(unknownMemoryUnit), // For now we assume a simple linear load model with an idle draw of ~200W and a maximum // power draw of 350W. 
// Source: https://stackoverflow.com/questions/6128960 + listOf(unknownMemoryUnit), LinearLoadPowerModel(200.0, 350.0) ) ) diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt index 07309341..4b5d6fb7 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/environment/sc20/Sc20EnvironmentReader.kt @@ -85,11 +85,12 @@ class Sc20EnvironmentReader(input: InputStream, mapper: ObjectMapper = jacksonOb dom.newDomain("node-$counter"), UUID.randomUUID(), "node-${counter++}", + emptyMap(), cores, - memories, // For now we assume a simple linear load model with an idle draw of ~200W and a maximum // power draw of 350W. // Source: https://stackoverflow.com/questions/6128960 + memories, LinearLoadPowerModel(200.0, 350.0) ) } -- cgit v1.2.3 From 7eb8177e2278bde2c0f4fad00af6fdd2d632cb5b Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Mar 2020 12:37:54 +0100 Subject: feat: Implement correlated failures for individual clusters --- .../compute/metal/driver/SimpleBareMetalDriver.kt | 2 ++ .../opendc/core/failure/CorrelatedFaultInjector.kt | 5 ++-- opendc/opendc-experiments-sc20/build.gradle.kts | 2 ++ .../opendc/experiments/sc20/TestExperiment.kt | 33 +++++++++++++++------- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 834e683d..4a40dc9f 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ 
b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -301,4 +301,6 @@ public class SimpleBareMetalDriver( serverContext?.cancel(fail = true) domain.cancel() } + + override fun toString(): String = "SimpleBareMetalDriver(node = ${nodeState.value.uid})" } diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt index 41412195..da4dee12 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt @@ -70,7 +70,6 @@ public class CorrelatedFaultInjector( // Clean up the domain if it finishes domain.scope.coroutineContext[Job]!!.invokeOnCompletion { this@CorrelatedFaultInjector.domain.launch { - println("CANCELLED") active -= domain if (active.isEmpty()) { @@ -88,7 +87,8 @@ public class CorrelatedFaultInjector( while (true) { ensureActive() - val d = lognvariate(iatScale, iatShape) * 1e3 // Make sure to convert delay to milliseconds + // Make sure to convert delay from hours to milliseconds + val d = lognvariate(iatScale, iatShape) * 3600 * 1e6 // Handle long overflow if (simulationContext.clock.millis() + d <= 0) { @@ -98,7 +98,6 @@ public class CorrelatedFaultInjector( delay(d.toLong()) val n = lognvariate(sizeScale, sizeShape).toInt() - for (failureDomain in active.shuffled(random).take(n)) { failureDomain.fail() } diff --git a/opendc/opendc-experiments-sc20/build.gradle.kts b/opendc/opendc-experiments-sc20/build.gradle.kts index d3b37336..28b8ae12 100644 --- a/opendc/opendc-experiments-sc20/build.gradle.kts +++ b/opendc/opendc-experiments-sc20/build.gradle.kts @@ -41,7 +41,9 @@ dependencies { implementation("com.xenomachina:kotlin-argparser:2.0.7") api("com.fasterxml.jackson.module:jackson-module-kotlin:2.9.8") + 
runtimeOnly("org.slf4j:slf4j-simple:${Library.SLF4J}") runtimeOnly(project(":odcsim:odcsim-engine-omega")) + testImplementation("org.junit.jupiter:junit-jupiter-api:${Library.JUNIT_JUPITER}") testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${Library.JUNIT_JUPITER}") testImplementation("org.junit.platform:junit-platform-launcher:${Library.JUNIT_PLATFORM}") diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 66b20bff..639c3aef 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -24,15 +24,19 @@ package com.atlarge.opendc.experiments.sc20 +import com.atlarge.odcsim.Domain import com.atlarge.odcsim.SimulationEngineProvider import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor import com.atlarge.opendc.compute.core.ServerEvent +import com.atlarge.opendc.compute.metal.NODE_CLUSTER import com.atlarge.opendc.compute.metal.service.ProvisioningService import com.atlarge.opendc.compute.virt.HypervisorEvent import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy import com.atlarge.opendc.core.failure.CorrelatedFaultInjector +import com.atlarge.opendc.core.failure.FailureDomain +import com.atlarge.opendc.core.failure.FaultInjector import com.atlarge.opendc.format.environment.sc20.Sc20ClusterEnvironmentReader import com.atlarge.opendc.format.trace.sc20.Sc20PerformanceInterferenceReader import com.atlarge.opendc.format.trace.sc20.Sc20TraceReader @@ -84,6 +88,17 @@ class ExperimentParameters(parser: ArgParser) { } } +/** + * Obtain the [FaultInjector] to use for the experiments. 
+ */ +fun createFaultInjector(domain: Domain): FaultInjector { + // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 + return CorrelatedFaultInjector(domain, + iatScale = -1.39, iatShape = 1.03, + sizeScale = 1.88, sizeShape = 1.25 + ) +} + /** * Main entry point of the experiment. */ @@ -138,17 +153,15 @@ fun main(args: Array) { .launchIn(this) } - val faultInjectorDomain = root.newDomain(name = "failures") - faultInjectorDomain.launch { + root.newDomain(name = "failures").launch { chan.receive() - // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 - val faultInjector = CorrelatedFaultInjector(faultInjectorDomain, - iatScale = -1.39, iatShape = 1.03, - sizeScale = 1.88, sizeShape = 1.25 - ) - // for (node in bareMetalProvisioner.nodes()) { - // faultInjector.enqueue(node.metadata["driver"] as FailureDomain) - // } + val injectors = mutableMapOf() + + for (node in bareMetalProvisioner.nodes()) { + val cluster = node.metadata[NODE_CLUSTER] as String + val injector = injectors.getOrPut(cluster) { createFaultInjector(simulationContext.domain) } + injector.enqueue(node.metadata["driver"] as FailureDomain) + } } val reader = Sc20TraceReader(File(traceDirectory), performanceInterferenceModel, getSelectedVmList()) -- cgit v1.2.3