From c4016fcfd37550b237f6940eaffb5b4efd607601 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 3 Apr 2020 17:05:05 +0200 Subject: feat: Add initial prototype for failure recovery --- .../opendc/experiments/sc20/TestExperiment.kt | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'opendc/opendc-experiments-sc20/src') diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index cc403e6e..400cef33 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -52,6 +52,7 @@ import com.fasterxml.jackson.module.kotlin.readValue import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.collect @@ -63,6 +64,7 @@ import java.io.File import java.io.FileReader import java.util.ServiceLoader import kotlin.math.max +import kotlin.random.Random class ExperimentParameters(parser: ArgParser) { val traceDirectory by parser.storing("path to the trace directory") @@ -100,11 +102,13 @@ class ExperimentParameters(parser: ArgParser) { /** * Obtain the [FaultInjector] to use for the experiments. */ -fun createFaultInjector(domain: Domain): FaultInjector { +fun createFaultInjector(domain: Domain, random: Random): FaultInjector { // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 return CorrelatedFaultInjector(domain, iatScale = -1.39, iatShape = 1.03, - sizeScale = 1.88, sizeShape = 1.25 + sizeScale = 1.88, sizeShape = 1.25, + dScale = 1.88, dShape = 1.25, + random = random ) } @@ -202,18 +206,22 @@ fun main(args: Array) { .launchIn(this) } - if (failures) { - println("ENABLE Failures") - root.newDomain(name = "failures").launch { + val failureDomain = if (failures) { + println("ENABLING failures") + val domain = root.newDomain(name = "failures") + domain.launch { chan.receive() + val random = Random(0) val injectors = mutableMapOf() - for (node in bareMetalProvisioner.nodes()) { val cluster = node.metadata[NODE_CLUSTER] as String - val injector = injectors.getOrPut(cluster) { createFaultInjector(simulationContext.domain) } + val injector = injectors.getOrPut(cluster) { createFaultInjector(simulationContext.domain, random) } injector.enqueue(node.metadata["driver"] as FailureDomain) } } + domain + } else { + null } val running = mutableSetOf() @@ -250,6 +258,7 @@ fun main(args: Array) { finish.receive() scheduler.terminate() + failureDomain?.cancel() println(simulationContext.clock.instant()) } -- cgit v1.2.3 From 993da5586c23a8cf9c29f5970cc84284e847b408 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 3 Apr 2020 20:12:24 +0200 Subject: feat: Fix failure duration parameters --- .../com/atlarge/opendc/experiments/sc20/TestExperiment.kt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'opendc/opendc-experiments-sc20/src') diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 400cef33..ca7e31ea 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -104,10 +104,11 @@ class ExperimentParameters(parser: ArgParser) { */ fun createFaultInjector(domain: Domain, random: Random): FaultInjector { // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 + // GRID'5000 return CorrelatedFaultInjector(domain, - iatScale = -1.39, iatShape = 1.03, + iatScale = -1.39, iatShape = 1.03, // Hours sizeScale = 1.88, sizeShape = 1.25, - dScale = 1.88, dShape = 1.25, + dScale = 9.51, dShape = 3.21, // Minutes random = random ) } @@ -245,11 +246,12 @@ fun main(args: Array) { monitor.onVmStateChanged(it.server) // Detect whether the VM has finished running - if (it.server.state == ServerState.ERROR || it.server.state == ServerState.SHUTOFF) { + if (it.server.state == ServerState.SHUTOFF) { running -= server + } - if (running.isEmpty() && (!reader.hasNext() || availableHypervisors == 0)) - finish.send(Unit) + if (running.isEmpty() && !reader.hasNext()) { + finish.send(Unit) } } .collect() -- cgit v1.2.3 From 1f67deb18d1430931aec955e7c129cb0d714718c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 3 Apr 2020 20:50:33 +0200 Subject: feat: Report hypervisor state every slice --- .../com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'opendc/opendc-experiments-sc20/src') diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 120c4f81..212b1bfb 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -14,18 +14,13 @@ class Sc20Monitor( destination: String ) : Closeable { private val outputFile = BufferedWriter(FileWriter(destination)) - private var failedInSlice: Int = 0 private val lastServerStates = mutableMapOf>() init { - outputFile.write("time,duration,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw,failedVms\n") + outputFile.write("time,duration,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostState,hostUsage,powerDraw,failedVms\n") } - suspend fun onVmStateChanged(server: Server) { - if (server.state == ServerState.ERROR) { - failedInSlice++ - } - } + suspend fun onVmStateChanged(server: Server) {} suspend fun serverStateChanged(driver: VirtDriver, server: Server) { if ((server.state == ServerState.SHUTOFF || server.state == ServerState.ERROR) && @@ -61,12 +56,8 @@ class Sc20Monitor( val driver = hostServer.services[BareMetalDriver.Key] val usage = driver.usage.first() val powerDraw = driver.powerDraw.first() - val failed = if (failedInSlice > 0) { - failedInSlice.also { failedInSlice = 0 } - } else { - 0 - } - outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},$usage,$powerDraw,$failed") + + outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},${hostServer.state},$usage,$powerDraw") outputFile.newLine() } -- cgit v1.2.3