diff options
| author | Georgios Andreadis <g.andreadis@student.tudelft.nl> | 2020-04-03 22:13:35 +0200 |
|---|---|---|
| committer | Georgios Andreadis <g.andreadis@student.tudelft.nl> | 2020-04-03 22:13:35 +0200 |
| commit | 6fc21e6df6345c6ec029cc50674352949af83510 (patch) | |
| tree | ce62c5f919fb45e7bf00800bab86defb3521a18c /opendc/opendc-experiments-sc20/src | |
| parent | a625066b997cfeeb31c88dddeb17fc67ea75d6e6 (diff) | |
| parent | 1f67deb18d1430931aec955e7c129cb0d714718c (diff) | |
Merge branch 'feat/failure-recovery' into '2.x'
Add initial prototype for failure recovery
See merge request opendc/opendc-simulator!51
Diffstat (limited to 'opendc/opendc-experiments-sc20/src')
2 files changed, 26 insertions, 24 deletions
diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 120c4f81..212b1bfb 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -14,18 +14,13 @@ class Sc20Monitor( destination: String ) : Closeable { private val outputFile = BufferedWriter(FileWriter(destination)) - private var failedInSlice: Int = 0 private val lastServerStates = mutableMapOf<Server, Pair<ServerState, Long>>() init { - outputFile.write("time,duration,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostUsage,powerDraw,failedVms\n") + outputFile.write("time,duration,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostState,hostUsage,powerDraw,failedVms\n") } - suspend fun onVmStateChanged(server: Server) { - if (server.state == ServerState.ERROR) { - failedInSlice++ - } - } + suspend fun onVmStateChanged(server: Server) {} suspend fun serverStateChanged(driver: VirtDriver, server: Server) { if ((server.state == ServerState.SHUTOFF || server.state == ServerState.ERROR) && @@ -61,12 +56,8 @@ class Sc20Monitor( val driver = hostServer.services[BareMetalDriver.Key] val usage = driver.usage.first() val powerDraw = driver.powerDraw.first() - val failed = if (failedInSlice > 0) { - failedInSlice.also { failedInSlice = 0 } - } else { - 0 - } - outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},$usage,$powerDraw,$failed") + + outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},${hostServer.state},$usage,$powerDraw") outputFile.newLine() } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index cc403e6e..ca7e31ea 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -52,6 +52,7 @@ import com.fasterxml.jackson.module.kotlin.readValue import com.xenomachina.argparser.ArgParser import com.xenomachina.argparser.default import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.delay import kotlinx.coroutines.flow.collect @@ -63,6 +64,7 @@ import java.io.File import java.io.FileReader import java.util.ServiceLoader import kotlin.math.max +import kotlin.random.Random class ExperimentParameters(parser: ArgParser) { val traceDirectory by parser.storing("path to the trace directory") @@ -100,11 +102,14 @@ class ExperimentParameters(parser: ArgParser) { /** * Obtain the [FaultInjector] to use for the experiments. */ -fun createFaultInjector(domain: Domain): FaultInjector { +fun createFaultInjector(domain: Domain, random: Random): FaultInjector { // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009 + // GRID'5000 return CorrelatedFaultInjector(domain, - iatScale = -1.39, iatShape = 1.03, - sizeScale = 1.88, sizeShape = 1.25 + iatScale = -1.39, iatShape = 1.03, // Hours + sizeScale = 1.88, sizeShape = 1.25, + dScale = 9.51, dShape = 3.21, // Minutes + random = random ) } @@ -202,18 +207,22 @@ fun main(args: Array<String>) { .launchIn(this) } - if (failures) { - println("ENABLE Failures") - root.newDomain(name = "failures").launch { + val failureDomain = if (failures) { + println("ENABLING failures") + val domain = root.newDomain(name = "failures") + domain.launch { chan.receive() + val random = Random(0) val injectors = mutableMapOf<String, FaultInjector>() - for (node in bareMetalProvisioner.nodes()) { val cluster = node.metadata[NODE_CLUSTER] as String - val injector = injectors.getOrPut(cluster) { createFaultInjector(simulationContext.domain) } + val injector = injectors.getOrPut(cluster) { createFaultInjector(simulationContext.domain, random) } injector.enqueue(node.metadata["driver"] as FailureDomain) } } + domain + } else { + null } val running = mutableSetOf<Server>() @@ -237,11 +246,12 @@ fun main(args: Array<String>) { monitor.onVmStateChanged(it.server) // Detect whether the VM has finished running - if (it.server.state == ServerState.ERROR || it.server.state == ServerState.SHUTOFF) { + if (it.server.state == ServerState.SHUTOFF) { running -= server + } - if (running.isEmpty() && (!reader.hasNext() || availableHypervisors == 0)) - finish.send(Unit) + if (running.isEmpty() && !reader.hasNext()) { + finish.send(Unit) } } .collect() @@ -250,6 +260,7 @@ fun main(args: Array<String>) { finish.receive() scheduler.terminate() + failureDomain?.cancel() println(simulationContext.clock.instant()) } |
