summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFabian Mastenbroek <mail.fabianm@gmail.com>2020-03-26 12:37:54 +0100
committerFabian Mastenbroek <mail.fabianm@gmail.com>2020-03-26 12:41:10 +0100
commit7eb8177e2278bde2c0f4fad00af6fdd2d632cb5b (patch)
treef5f7ffdce8efdcffb92e158ebbb643ba1a797b23
parentb3d11a0740f9a925f9cebd524863668fb9b07000 (diff)
feat: Implement correlated failures for individual clusters
-rw-r--r--opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt2
-rw-r--r--opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt5
-rw-r--r--opendc/opendc-experiments-sc20/build.gradle.kts2
-rw-r--r--opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt33
4 files changed, 29 insertions, 13 deletions
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt
index 834e683d..4a40dc9f 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt
@@ -301,4 +301,6 @@ public class SimpleBareMetalDriver(
serverContext?.cancel(fail = true)
domain.cancel()
}
+
+ override fun toString(): String = "SimpleBareMetalDriver(node = ${nodeState.value.uid})"
}
diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt
index 41412195..da4dee12 100644
--- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt
+++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt
@@ -70,7 +70,6 @@ public class CorrelatedFaultInjector(
// Clean up the domain if it finishes
domain.scope.coroutineContext[Job]!!.invokeOnCompletion {
this@CorrelatedFaultInjector.domain.launch {
- println("CANCELLED")
active -= domain
if (active.isEmpty()) {
@@ -88,7 +87,8 @@ public class CorrelatedFaultInjector(
while (true) {
ensureActive()
- val d = lognvariate(iatScale, iatShape) * 1e3 // Make sure to convert delay to milliseconds
+ // Make sure to convert delay from hours to milliseconds (1 h = 3600 s, 1 s = 1e3 ms), the unit used by clock.millis() and delay()
+ val d = lognvariate(iatScale, iatShape) * 3600 * 1e3
// Handle long overflow
if (simulationContext.clock.millis() + d <= 0) {
@@ -98,7 +98,6 @@ public class CorrelatedFaultInjector(
delay(d.toLong())
val n = lognvariate(sizeScale, sizeShape).toInt()
-
for (failureDomain in active.shuffled(random).take(n)) {
failureDomain.fail()
}
diff --git a/opendc/opendc-experiments-sc20/build.gradle.kts b/opendc/opendc-experiments-sc20/build.gradle.kts
index d3b37336..28b8ae12 100644
--- a/opendc/opendc-experiments-sc20/build.gradle.kts
+++ b/opendc/opendc-experiments-sc20/build.gradle.kts
@@ -41,7 +41,9 @@ dependencies {
implementation("com.xenomachina:kotlin-argparser:2.0.7")
api("com.fasterxml.jackson.module:jackson-module-kotlin:2.9.8")
+ runtimeOnly("org.slf4j:slf4j-simple:${Library.SLF4J}")
runtimeOnly(project(":odcsim:odcsim-engine-omega"))
+
testImplementation("org.junit.jupiter:junit-jupiter-api:${Library.JUNIT_JUPITER}")
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${Library.JUNIT_JUPITER}")
testImplementation("org.junit.platform:junit-platform-launcher:${Library.JUNIT_PLATFORM}")
diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt
index 66b20bff..639c3aef 100644
--- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt
+++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt
@@ -24,15 +24,19 @@
package com.atlarge.opendc.experiments.sc20
+import com.atlarge.odcsim.Domain
import com.atlarge.odcsim.SimulationEngineProvider
import com.atlarge.odcsim.simulationContext
import com.atlarge.opendc.compute.core.Flavor
import com.atlarge.opendc.compute.core.ServerEvent
+import com.atlarge.opendc.compute.metal.NODE_CLUSTER
import com.atlarge.opendc.compute.metal.service.ProvisioningService
import com.atlarge.opendc.compute.virt.HypervisorEvent
import com.atlarge.opendc.compute.virt.service.SimpleVirtProvisioningService
import com.atlarge.opendc.compute.virt.service.allocation.AvailableMemoryAllocationPolicy
import com.atlarge.opendc.core.failure.CorrelatedFaultInjector
+import com.atlarge.opendc.core.failure.FailureDomain
+import com.atlarge.opendc.core.failure.FaultInjector
import com.atlarge.opendc.format.environment.sc20.Sc20ClusterEnvironmentReader
import com.atlarge.opendc.format.trace.sc20.Sc20PerformanceInterferenceReader
import com.atlarge.opendc.format.trace.sc20.Sc20TraceReader
@@ -85,6 +89,17 @@ class ExperimentParameters(parser: ArgParser) {
}
/**
+ * Obtain the [FaultInjector] to use for the experiments.
+ */
+fun createFaultInjector(domain: Domain): FaultInjector {
+ // Log-normal parameters for the delay between failure bursts (iat*) and the number of domains failed per burst (size*), from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009
+ return CorrelatedFaultInjector(domain,
+ iatScale = -1.39, iatShape = 1.03,
+ sizeScale = 1.88, sizeShape = 1.25
+ )
+}
+
+/**
* Main entry point of the experiment.
*/
@OptIn(ExperimentalCoroutinesApi::class)
@@ -138,17 +153,15 @@ fun main(args: Array<String>) {
.launchIn(this)
}
- val faultInjectorDomain = root.newDomain(name = "failures")
- faultInjectorDomain.launch {
+ root.newDomain(name = "failures").launch {
chan.receive()
- // Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009
- val faultInjector = CorrelatedFaultInjector(faultInjectorDomain,
- iatScale = -1.39, iatShape = 1.03,
- sizeScale = 1.88, sizeShape = 1.25
- )
- // for (node in bareMetalProvisioner.nodes()) {
- // faultInjector.enqueue(node.metadata["driver"] as FailureDomain)
- // }
+ val injectors = mutableMapOf<String, FaultInjector>()
+
+ for (node in bareMetalProvisioner.nodes()) {
+ val cluster = node.metadata[NODE_CLUSTER] as String
+ val injector = injectors.getOrPut(cluster) { createFaultInjector(simulationContext.domain) }
+ injector.enqueue(node.metadata["driver"] as FailureDomain)
+ }
}
val reader = Sc20TraceReader(File(traceDirectory), performanceInterferenceModel, getSelectedVmList())