summaryrefslogtreecommitdiff
path: root/opendc-simulator/opendc-simulator-failures
diff options
context:
space:
mode:
Diffstat (limited to 'opendc-simulator/opendc-simulator-failures')
-rw-r--r--opendc-simulator/opendc-simulator-failures/build.gradle.kts32
-rw-r--r--opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt129
-rw-r--r--opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt47
-rw-r--r--opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt33
-rw-r--r--opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt61
5 files changed, 302 insertions, 0 deletions
diff --git a/opendc-simulator/opendc-simulator-failures/build.gradle.kts b/opendc-simulator/opendc-simulator-failures/build.gradle.kts
new file mode 100644
index 00000000..0f6b2de2
--- /dev/null
+++ b/opendc-simulator/opendc-simulator-failures/build.gradle.kts
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+description = "Failure models for OpenDC"
+// NOTE(review): `kotlin-library-conventions` is presumably a convention plugin defined elsewhere in the build - confirm.
+plugins {
+ `kotlin-library-conventions`
+}
+// Both entries use `api`; the coroutines artifact has no version here (presumably supplied by the platform - confirm).
+dependencies {
+ api(platform(project(":opendc-platform")))
+ api("org.jetbrains.kotlinx:kotlinx-coroutines-core")
+}
diff --git a/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt
new file mode 100644
index 00000000..0e15f338
--- /dev/null
+++ b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.simulator.failures
+
+import kotlinx.coroutines.*
+import java.time.Clock
+import kotlin.math.exp
+import kotlin.math.max
+import kotlin.random.Random
+import kotlin.random.asJavaRandom
+
+/**
+ * A [FaultInjector] that injects faults into the system which are correlated to each other: every fault event
+ * takes down a random group of the active domains at once. Inter-arrival times (in hours), group sizes and
+ * failure durations (in minutes) are sampled from log-normal distributions.
+ */
+public class CorrelatedFaultInjector(
+    private val coroutineScope: CoroutineScope,
+    private val clock: Clock,
+    private val iatScale: Double,
+    private val iatShape: Double,
+    private val sizeScale: Double,
+    private val sizeShape: Double,
+    private val dScale: Double,
+    private val dShape: Double,
+    random: Random = Random(0)
+) : FaultInjector {
+    /** The active failure domains that have been registered. */
+    private val active = mutableSetOf<FailureDomain>()
+
+    /** The domains that currently have a completion handler attached, so re-enqueueing does not add another one. */
+    private val watched = mutableSetOf<FailureDomain>()
+
+    /** The [Job] that awaits the nearest fault in the system. */
+    private var job: Job? = null
+
+    /** The [Random] instance to use. */
+    private val random: java.util.Random = random.asJavaRandom()
+
+    /**
+     * Enqueue the specified [FailureDomain] to fail some time in the future.
+     *
+     * @throws IllegalArgumentException if the scope of [domain] does not contain a [Job].
+     */
+    override fun enqueue(domain: FailureDomain) {
+        val lifecycle = requireNotNull(domain.scope.coroutineContext[Job]) { "Scope of failure domain has no Job" }
+        active += domain
+
+        // Clean up the domain if it finishes; the handler is attached only once while the domain is being watched
+        if (watched.add(domain)) {
+            lifecycle.invokeOnCompletion {
+                this@CorrelatedFaultInjector.coroutineScope.launch {
+                    watched -= domain
+                    active -= domain
+
+                    if (active.isEmpty()) {
+                        job?.cancel()
+                        job = null
+                    }
+                }
+            }
+        }
+
+        if (job != null) {
+            return
+        }
+
+        job = this.coroutineScope.launch {
+            while (active.isNotEmpty()) {
+                ensureActive()
+
+                // Inter-arrival times are sampled in hours; stop if the next fault lies beyond representable time
+                val iat = delayOrNull(lognvariate(iatScale, iatShape) * 3.6e6) ?: return@launch
+                delay(iat)
+
+                val n = lognvariate(sizeScale, sizeShape).toInt()
+                val targets = active.shuffled(random).take(n)
+
+                for (failureDomain in targets) {
+                    active -= failureDomain
+                    failureDomain.fail()
+                }
+
+                // Failure durations are sampled in minutes and last at least 15 minutes
+                val duration = delayOrNull(max(lognvariate(dScale, dShape) * 6e4, 15 * 6e4)) ?: return@launch
+                delay(duration)
+
+                for (failureDomain in targets) {
+                    failureDomain.recover()
+
+                    // Re-enqueue machine to be failed
+                    enqueue(failureDomain)
+                }
+            }
+
+            job = null
+        }
+    }
+
+    /**
+     * Convert [millis] to a delay in milliseconds, or `null` if `clock.millis()` plus that delay overflows a [Long].
+     * The check runs on Longs: adding a Double to `clock.millis()` yields a Double, which never wraps to a negative.
+     */
+    private fun delayOrNull(millis: Double): Long? =
+        millis.toLong().takeIf { it < Long.MAX_VALUE - clock.millis().coerceAtLeast(0) }
+
+    // XXX We should extract this in some common package later on.
+    private fun lognvariate(scale: Double, shape: Double) = exp(scale + shape * random.nextGaussian())
+}
diff --git a/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt
new file mode 100644
index 00000000..dc3006e8
--- /dev/null
+++ b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt
@@ -0,0 +1,47 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2020 atlarge-research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.simulator.failures
+
+import kotlinx.coroutines.CoroutineScope
+
+/**
+ * A logical or physical component in a computing environment which may fail and subsequently recover.
+ */
+public interface FailureDomain {
+ /**
+ * The lifecycle of the failure domain to which a [FaultInjector] will attach, e.g. to launch or clean up faults.
+ */
+ public val scope: CoroutineScope
+
+ /**
+ * Fail the domain externally; a [FaultInjector] calls this when it injects a fault into the domain.
+ */
+ public suspend fun fail()
+
+ /**
+ * Resume the failure domain after a previous call to [fail].
+ */
+ public suspend fun recover()
+}
diff --git a/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt
new file mode 100644
index 00000000..a866260c
--- /dev/null
+++ b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.simulator.failures
+
+/**
+ * An interface for stochastically injecting faults into the [FailureDomain]s of a running system.
+ */
+public interface FaultInjector {
+ /**
+ * Register the specified [domain] as a candidate for fault injection at some point in the future.
+ */
+ public fun enqueue(domain: FailureDomain)
+}
diff --git a/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt
new file mode 100644
index 00000000..b3bd737e
--- /dev/null
+++ b/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.simulator.failures
+
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.launch
+import java.time.Clock
+import kotlin.math.ln1p
+import kotlin.math.pow
+import kotlin.random.Random
+
+/**
+ * A [FaultInjector] that injects uncorrelated (independent) faults: each enqueued domain fails once, after a delay
+ * in seconds sampled from a Weibull distribution with shape [alpha] and scale [beta].
+ */
+public class UncorrelatedFaultInjector(
+    private val clock: Clock,
+    private val alpha: Double,
+    private val beta: Double,
+    private val random: Random = Random(0)
+) : FaultInjector {
+    /**
+     * Enqueue the specified [FailureDomain] to fail some time in the future.
+     */
+    override fun enqueue(domain: FailureDomain) {
+        domain.scope.launch {
+            // Make sure to convert delay to milliseconds; `toLong` saturates at `Long.MAX_VALUE` for huge samples
+            val d = (random.weibull(alpha, beta) * 1e3).toLong()
+
+            // Handle long overflow. This must be checked on Longs: adding a Double to `clock.millis()` yields a
+            // Double, which never wraps around to a negative value.
+            if (d >= Long.MAX_VALUE - clock.millis().coerceAtLeast(0)) return@launch
+
+            delay(d)
+            domain.fail()
+        }
+    }
+
+    // XXX We should extract this in some common package later on.
+    private fun Random.weibull(alpha: Double, beta: Double) = (beta * (-ln1p(-nextDouble())).pow(1.0 / alpha))
+}