From e06c953d1c71c704b351c81611e2e2bececf8e67 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 5 Oct 2020 12:12:32 +0200 Subject: Move failure models to separate module --- .../compute/core/metal/driver/BareMetalDriver.kt | 3 +- .../opendc-compute-simulator/build.gradle.kts | 1 + .../opendc/compute/simulator/SimBareMetalDriver.kt | 3 +- .../opendc/core/failure/CorrelatedFaultInjector.kt | 129 --------------------- .../org/opendc/core/failure/FailureDomain.kt | 47 -------- .../org/opendc/core/failure/FaultInjector.kt | 33 ------ .../core/failure/UncorrelatedFaultInjector.kt | 61 ---------- .../opendc-experiments-sc20/build.gradle.kts | 1 + .../sc20/experiment/ExperimentHelpers.kt | 6 +- .../opendc-simulator-failures/build.gradle.kts | 31 +++++ .../simulator/failures/CorrelatedFaultInjector.kt | 129 +++++++++++++++++++++ .../org/opendc/simulator/failures/FailureDomain.kt | 47 ++++++++ .../org/opendc/simulator/failures/FaultInjector.kt | 33 ++++++ .../failures/UncorrelatedFaultInjector.kt | 61 ++++++++++ simulator/settings.gradle.kts | 1 + 15 files changed, 310 insertions(+), 276 deletions(-) delete mode 100644 simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/CorrelatedFaultInjector.kt delete mode 100644 simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FailureDomain.kt delete mode 100644 simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FaultInjector.kt delete mode 100644 simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/UncorrelatedFaultInjector.kt create mode 100644 simulator/opendc-simulator/opendc-simulator-failures/build.gradle.kts create mode 100644 simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt create mode 100644 simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt create mode 100644 simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt create mode 100644 simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt diff --git a/simulator/opendc-compute/opendc-compute-core/src/main/kotlin/org/opendc/compute/core/metal/driver/BareMetalDriver.kt b/simulator/opendc-compute/opendc-compute-core/src/main/kotlin/org/opendc/compute/core/metal/driver/BareMetalDriver.kt index 6f58aa20..8697a553 100644 --- a/simulator/opendc-compute/opendc-compute-core/src/main/kotlin/org/opendc/compute/core/metal/driver/BareMetalDriver.kt +++ b/simulator/opendc-compute/opendc-compute-core/src/main/kotlin/org/opendc/compute/core/metal/driver/BareMetalDriver.kt @@ -26,7 +26,6 @@ import kotlinx.coroutines.flow.Flow import org.opendc.compute.core.Server import org.opendc.compute.core.image.Image import org.opendc.compute.core.metal.Node -import org.opendc.core.failure.FailureDomain import org.opendc.core.power.Powerable import org.opendc.core.services.AbstractServiceKey import java.util.UUID @@ -34,7 +33,7 @@ import java.util.UUID /** * A driver interface for the management interface of a bare-metal compute node. */ -public interface BareMetalDriver : Powerable, FailureDomain { +public interface BareMetalDriver : Powerable { /** * The [Node] that is controlled by this driver. */ diff --git a/simulator/opendc-compute/opendc-compute-simulator/build.gradle.kts b/simulator/opendc-compute/opendc-compute-simulator/build.gradle.kts index e1ec6421..d7570e54 100644 --- a/simulator/opendc-compute/opendc-compute-simulator/build.gradle.kts +++ b/simulator/opendc-compute/opendc-compute-simulator/build.gradle.kts @@ -32,6 +32,7 @@ dependencies { implementation(project(":opendc-utils")) implementation("io.github.microutils:kotlin-logging:1.7.9") implementation(project(":opendc-simulator:opendc-simulator-compute")) + api(project(":opendc-simulator:opendc-simulator-failures")) testImplementation(project(":opendc-simulator:opendc-simulator-core")) testRuntimeOnly("org.slf4j:slf4j-simple:${Library.SLF4J}") diff --git a/simulator/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimBareMetalDriver.kt b/simulator/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimBareMetalDriver.kt index 08bb7e79..7b1f0af6 100644 --- a/simulator/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimBareMetalDriver.kt +++ b/simulator/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimBareMetalDriver.kt @@ -41,6 +41,7 @@ import org.opendc.simulator.compute.SimBareMetalMachine import org.opendc.simulator.compute.SimExecutionContext import org.opendc.simulator.compute.SimMachineModel import org.opendc.simulator.compute.workload.SimWorkload +import org.opendc.simulator.failures.FailureDomain import org.opendc.utils.flow.EventFlow import org.opendc.utils.flow.StateFlow import java.time.Clock @@ -67,7 +68,7 @@ public class SimBareMetalDriver( metadata: Map, machine: SimMachineModel, powerModel: PowerModel = ConstantPowerModel(0.0) -) : BareMetalDriver { +) : BareMetalDriver, FailureDomain { /** * The flavor that corresponds to this machine. */ diff --git a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/CorrelatedFaultInjector.kt b/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/CorrelatedFaultInjector.kt deleted file mode 100644 index f3bd7f5a..00000000 --- a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/CorrelatedFaultInjector.kt +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.core.failure - -import kotlinx.coroutines.* -import java.time.Clock -import kotlin.math.exp -import kotlin.math.max -import kotlin.random.Random -import kotlin.random.asJavaRandom - -/** - * A [FaultInjector] that injects fault in the system which are correlated to each other. Failures do not occur in - * isolation, but will trigger other faults. - */ -public class CorrelatedFaultInjector( - private val coroutineScope: CoroutineScope, - private val clock: Clock, - private val iatScale: Double, - private val iatShape: Double, - private val sizeScale: Double, - private val sizeShape: Double, - private val dScale: Double, - private val dShape: Double, - random: Random = Random(0) -) : FaultInjector { - /** - * The active failure domains that have been registered. - */ - private val active = mutableSetOf() - - /** - * The [Job] that awaits the nearest fault in the system. - */ - private var job: Job? = null - - /** - * The [Random] instance to use. - */ - private val random: java.util.Random = random.asJavaRandom() - - /** - * Enqueue the specified [FailureDomain] to fail some time in the future. - */ - override fun enqueue(domain: FailureDomain) { - active += domain - - // Clean up the domain if it finishes - domain.scope.coroutineContext[Job]!!.invokeOnCompletion { - this@CorrelatedFaultInjector.coroutineScope.launch { - active -= domain - - if (active.isEmpty()) { - job?.cancel() - job = null - } - } - } - - if (job != null) { - return - } - - job = this.coroutineScope.launch { - while (active.isNotEmpty()) { - ensureActive() - - // Make sure to convert delay from hours to milliseconds - val d = lognvariate(iatScale, iatShape) * 3.6e6 - - // Handle long overflow - if (clock.millis() + d <= 0) { - return@launch - } - - delay(d.toLong()) - - val n = lognvariate(sizeScale, sizeShape).toInt() - val targets = active.shuffled(random).take(n) - - for (failureDomain in targets) { - active -= failureDomain - failureDomain.fail() - } - - val df = max(lognvariate(dScale, dShape) * 6e4, 15 * 6e4) - - // Handle long overflow - if (clock.millis() + df <= 0) { - return@launch - } - - delay(df.toLong()) - - for (failureDomain in targets) { - failureDomain.recover() - - // Re-enqueue machine to be failed - enqueue(failureDomain) - } - } - - job = null - } - } - - // XXX We should extract this in some common package later on. - private fun lognvariate(scale: Double, shape: Double) = exp(scale + shape * random.nextGaussian()) -} diff --git a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FailureDomain.kt b/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FailureDomain.kt deleted file mode 100644 index 0795b09a..00000000 --- a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FailureDomain.kt +++ /dev/null @@ -1,47 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 atlarge-research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.core.failure - -import kotlinx.coroutines.CoroutineScope - -/** - * A logical or physical component in a computing environment which may fail. - */ -public interface FailureDomain { - /** - * The lifecycle of the failure domain to which a [FaultInjector] will attach. - */ - public val scope: CoroutineScope - - /** - * Fail the domain externally. - */ - public suspend fun fail() - - /** - * Resume the failure domain. - */ - public suspend fun recover() -} diff --git a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FaultInjector.kt b/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FaultInjector.kt deleted file mode 100644 index ad776641..00000000 --- a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/FaultInjector.kt +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.core.failure - -/** - * An interface for stochastically injecting faults into a running system. - */ -public interface FaultInjector { - /** - * Enqueue the specified [FailureDomain] into the queue as candidate for failure injection in the future. - */ - public fun enqueue(domain: FailureDomain) -} diff --git a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/UncorrelatedFaultInjector.kt b/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/UncorrelatedFaultInjector.kt deleted file mode 100644 index f64b8815..00000000 --- a/simulator/opendc-core/src/main/kotlin/org/opendc/core/failure/UncorrelatedFaultInjector.kt +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.core.failure - -import kotlinx.coroutines.delay -import kotlinx.coroutines.launch -import java.time.Clock -import kotlin.math.ln1p -import kotlin.math.pow -import kotlin.random.Random - -/** - * A [FaultInjector] that injects uncorrelated faults into the system, meaning that failures of the subsystems are - * independent. - */ -public class UncorrelatedFaultInjector( - private val clock: Clock, - private val alpha: Double, - private val beta: Double, - private val random: Random = Random(0) -) : FaultInjector { - /** - * Enqueue the specified [FailureDomain] to fail some time in the future. - */ - override fun enqueue(domain: FailureDomain) { - domain.scope.launch { - val d = random.weibull(alpha, beta) * 1e3 // Make sure to convert delay to milliseconds - - // Handle long overflow - if (clock.millis() + d <= 0) { - return@launch - } - - delay(d.toLong()) - domain.fail() - } - } - - // XXX We should extract this in some common package later on. - private fun Random.weibull(alpha: Double, beta: Double) = (beta * (-ln1p(-nextDouble())).pow(1.0 / alpha)) -} diff --git a/simulator/opendc-experiments/opendc-experiments-sc20/build.gradle.kts b/simulator/opendc-experiments/opendc-experiments-sc20/build.gradle.kts index d93c1713..3b682668 100644 --- a/simulator/opendc-experiments/opendc-experiments-sc20/build.gradle.kts +++ b/simulator/opendc-experiments/opendc-experiments-sc20/build.gradle.kts @@ -38,6 +38,7 @@ dependencies { implementation(project(":opendc-format")) implementation(project(":opendc-simulator:opendc-simulator-core")) implementation(project(":opendc-simulator:opendc-simulator-compute")) + implementation(project(":opendc-simulator:opendc-simulator-failures")) implementation(project(":opendc-compute:opendc-compute-simulator")) implementation("com.github.ajalt:clikt:2.6.0") diff --git a/simulator/opendc-experiments/opendc-experiments-sc20/src/main/kotlin/org/opendc/experiments/sc20/experiment/ExperimentHelpers.kt b/simulator/opendc-experiments/opendc-experiments-sc20/src/main/kotlin/org/opendc/experiments/sc20/experiment/ExperimentHelpers.kt index 805e34ee..59498c5b 100644 --- a/simulator/opendc-experiments/opendc-experiments-sc20/src/main/kotlin/org/opendc/experiments/sc20/experiment/ExperimentHelpers.kt +++ b/simulator/opendc-experiments/opendc-experiments-sc20/src/main/kotlin/org/opendc/experiments/sc20/experiment/ExperimentHelpers.kt @@ -43,14 +43,14 @@ import org.opendc.compute.core.workload.VmWorkload import org.opendc.compute.simulator.SimVirtDriver import org.opendc.compute.simulator.SimVirtProvisioningService import org.opendc.compute.simulator.allocation.AllocationPolicy -import org.opendc.core.failure.CorrelatedFaultInjector -import org.opendc.core.failure.FailureDomain -import org.opendc.core.failure.FaultInjector import org.opendc.experiments.sc20.experiment.monitor.ExperimentMonitor import org.opendc.experiments.sc20.trace.Sc20StreamingParquetTraceReader import org.opendc.format.environment.EnvironmentReader import org.opendc.format.trace.TraceReader import org.opendc.simulator.compute.interference.PerformanceInterferenceModel +import org.opendc.simulator.failures.CorrelatedFaultInjector +import org.opendc.simulator.failures.FailureDomain +import org.opendc.simulator.failures.FaultInjector import java.io.File import java.time.Clock import kotlin.math.ln diff --git a/simulator/opendc-simulator/opendc-simulator-failures/build.gradle.kts b/simulator/opendc-simulator/opendc-simulator-failures/build.gradle.kts new file mode 100644 index 00000000..1c30506f --- /dev/null +++ b/simulator/opendc-simulator/opendc-simulator-failures/build.gradle.kts @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +description = "Failure models for OpenDC" + +plugins { + `kotlin-library-convention` +} + +dependencies { + api("org.jetbrains.kotlinx:kotlinx-coroutines-core:${Library.KOTLINX_COROUTINES}") +} diff --git a/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt new file mode 100644 index 00000000..0e15f338 --- /dev/null +++ b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/CorrelatedFaultInjector.kt @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.failures + +import kotlinx.coroutines.* +import java.time.Clock +import kotlin.math.exp +import kotlin.math.max +import kotlin.random.Random +import kotlin.random.asJavaRandom + +/** + * A [FaultInjector] that injects fault in the system which are correlated to each other. Failures do not occur in + * isolation, but will trigger other faults. + */ +public class CorrelatedFaultInjector( + private val coroutineScope: CoroutineScope, + private val clock: Clock, + private val iatScale: Double, + private val iatShape: Double, + private val sizeScale: Double, + private val sizeShape: Double, + private val dScale: Double, + private val dShape: Double, + random: Random = Random(0) +) : FaultInjector { + /** + * The active failure domains that have been registered. + */ + private val active = mutableSetOf() + + /** + * The [Job] that awaits the nearest fault in the system. + */ + private var job: Job? = null + + /** + * The [Random] instance to use. + */ + private val random: java.util.Random = random.asJavaRandom() + + /** + * Enqueue the specified [FailureDomain] to fail some time in the future. + */ + override fun enqueue(domain: FailureDomain) { + active += domain + + // Clean up the domain if it finishes + domain.scope.coroutineContext[Job]!!.invokeOnCompletion { + this@CorrelatedFaultInjector.coroutineScope.launch { + active -= domain + + if (active.isEmpty()) { + job?.cancel() + job = null + } + } + } + + if (job != null) { + return + } + + job = this.coroutineScope.launch { + while (active.isNotEmpty()) { + ensureActive() + + // Make sure to convert delay from hours to milliseconds + val d = lognvariate(iatScale, iatShape) * 3.6e6 + + // Handle long overflow + if (clock.millis() + d <= 0) { + return@launch + } + + delay(d.toLong()) + + val n = lognvariate(sizeScale, sizeShape).toInt() + val targets = active.shuffled(random).take(n) + + for (failureDomain in targets) { + active -= failureDomain + failureDomain.fail() + } + + val df = max(lognvariate(dScale, dShape) * 6e4, 15 * 6e4) + + // Handle long overflow + if (clock.millis() + df <= 0) { + return@launch + } + + delay(df.toLong()) + + for (failureDomain in targets) { + failureDomain.recover() + + // Re-enqueue machine to be failed + enqueue(failureDomain) + } + } + + job = null + } + } + + // XXX We should extract this in some common package later on. + private fun lognvariate(scale: Double, shape: Double) = exp(scale + shape * random.nextGaussian()) +} diff --git a/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt new file mode 100644 index 00000000..dc3006e8 --- /dev/null +++ b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FailureDomain.kt @@ -0,0 +1,47 @@ +/* + * MIT License + * + * Copyright (c) 2020 atlarge-research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.failures + +import kotlinx.coroutines.CoroutineScope + +/** + * A logical or physical component in a computing environment which may fail. + */ +public interface FailureDomain { + /** + * The lifecycle of the failure domain to which a [FaultInjector] will attach. + */ + public val scope: CoroutineScope + + /** + * Fail the domain externally. + */ + public suspend fun fail() + + /** + * Resume the failure domain. + */ + public suspend fun recover() +} diff --git a/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt new file mode 100644 index 00000000..a866260c --- /dev/null +++ b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/FaultInjector.kt @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.failures + +/** + * An interface for stochastically injecting faults into a running system. + */ +public interface FaultInjector { + /** + * Enqueue the specified [FailureDomain] into the queue as candidate for failure injection in the future. + */ + public fun enqueue(domain: FailureDomain) +} diff --git a/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt new file mode 100644 index 00000000..b3bd737e --- /dev/null +++ b/simulator/opendc-simulator/opendc-simulator-failures/src/main/kotlin/org/opendc/simulator/failures/UncorrelatedFaultInjector.kt @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.failures + +import kotlinx.coroutines.delay +import kotlinx.coroutines.launch +import java.time.Clock +import kotlin.math.ln1p +import kotlin.math.pow +import kotlin.random.Random + +/** + * A [FaultInjector] that injects uncorrelated faults into the system, meaning that failures of the subsystems are + * independent. + */ +public class UncorrelatedFaultInjector( + private val clock: Clock, + private val alpha: Double, + private val beta: Double, + private val random: Random = Random(0) +) : FaultInjector { + /** + * Enqueue the specified [FailureDomain] to fail some time in the future. + */ + override fun enqueue(domain: FailureDomain) { + domain.scope.launch { + val d = random.weibull(alpha, beta) * 1e3 // Make sure to convert delay to milliseconds + + // Handle long overflow + if (clock.millis() + d <= 0) { + return@launch + } + + delay(d.toLong()) + domain.fail() + } + } + + // XXX We should extract this in some common package later on. + private fun Random.weibull(alpha: Double, beta: Double) = (beta * (-ln1p(-nextDouble())).pow(1.0 / alpha)) +} diff --git a/simulator/settings.gradle.kts b/simulator/settings.gradle.kts index 418b00fb..935a18d0 100644 --- a/simulator/settings.gradle.kts +++ b/simulator/settings.gradle.kts @@ -31,4 +31,5 @@ include(":opendc-experiments:opendc-experiments-sc20") include(":opendc-runner-web") include(":opendc-simulator:opendc-simulator-core") include(":opendc-simulator:opendc-simulator-compute") +include(":opendc-simulator:opendc-simulator-failures") include(":opendc-utils") -- cgit v1.2.3