From c4016fcfd37550b237f6940eaffb5b4efd607601 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 3 Apr 2020 17:05:05 +0200 Subject: feat: Add initial prototype for failure recovery --- .../opendc/core/failure/CorrelatedFaultInjector.kt | 24 ++++++++++++++++++++-- .../atlarge/opendc/core/failure/FailureDomain.kt | 5 +++++ 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'opendc/opendc-core/src') diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt index c5189764..f363bf45 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/CorrelatedFaultInjector.kt @@ -44,6 +44,8 @@ public class CorrelatedFaultInjector( private val iatShape: Double, private val sizeScale: Double, private val sizeShape: Double, + private val dScale: Double, + private val dShape: Double, random: Random = Random(0) ) : FaultInjector { /** @@ -84,7 +86,7 @@ public class CorrelatedFaultInjector( } job = this.domain.launch { - while (true) { + while (active.isNotEmpty()) { ensureActive() // Make sure to convert delay from hours to milliseconds @@ -98,10 +100,28 @@ public class CorrelatedFaultInjector( delay(d.toLong()) val n = lognvariate(sizeScale, sizeShape).toInt() - for (failureDomain in active.shuffled(random).take(n)) { + val targets = active.shuffled(random).take(n) + for (failureDomain in targets) { + active -= failureDomain failureDomain.fail() } + + val df = lognvariate(dScale, dShape) * 3600 * 1e6 + + // Handle long overflow + if (simulationContext.clock.millis() + df <= 0) { + return@launch + } + + delay(df.toLong()) + + for (failureDomain in targets) { + failureDomain.recover() + enqueue(failureDomain) + } } + + job = null } } diff --git a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt index 91ca9b83..d56df3c9 100644 --- a/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt +++ b/opendc/opendc-core/src/main/kotlin/com/atlarge/opendc/core/failure/FailureDomain.kt @@ -39,4 +39,9 @@ public interface FailureDomain { * Fail the domain externally. */ public suspend fun fail() + + /** + * Resume the failure domain. + */ + public suspend fun recover() } -- cgit v1.2.3