summaryrefslogtreecommitdiff
path: root/opendc-experiments/opendc-experiments-capelin/src
diff options
context:
space:
mode:
authorFabian Mastenbroek <mail.fabianm@gmail.com>2021-09-10 10:59:44 +0200
committerFabian Mastenbroek <mail.fabianm@gmail.com>2021-09-10 16:18:00 +0200
commitd24cc0cc9c4fe2145f0337d65e9a75f631365973 (patch)
treefef06ec8c23654b49eda2b15a3febf0a57a78aa5 /opendc-experiments/opendc-experiments-capelin/src
parent04945381d01d8c6e59befe6843f2c6f6da5e91bf (diff)
refactor(compute): Integrate fault injection into compute simulator
This change moves the fault injection logic directly into the opendc-compute-simulator module, so that it can operate at a higher abstraction. In the future, we might again split the module if we can re-use some of its logic.
Diffstat (limited to 'opendc-experiments/opendc-experiments-capelin/src')
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt61
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt17
-rw-r--r--opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt41
3 files changed, 29 insertions, 90 deletions
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt
index 512b754d..8227bca9 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt
@@ -25,7 +25,6 @@ package org.opendc.experiments.capelin
import io.opentelemetry.api.metrics.MeterProvider
import io.opentelemetry.sdk.metrics.SdkMeterProvider
import kotlinx.coroutines.*
-import kotlinx.coroutines.channels.Channel
import org.apache.commons.math3.distribution.LogNormalDistribution
import org.apache.commons.math3.random.Well19937c
import org.opendc.compute.api.*
@@ -41,6 +40,9 @@ import org.opendc.compute.service.scheduler.weights.InstanceCountWeigher
import org.opendc.compute.service.scheduler.weights.RamWeigher
import org.opendc.compute.service.scheduler.weights.VCpuWeigher
import org.opendc.compute.simulator.SimHost
+import org.opendc.compute.simulator.failure.HostFaultInjector
+import org.opendc.compute.simulator.failure.StartStopHostFault
+import org.opendc.compute.simulator.failure.StochasticVictimSelector
import org.opendc.experiments.capelin.env.EnvironmentReader
import org.opendc.experiments.capelin.trace.TraceReader
import org.opendc.simulator.compute.kernel.SimFairShareHypervisorProvider
@@ -48,67 +50,36 @@ import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel
import org.opendc.simulator.compute.power.SimplePowerDriver
import org.opendc.simulator.compute.workload.SimTraceWorkload
import org.opendc.simulator.compute.workload.SimWorkload
-import org.opendc.simulator.failures.CorrelatedFaultInjector
-import org.opendc.simulator.failures.FaultInjector
import org.opendc.simulator.resources.SimResourceInterpreter
import org.opendc.telemetry.compute.ComputeMonitor
import org.opendc.telemetry.sdk.toOtelClock
import java.time.Clock
+import kotlin.coroutines.CoroutineContext
import kotlin.math.ln
import kotlin.math.max
import kotlin.random.Random
/**
- * Construct the failure domain for the experiments.
- */
-fun createFailureDomain(
- coroutineScope: CoroutineScope,
- clock: Clock,
- seed: Int,
- failureInterval: Double,
- service: ComputeService,
- chan: Channel<Unit>
-): CoroutineScope {
- val job = coroutineScope.launch {
- chan.receive()
- val random = Random(seed)
- val injectors = mutableMapOf<String, FaultInjector>()
- for (host in service.hosts) {
- val cluster = host.meta["cluster"] as String
- val injector =
- injectors.getOrPut(cluster) {
- createFaultInjector(
- this,
- clock,
- random,
- failureInterval
- )
- }
- injector.enqueue(host as SimHost)
- }
- }
- return CoroutineScope(coroutineScope.coroutineContext + job)
-}
-
-/**
* Obtain the [FaultInjector] to use for the experiments.
*/
fun createFaultInjector(
- coroutineScope: CoroutineScope,
+ context: CoroutineContext,
clock: Clock,
- random: Random,
+ hosts: Set<SimHost>,
+ seed: Int,
failureInterval: Double
-): FaultInjector {
- val rng = Well19937c(random.nextLong())
+): HostFaultInjector {
+ val rng = Well19937c(seed)
// Parameters from A. Iosup, A Framework for the Study of Grid Inter-Operation Mechanisms, 2009
// GRID'5000
- return CorrelatedFaultInjector(
- coroutineScope,
+ return HostFaultInjector(
+ context,
clock,
+ hosts,
iat = LogNormalDistribution(rng, ln(failureInterval), 1.03),
- size = LogNormalDistribution(rng, 1.88, 1.25),
- duration = LogNormalDistribution(rng, 8.89, 2.71)
+ selector = StochasticVictimSelector(LogNormalDistribution(rng, 1.88, 1.25), Random(seed)),
+ fault = StartStopHostFault(LogNormalDistribution(rng, 8.89, 2.71))
)
}
@@ -164,7 +135,6 @@ suspend fun processTrace(
clock: Clock,
reader: TraceReader<SimWorkload>,
scheduler: ComputeService,
- chan: Channel<Unit>,
monitor: ComputeMonitor? = null,
) {
val client = scheduler.newClient()
@@ -193,10 +163,9 @@ suspend fun processTrace(
delay(max(0, (entry.start - offset) - clock.millis()))
launch {
- chan.send(Unit)
-
val workloadOffset = -offset + 300001
val workload = SimTraceWorkload((entry.meta["workload"] as SimTraceWorkload).trace, workloadOffset)
+
val server = client.newServer(
entry.name,
image,
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
index 4db04591..82794471 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
@@ -25,9 +25,8 @@ package org.opendc.experiments.capelin
import com.typesafe.config.ConfigFactory
import io.opentelemetry.sdk.metrics.export.MetricProducer
import kotlinx.coroutines.ExperimentalCoroutinesApi
-import kotlinx.coroutines.cancel
-import kotlinx.coroutines.channels.Channel
import mu.KotlinLogging
+import org.opendc.compute.simulator.SimHost
import org.opendc.experiments.capelin.env.ClusterEnvironmentReader
import org.opendc.experiments.capelin.export.parquet.ParquetExportMonitor
import org.opendc.experiments.capelin.model.CompositeWorkload
@@ -103,7 +102,6 @@ abstract class Portfolio(name: String) : Experiment(name) {
val seeder = Random(repeat.toLong())
val environment = ClusterEnvironmentReader(File(config.getString("env-path"), "${topology.name}.txt"))
- val chan = Channel<Unit>(Channel.CONFLATED)
val allocationPolicy = createComputeScheduler(allocationPolicy, seeder.asKotlinRandom(), vmPlacements)
val meterProvider = createMeterProvider(clock)
@@ -137,31 +135,30 @@ abstract class Portfolio(name: String) : Experiment(name) {
)
withComputeService(clock, meterProvider, environment, allocationPolicy, performanceInterferenceModel) { scheduler ->
- val failureDomain = if (operationalPhenomena.failureFrequency > 0) {
+ val faultInjector = if (operationalPhenomena.failureFrequency > 0) {
logger.debug("ENABLING failures")
- createFailureDomain(
- this,
+ createFaultInjector(
+ coroutineContext,
clock,
+ scheduler.hosts.map { it as SimHost }.toSet(),
seeder.nextInt(),
operationalPhenomena.failureFrequency,
- scheduler,
- chan
)
} else {
null
}
withMonitor(scheduler, clock, meterProvider as MetricProducer, monitor) {
+ faultInjector?.start()
processTrace(
clock,
trace,
scheduler,
- chan,
monitor
)
}
- failureDomain?.cancel()
+ faultInjector?.close()
monitor.close()
}
diff --git a/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt b/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
index ab33bc25..44cf92a8 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
@@ -23,8 +23,6 @@
package org.opendc.experiments.capelin
import io.opentelemetry.sdk.metrics.export.MetricProducer
-import kotlinx.coroutines.cancel
-import kotlinx.coroutines.channels.Channel
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
@@ -34,6 +32,7 @@ import org.opendc.compute.service.scheduler.filters.ComputeFilter
import org.opendc.compute.service.scheduler.filters.RamFilter
import org.opendc.compute.service.scheduler.filters.VCpuFilter
import org.opendc.compute.service.scheduler.weights.CoreRamWeigher
+import org.opendc.compute.simulator.SimHost
import org.opendc.experiments.capelin.env.ClusterEnvironmentReader
import org.opendc.experiments.capelin.env.EnvironmentReader
import org.opendc.experiments.capelin.model.Workload
@@ -73,9 +72,6 @@ class CapelinIntegrationTest {
*/
@Test
fun testLarge() = runBlockingSimulation {
- val failures = false
- val seed = 0
- val chan = Channel<Unit>(Channel.CONFLATED)
val allocationPolicy = FilterScheduler(
filters = listOf(ComputeFilter(), VCpuFilter(16.0), RamFilter(1.0)),
weighers = listOf(CoreRamWeigher(multiplier = 1.0))
@@ -85,31 +81,14 @@ class CapelinIntegrationTest {
val meterProvider = createMeterProvider(clock)
withComputeService(clock, meterProvider, environmentReader, allocationPolicy) { scheduler ->
- val failureDomain = if (failures) {
- println("ENABLING failures")
- createFailureDomain(
- this,
- clock,
- seed,
- 24.0 * 7,
- scheduler,
- chan
- )
- } else {
- null
- }
-
withMonitor(scheduler, clock, meterProvider as MetricProducer, monitor) {
processTrace(
clock,
traceReader,
scheduler,
- chan,
monitor
)
}
-
- failureDomain?.cancel()
}
val serviceMetrics = collectServiceMetrics(clock.millis(), meterProvider as MetricProducer)
@@ -141,7 +120,6 @@ class CapelinIntegrationTest {
@Test
fun testSmall() = runBlockingSimulation {
val seed = 1
- val chan = Channel<Unit>(Channel.CONFLATED)
val allocationPolicy = FilterScheduler(
filters = listOf(ComputeFilter(), VCpuFilter(16.0), RamFilter(1.0)),
weighers = listOf(CoreRamWeigher(multiplier = 1.0))
@@ -157,7 +135,6 @@ class CapelinIntegrationTest {
clock,
traceReader,
scheduler,
- chan,
monitor
)
}
@@ -187,7 +164,6 @@ class CapelinIntegrationTest {
@Test
fun testInterference() = runBlockingSimulation {
val seed = 1
- val chan = Channel<Unit>(Channel.CONFLATED)
val allocationPolicy = FilterScheduler(
filters = listOf(ComputeFilter(), VCpuFilter(16.0), RamFilter(1.0)),
weighers = listOf(CoreRamWeigher(multiplier = 1.0))
@@ -209,7 +185,6 @@ class CapelinIntegrationTest {
clock,
traceReader,
scheduler,
- chan,
monitor
)
}
@@ -239,7 +214,6 @@ class CapelinIntegrationTest {
@Test
fun testFailures() = runBlockingSimulation {
val seed = 1
- val chan = Channel<Unit>(Channel.CONFLATED)
val allocationPolicy = FilterScheduler(
filters = listOf(ComputeFilter(), VCpuFilter(16.0), RamFilter(1.0)),
weighers = listOf(CoreRamWeigher(multiplier = 1.0))
@@ -250,27 +224,26 @@ class CapelinIntegrationTest {
val meterProvider = createMeterProvider(clock)
withComputeService(clock, meterProvider, environmentReader, allocationPolicy) { scheduler ->
- val failureDomain =
- createFailureDomain(
- this,
+ val faultInjector =
+ createFaultInjector(
+ coroutineContext,
clock,
+ scheduler.hosts.map { it as SimHost }.toSet(),
seed,
24.0 * 7,
- scheduler,
- chan
)
withMonitor(scheduler, clock, meterProvider as MetricProducer, monitor) {
+ faultInjector.start()
processTrace(
clock,
traceReader,
scheduler,
- chan,
monitor
)
}
- failureDomain.cancel()
+ faultInjector.close()
}
val serviceMetrics = collectServiceMetrics(clock.millis(), meterProvider as MetricProducer)