summaryrefslogtreecommitdiff
path: root/opendc-compute
diff options
context:
space:
mode:
author: Fabian Mastenbroek <mail.fabianm@gmail.com>, 2021-08-24 12:55:49 +0200
committer: Fabian Mastenbroek <mail.fabianm@gmail.com>, 2021-08-24 12:55:49 +0200
commit: 3721831204c2d350b93ea265731c0970cbd8fce4 (patch)
tree: 7c8e5c6fe5081e1e6fcae934f69e2a7aba31a37c /opendc-compute
parent: 709cd4909ccc1305c7acfdf666156168d66646eb (diff)
feat(compute): Add support for SimHost failure
This change adds support for failures in the SimHost implementation. Failing a host now puts each of its virtual machines into an error state; recovering the host starts those virtual machines again.
Diffstat (limited to 'opendc-compute')
-rw-r--r-- opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 26
-rw-r--r-- opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt | 96
2 files changed, 117 insertions, 5 deletions
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
index 5ea577f3..be771f6d 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
@@ -46,6 +46,7 @@ import org.opendc.simulator.resources.SimResourceInterpreter
import java.util.*
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.resume
+import kotlin.coroutines.resumeWithException
/**
* A [Host] that simulates virtual machines on a physical machine using [SimHypervisor].
@@ -315,10 +316,16 @@ public class SimHost(
override suspend fun fail() {
_state = HostState.DOWN
+ for (guest in guests.values) {
+ guest.fail()
+ }
}
override suspend fun recover() {
_state = HostState.UP
+ for (guest in guests.values) {
+ guest.start()
+ }
}
/**
@@ -329,7 +336,7 @@ public class SimHost(
suspend fun start() {
when (state) {
- ServerState.TERMINATED -> {
+ ServerState.TERMINATED, ServerState.ERROR -> {
logger.info { "User requested to start server ${server.uid}" }
launch()
}
@@ -356,9 +363,15 @@ public class SimHost(
suspend fun terminate() {
stop()
+ machine.close()
state = ServerState.DELETED
}
+ suspend fun fail() {
+ stop()
+ state = ServerState.ERROR
+ }
+
private var job: Job? = null
private suspend fun launch() = suspendCancellableCoroutine<Unit> { cont ->
@@ -366,16 +379,19 @@ public class SimHost(
val workload = mapper.createWorkload(server)
job = scope.launch {
- delay(1) // TODO Introduce boot time
- init()
- cont.resume(Unit)
+ try {
+ delay(1) // TODO Introduce boot time
+ init()
+ cont.resume(Unit)
+ } catch (e: Throwable) {
+ cont.resumeWithException(e)
+ }
try {
machine.run(workload, mapOf("driver" to this@SimHost, "server" to server))
exit(null)
} catch (cause: Throwable) {
exit(cause)
} finally {
- machine.close()
job = null
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
index fc96cec8..93a2248a 100644
--- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
@@ -185,6 +185,102 @@ internal class SimHostTest {
)
}
+ /**
+ * Test failure of the host.
+ */
+ @Test
+ fun testFailure() = runBlockingSimulation {
+ var requestedWork = 0L
+ var grantedWork = 0L
+
+ val meterProvider: MeterProvider = SdkMeterProvider
+ .builder()
+ .setClock(clock.toOtelClock())
+ .build()
+
+ val interpreter = SimResourceInterpreter(coroutineContext, clock)
+ val host = SimHost(
+ uid = UUID.randomUUID(),
+ name = "test",
+ model = machineModel,
+ meta = emptyMap(),
+ coroutineContext,
+ interpreter,
+ meterProvider.get("opendc-compute-simulator"),
+ SimFairShareHypervisorProvider()
+ )
+ val duration = 5 * 60L
+ val image = MockImage(
+ UUID.randomUUID(),
+ "<unnamed>",
+ emptyMap(),
+ mapOf(
+ "workload" to SimTraceWorkload(
+ sequenceOf(
+ SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2),
+ SimTraceWorkload.Fragment(duration * 1000L, duration * 1000, 2 * 3500.0, 2),
+ SimTraceWorkload.Fragment(duration * 2000L, duration * 1000, 0.0, 2),
+ SimTraceWorkload.Fragment(duration * 3000L, duration * 1000, 2 * 183.0, 2)
+ ),
+ offset = 1
+ )
+ )
+ )
+ val flavor = MockFlavor(2, 0)
+ val server = MockServer(UUID.randomUUID(), "a", flavor, image)
+
+ // Setup metric reader
+ val reader = CoroutineMetricReader(
+ this, listOf(meterProvider as MetricProducer),
+ object : MetricExporter {
+ override fun export(metrics: Collection<MetricData>): CompletableResultCode {
+ val metricsByName = metrics.associateBy { it.name }
+ metricsByName["cpu.work.total"]?.let {
+ requestedWork += it.doubleSummaryData.points.first().sum.toLong()
+ }
+ metricsByName["cpu.work.granted"]?.let {
+ grantedWork += it.doubleSummaryData.points.first().sum.toLong()
+ }
+ return CompletableResultCode.ofSuccess()
+ }
+
+ override fun flush(): CompletableResultCode = CompletableResultCode.ofSuccess()
+
+ override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess()
+ },
+ exportInterval = duration * 1000L
+ )
+
+ coroutineScope {
+ host.spawn(server)
+ delay(5000L)
+ host.fail()
+ delay(5000L)
+ host.recover()
+
+ suspendCancellableCoroutine<Unit> { cont ->
+ host.addListener(object : HostListener {
+ override fun onStateChanged(host: Host, server: Server, newState: ServerState) {
+ if (newState == ServerState.TERMINATED) {
+ cont.resume(Unit)
+ }
+ }
+ })
+ }
+ }
+
+ host.close()
+ // Ensure last cycle is collected
+ delay(1000L * duration)
+
+ reader.close()
+
+ assertAll(
+ { assertEquals(2226039, requestedWork, "Total time does not match") },
+ { assertEquals(1086039, grantedWork, "Down time does not match") },
+ )
+ }
+
private class MockFlavor(
override val cpuCount: Int,
override val memorySize: Long