From 9e5e830e15b74f040708e98c09ea41cd96d13871 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 27 May 2021 16:34:06 +0200 Subject: simulator: Centralize resource logic in SimResourceInterpreter This change introduces the SimResourceInterpreter which centralizes the logic for scheduling and interpreting the communication between resource consumer and provider. This approach offers better performance due to avoiding invalidating the state of the resource context when not necessary. Benchmarks show in the best case a 5x performance improvement and at worst a 2x improvement. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 12 +++++++++--- .../org/opendc/compute/simulator/SimHostTest.kt | 19 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 68667a8c..f08a7e1e 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -41,6 +41,7 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerModel import org.opendc.simulator.failures.FailureDomain +import org.opendc.simulator.resources.SimResourceInterpreter import java.time.Clock import java.util.* import kotlin.coroutines.CoroutineContext @@ -94,18 +95,23 @@ public class SimHost( /** * Current total memory use of the images on this hypervisor. */ - private var availableMemory: Long = model.memory.map { it.size }.sum() + private var availableMemory: Long = model.memory.sumOf { it.size } + + /** + * The resource interpreter to schedule the resource interactions. + */ + private val interpreter = SimResourceInterpreter(context, clock) /** * The machine to run on. */ - public val machine: SimBareMetalMachine = SimBareMetalMachine(context, clock, model, scalingGovernor, scalingDriver) + public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model, scalingGovernor, scalingDriver) /** * The hypervisor to run multiple workloads. */ public val hypervisor: SimHypervisor = hypervisor.create( - scope.coroutineContext, clock, + interpreter, object : SimHypervisor.Listener { override fun onSliceFinish( hypervisor: SimHypervisor, diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 5594fd59..a6cff3ba 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -124,9 +124,18 @@ internal class SimHostTest { object : MetricExporter { override fun export(metrics: Collection): CompletableResultCode { val metricsByName = metrics.associateBy { it.name } - requestedWork += metricsByName.getValue("cpu.work.total").doubleSummaryData.points.first().sum.toLong() - grantedWork += metricsByName.getValue("cpu.work.granted").doubleSummaryData.points.first().sum.toLong() - overcommittedWork += metricsByName.getValue("cpu.work.overcommit").doubleSummaryData.points.first().sum.toLong() + val totalWork = metricsByName["cpu.work.total"] + if (totalWork != null) { + requestedWork += totalWork.doubleSummaryData.points.first().sum.toLong() + } + val grantedWorkCycle = metricsByName["cpu.work.granted"] + if (grantedWorkCycle != null) { + grantedWork += grantedWorkCycle.doubleSummaryData.points.first().sum.toLong() + } + val overcommittedWorkCycle = metricsByName["cpu.work.overcommit"] + if (overcommittedWorkCycle != null) { + overcommittedWork += overcommittedWorkCycle.doubleSummaryData.points.first().sum.toLong() + } return CompletableResultCode.ofSuccess() } @@ -160,8 +169,8 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(4197600, requestedWork, "Requested work does not match") }, - { assertEquals(2157600, grantedWork, "Granted work does not match") }, + { assertEquals(4147200, requestedWork, "Requested work does not match") }, + { assertEquals(2107200, grantedWork, "Granted work does not match") }, { assertEquals(2040000, overcommittedWork, "Overcommitted work does not match") }, { assertEquals(1500001, clock.millis()) } ) -- cgit v1.2.3 From cc87c9ad0b8e4ed3fa4fbad4ab94c5e53948ef3c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 2 Jun 2021 13:03:10 +0200 Subject: simulator: Add uniform interface for resource metrics This change adds a new interface to the resources library for accessing metrics of resources such as work, demand and overcommitted work. With this change, we do not need an implementation specific listener interface in SimResourceSwitchMaxMin anymore. Another benefit of this approach is that updates will be scheduled more efficiently and progress will only be reported once the system has reached a steady-state for that timestamp. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index f08a7e1e..89016c55 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -112,7 +112,7 @@ public class SimHost( */ public val hypervisor: SimHypervisor = hypervisor.create( interpreter, - object : SimHypervisor.Listener { + listener = object : SimHypervisor.Listener { override fun onSliceFinish( hypervisor: SimHypervisor, requestedWork: Long, -- cgit v1.2.3 From 84468f4e3a331d7ea073ac7033b3d9937168ed9f Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 2 Jun 2021 23:23:55 +0200 Subject: simulator: Split CPUFreq subsystem in compute simulator This change splits the functionality present in the CPUFreq subsystem of the compute simulation. Currently, the DVFS functionality is embedded in SimBareMetalMachine. However, this functionality should not exist within the firmware layer of a machine. Instead, the operating system should perform this logic (in OpenDC this should be the hypervisor). Furthermore, this change moves the scaling driver into the power package. The power driver is a machine/firmware specific implementation that computes the power consumption of a machine. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 89016c55..d36717af 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -32,14 +32,14 @@ import org.opendc.compute.api.ServerState import org.opendc.compute.service.driver.* import org.opendc.simulator.compute.* import org.opendc.simulator.compute.cpufreq.PerformanceScalingGovernor -import org.opendc.simulator.compute.cpufreq.ScalingDriver import org.opendc.simulator.compute.cpufreq.ScalingGovernor -import org.opendc.simulator.compute.cpufreq.SimpleScalingDriver import org.opendc.simulator.compute.interference.IMAGE_PERF_INTERFERENCE_MODEL import org.opendc.simulator.compute.interference.PerformanceInterferenceModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel +import org.opendc.simulator.compute.power.PowerDriver import org.opendc.simulator.compute.power.PowerModel +import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.failures.FailureDomain import org.opendc.simulator.resources.SimResourceInterpreter import java.time.Clock @@ -60,7 +60,7 @@ public class SimHost( meter: Meter, hypervisor: SimHypervisorProvider, scalingGovernor: ScalingGovernor, - scalingDriver: ScalingDriver, + scalingDriver: PowerDriver, private val mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), ) : Host, FailureDomain, AutoCloseable { @@ -75,7 +75,7 @@ public class SimHost( hypervisor: SimHypervisorProvider, powerModel: PowerModel = ConstantPowerModel(0.0), mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), - ) : this(uid, name, model, meta, context, clock, meter, hypervisor, PerformanceScalingGovernor(), SimpleScalingDriver(powerModel), mapper) + ) : this(uid, name, model, meta, context, clock, meter, hypervisor, PerformanceScalingGovernor(), SimplePowerDriver(powerModel), mapper) /** * The [CoroutineScope] of the host bounded by the lifecycle of the host. @@ -105,7 +105,7 @@ public class SimHost( /** * The machine to run on. */ - public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model, scalingGovernor, scalingDriver) + public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model, scalingDriver) /** * The hypervisor to run multiple workloads. -- cgit v1.2.3 From 1768292251957da5ce6411ecc7d2dffebf8709c8 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 10 Jun 2021 17:32:43 +0200 Subject: simulator: Integrate power subsystem with compute subsystem This change integrates the power subsystem of the simulator with the compute subsystem by exposing a new field on a SimBareMetalMachine, psu, which provides access to the machine's PSU, which in turn can be connected to a SimPowerOutlet. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index d36717af..540e27fe 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -129,7 +129,7 @@ public class SimHost( _batch.put(_cpuWorkInterference, interferedWork.toDouble()) _batch.put(_cpuUsage, cpuUsage) _batch.put(_cpuDemand, cpuDemand) - _batch.put(_cpuPower, machine.powerDraw) + _batch.put(_cpuPower, machine.psu.powerDraw) _batch.record() } } -- cgit v1.2.3 From df3c9dc3fcd2f89910575bfdc24a3db3af9eba0f Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 20 Jun 2021 22:21:39 +0200 Subject: exp: Enable interpreter sharing across hosts This change enables the experiments to share the SimResourceInterpreter across multiple hosts, which allows updates to be scheduled efficiently for all machines at the same time. This is especially beneficial if the machines operate on the same time slices. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 11 +++-------- .../test/kotlin/org/opendc/compute/simulator/SimHostTest.kt | 4 +++- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 540e27fe..557fa97a 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -56,7 +56,7 @@ public class SimHost( model: SimMachineModel, override val meta: Map, context: CoroutineContext, - clock: Clock, + interpreter: SimResourceInterpreter, meter: Meter, hypervisor: SimHypervisorProvider, scalingGovernor: ScalingGovernor, @@ -70,12 +70,12 @@ public class SimHost( model: SimMachineModel, meta: Map, context: CoroutineContext, - clock: Clock, + interpreter: SimResourceInterpreter, meter: Meter, hypervisor: SimHypervisorProvider, powerModel: PowerModel = ConstantPowerModel(0.0), mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), - ) : this(uid, name, model, meta, context, clock, meter, hypervisor, PerformanceScalingGovernor(), SimplePowerDriver(powerModel), mapper) + ) : this(uid, name, model, meta, context, interpreter, meter, hypervisor, PerformanceScalingGovernor(), SimplePowerDriver(powerModel), mapper) /** * The [CoroutineScope] of the host bounded by the lifecycle of the host. @@ -97,11 +97,6 @@ public class SimHost( */ private var availableMemory: Long = model.memory.sumOf { it.size } - /** - * The resource interpreter to schedule the resource interactions. - */ - private val interpreter = SimResourceInterpreter(context, clock) - /** * The machine to run on. */ diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index a6cff3ba..79489fdb 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -47,6 +47,7 @@ import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.workload.SimTraceWorkload import org.opendc.simulator.core.runBlockingSimulation +import org.opendc.simulator.resources.SimResourceInterpreter import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.telemetry.sdk.toOtelClock import java.util.UUID @@ -83,7 +84,8 @@ internal class SimHostTest { .setClock(clock.toOtelClock()) .build() - val virtDriver = SimHost(UUID.randomUUID(), "test", machineModel, emptyMap(), coroutineContext, clock, meterProvider.get("opendc-compute-simulator"), SimFairShareHypervisorProvider()) + val interpreter = SimResourceInterpreter(coroutineContext, clock) + val virtDriver = SimHost(UUID.randomUUID(), "test", machineModel, emptyMap(), coroutineContext, interpreter, meterProvider.get("opendc-compute-simulator"), SimFairShareHypervisorProvider()) val duration = 5 * 60L val vmImageA = MockImage( UUID.randomUUID(), -- cgit v1.2.3 From d54ac10449083a490e741d6c54e6f3aa07b71af0 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 20 Jun 2021 22:56:33 +0200 Subject: simulator: Remove concept of resource lifecycle This change removes the AutoCloseable interface from the SimResourceProvider and removes the concept of a resource lifecycle. Instead, resource providers are now either active (running a resource consumer) or in-active (being idle), which simplifies implementation. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 1 - 1 file changed, 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 557fa97a..c9527bb6 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -42,7 +42,6 @@ import org.opendc.simulator.compute.power.PowerModel import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.failures.FailureDomain import org.opendc.simulator.resources.SimResourceInterpreter -import java.time.Clock import java.util.* import kotlin.coroutines.CoroutineContext import kotlin.coroutines.resume -- cgit v1.2.3 From b29f90e5ad5bcac29cde86e56c06e0b65a52cedc Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 21 Jun 2021 20:57:06 +0200 Subject: simulator: Re-organize compute simulator module This change re-organizes the classes of the compute simulator module to make a clearer distinction between the hardware, firmware and software interfaces in this module. --- .../main/kotlin/org/opendc/compute/simulator/SimHost.kt | 15 +++++++++------ .../kotlin/org/opendc/compute/simulator/SimHostTest.kt | 8 ++++---- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index c9527bb6..be7bc667 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -31,10 +31,13 @@ import org.opendc.compute.api.Server import org.opendc.compute.api.ServerState import org.opendc.compute.service.driver.* import org.opendc.simulator.compute.* -import org.opendc.simulator.compute.cpufreq.PerformanceScalingGovernor -import org.opendc.simulator.compute.cpufreq.ScalingGovernor import org.opendc.simulator.compute.interference.IMAGE_PERF_INTERFERENCE_MODEL import org.opendc.simulator.compute.interference.PerformanceInterferenceModel +import org.opendc.simulator.compute.kernel.SimHypervisor +import org.opendc.simulator.compute.kernel.SimHypervisorProvider +import org.opendc.simulator.compute.kernel.cpufreq.PerformanceScalingGovernor +import org.opendc.simulator.compute.kernel.cpufreq.ScalingGovernor +import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerDriver @@ -52,7 +55,7 @@ import kotlin.coroutines.resume public class SimHost( override val uid: UUID, override val name: String, - model: SimMachineModel, + model: MachineModel, override val meta: Map, context: CoroutineContext, interpreter: SimResourceInterpreter, @@ -66,7 +69,7 @@ public class SimHost( public constructor( uid: UUID, name: String, - model: SimMachineModel, + model: MachineModel, meta: Map, context: CoroutineContext, interpreter: SimResourceInterpreter, @@ -304,13 +307,13 @@ public class SimHost( /** * Convert flavor to machine model. */ - private fun Flavor.toMachineModel(): SimMachineModel { + private fun Flavor.toMachineModel(): MachineModel { val originalCpu = machine.model.cpus[0] val processingNode = originalCpu.node.copy(coreCount = cpuCount) val processingUnits = (0 until cpuCount).map { originalCpu.copy(id = it, node = processingNode) } val memoryUnits = listOf(MemoryUnit("Generic", "Generic", 3200.0, memorySize)) - return SimMachineModel(processingUnits, memoryUnits) + return MachineModel(processingUnits, memoryUnits) } private fun onGuestStart(vm: Guest) { diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 79489fdb..5414d042 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -40,8 +40,8 @@ import org.opendc.compute.api.ServerState import org.opendc.compute.api.ServerWatcher import org.opendc.compute.service.driver.Host import org.opendc.compute.service.driver.HostListener -import org.opendc.simulator.compute.SimFairShareHypervisorProvider -import org.opendc.simulator.compute.SimMachineModel +import org.opendc.simulator.compute.kernel.SimFairShareHypervisorProvider +import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit @@ -58,13 +58,13 @@ import kotlin.coroutines.resume */ @OptIn(ExperimentalCoroutinesApi::class) internal class SimHostTest { - private lateinit var machineModel: SimMachineModel + private lateinit var machineModel: MachineModel @BeforeEach fun setUp() { val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2) - machineModel = SimMachineModel( + machineModel = MachineModel( cpus = List(cpuNode.coreCount) { ProcessingUnit(cpuNode, it, 3200.0) }, memory = List(4) { MemoryUnit("Crucial", "MTA18ASF4G72AZ-3G2B1", 3200.0, 32_000) } ) -- cgit v1.2.3 From e56967a29ac2b2d26cc085b1f3e27096dad6a170 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 24 Jun 2021 12:54:52 +0200 Subject: simulator: Re-implement performance interference model This change updates reimplements the performance interference model to work on top of the universal resource model in `opendc-simulator-resources`. This enables us to model interference and performance variability of other resources such as disk or network in the future. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 43 ++++------------------ .../org/opendc/compute/simulator/SimHostTest.kt | 19 ++++++---- 2 files changed, 20 insertions(+), 42 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index be7bc667..5ea577f3 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -31,17 +31,15 @@ import org.opendc.compute.api.Server import org.opendc.compute.api.ServerState import org.opendc.compute.service.driver.* import org.opendc.simulator.compute.* -import org.opendc.simulator.compute.interference.IMAGE_PERF_INTERFERENCE_MODEL -import org.opendc.simulator.compute.interference.PerformanceInterferenceModel import org.opendc.simulator.compute.kernel.SimHypervisor import org.opendc.simulator.compute.kernel.SimHypervisorProvider import org.opendc.simulator.compute.kernel.cpufreq.PerformanceScalingGovernor import org.opendc.simulator.compute.kernel.cpufreq.ScalingGovernor +import org.opendc.simulator.compute.kernel.interference.VmInterferenceDomain import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerDriver -import org.opendc.simulator.compute.power.PowerModel import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.failures.FailureDomain import org.opendc.simulator.resources.SimResourceInterpreter @@ -61,24 +59,11 @@ public class SimHost( interpreter: SimResourceInterpreter, meter: Meter, hypervisor: SimHypervisorProvider, - scalingGovernor: ScalingGovernor, - scalingDriver: PowerDriver, + scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), + powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), private val mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), + interferenceDomain: VmInterferenceDomain? = null ) : Host, FailureDomain, AutoCloseable { - - public constructor( - uid: UUID, - name: String, - model: MachineModel, - meta: Map, - context: CoroutineContext, - interpreter: SimResourceInterpreter, - meter: Meter, - hypervisor: SimHypervisorProvider, - powerModel: PowerModel = ConstantPowerModel(0.0), - mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), - ) : this(uid, name, model, meta, context, interpreter, meter, hypervisor, PerformanceScalingGovernor(), SimplePowerDriver(powerModel), mapper) - /** * The [CoroutineScope] of the host bounded by the lifecycle of the host. */ @@ -102,13 +87,15 @@ public class SimHost( /** * The machine to run on. */ - public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model, scalingDriver) + public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model, powerDriver) /** * The hypervisor to run multiple workloads. */ public val hypervisor: SimHypervisor = hypervisor.create( interpreter, + scalingGovernor = scalingGovernor, + interferenceDomain = interferenceDomain, listener = object : SimHypervisor.Listener { override fun onSliceFinish( hypervisor: SimHypervisor, @@ -260,7 +247,7 @@ public class SimHost( } require(canFit(server)) { "Server does not fit" } - val guest = Guest(server, hypervisor.createMachine(server.flavor.toMachineModel())) + val guest = Guest(server, hypervisor.createMachine(server.flavor.toMachineModel(), server.name)) guests[server] = guest _guests.add(1) @@ -317,23 +304,11 @@ public class SimHost( } private fun onGuestStart(vm: Guest) { - guests.forEach { (_, guest) -> - if (guest.state == ServerState.RUNNING) { - vm.performanceInterferenceModel?.onStart(vm.server.image.name) - } - } - _activeGuests.add(1) listeners.forEach { it.onStateChanged(this, vm.server, vm.state) } } private fun onGuestStop(vm: Guest) { - guests.forEach { (_, guest) -> - if (guest.state == ServerState.RUNNING) { - vm.performanceInterferenceModel?.onStop(vm.server.image.name) - } - } - _activeGuests.add(-1) listeners.forEach { it.onStateChanged(this, vm.server, vm.state) } } @@ -350,8 +325,6 @@ public class SimHost( * A virtual machine instance that the driver manages. */ private inner class Guest(val server: Server, val machine: SimMachine) { - val performanceInterferenceModel: PerformanceInterferenceModel? = server.meta[IMAGE_PERF_INTERFERENCE_MODEL] as? PerformanceInterferenceModel? - var state: ServerState = ServerState.TERMINATED suspend fun start() { diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 5414d042..5a6fb03d 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -33,11 +33,7 @@ import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertAll -import org.opendc.compute.api.Flavor -import org.opendc.compute.api.Image -import org.opendc.compute.api.Server -import org.opendc.compute.api.ServerState -import org.opendc.compute.api.ServerWatcher +import org.opendc.compute.api.* import org.opendc.compute.service.driver.Host import org.opendc.compute.service.driver.HostListener import org.opendc.simulator.compute.kernel.SimFairShareHypervisorProvider @@ -50,7 +46,7 @@ import org.opendc.simulator.core.runBlockingSimulation import org.opendc.simulator.resources.SimResourceInterpreter import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.telemetry.sdk.toOtelClock -import java.util.UUID +import java.util.* import kotlin.coroutines.resume /** @@ -85,7 +81,16 @@ internal class SimHostTest { .build() val interpreter = SimResourceInterpreter(coroutineContext, clock) - val virtDriver = SimHost(UUID.randomUUID(), "test", machineModel, emptyMap(), coroutineContext, interpreter, meterProvider.get("opendc-compute-simulator"), SimFairShareHypervisorProvider()) + val virtDriver = SimHost( + uid = UUID.randomUUID(), + name = "test", + model = machineModel, + meta = emptyMap(), + coroutineContext, + interpreter, + meterProvider.get("opendc-compute-simulator"), + SimFairShareHypervisorProvider() + ) val duration = 5 * 60L val vmImageA = MockImage( UUID.randomUUID(), -- cgit v1.2.3 From 31a1f298c71cd3203fdcd57bd39ba8813009dd5b Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Aug 2021 19:25:11 +0200 Subject: refactor(simulator): Execute traces based on timestamps This change refactors the trace workload in the OpenDC simulator to track execute a fragment based on the fragment's timestamp. This makes sure that the trace is replayed identically to the original execution. --- .../org/opendc/compute/simulator/SimHostTest.kt | 24 ++++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 5a6fb03d..45fdb268 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -99,11 +99,12 @@ internal class SimHostTest { mapOf( "workload" to SimTraceWorkload( sequenceOf( - SimTraceWorkload.Fragment(duration * 1000, 2 * 28.0, 2), - SimTraceWorkload.Fragment(duration * 1000, 2 * 3500.0, 2), - SimTraceWorkload.Fragment(duration * 1000, 0.0, 2), - SimTraceWorkload.Fragment(duration * 1000, 2 * 183.0, 2) + SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceWorkload.Fragment(duration * 1000, duration * 1000, 2 * 3500.0, 2), + SimTraceWorkload.Fragment(duration * 2000, duration * 1000, 0.0, 2), + SimTraceWorkload.Fragment(duration * 3000, duration * 1000, 2 * 183.0, 2) ), + offset = 1 ) ) ) @@ -114,11 +115,12 @@ internal class SimHostTest { mapOf( "workload" to SimTraceWorkload( sequenceOf( - SimTraceWorkload.Fragment(duration * 1000, 2 * 28.0, 2), - SimTraceWorkload.Fragment(duration * 1000, 2 * 3100.0, 2), - SimTraceWorkload.Fragment(duration * 1000, 0.0, 2), - SimTraceWorkload.Fragment(duration * 1000, 2 * 73.0, 2) - ) + SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceWorkload.Fragment(duration * 1000L, duration * 1000, 2 * 3100.0, 2), + SimTraceWorkload.Fragment(duration * 2000L, duration * 1000, 0.0, 2), + SimTraceWorkload.Fragment(duration * 3000L, duration * 1000, 2 * 73.0, 2) + ), + offset = 1 ) ) ) @@ -150,7 +152,7 @@ internal class SimHostTest { override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess() }, - exportInterval = duration * 1000 + exportInterval = duration * 1000L ) coroutineScope { @@ -171,7 +173,7 @@ internal class SimHostTest { } // Ensure last cycle is collected - delay(1000 * duration) + delay(1000L * duration) virtDriver.close() reader.close() -- cgit v1.2.3 From 97a28129e4638f601864a08f483908907b9026e6 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 18 Aug 2021 14:40:28 +0200 Subject: fix(capelin): Update Bitbrains trace tests This change updates the Bitbrains trace tests with the updated trace that does not hardcode the duration of the trace fragments. --- .../src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 45fdb268..fc96cec8 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -116,9 +116,9 @@ internal class SimHostTest { "workload" to SimTraceWorkload( sequenceOf( SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), - SimTraceWorkload.Fragment(duration * 1000L, duration * 1000, 2 * 3100.0, 2), - SimTraceWorkload.Fragment(duration * 2000L, duration * 1000, 0.0, 2), - SimTraceWorkload.Fragment(duration * 3000L, duration * 1000, 2 * 73.0, 2) + SimTraceWorkload.Fragment(duration * 1000, duration * 1000, 2 * 3100.0, 2), + SimTraceWorkload.Fragment(duration * 2000, duration * 1000, 0.0, 2), + SimTraceWorkload.Fragment(duration * 3000, duration * 1000, 2 * 73.0, 2) ), offset = 1 ) -- cgit v1.2.3 From 3721831204c2d350b93ea265731c0970cbd8fce4 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Aug 2021 12:55:49 +0200 Subject: feat(compute): Add support for SimHost failure This change adds support for failures in the SimHost implementation. Failing a host will now cause the virtual machine to enter an error state. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 26 ++++-- .../org/opendc/compute/simulator/SimHostTest.kt | 96 ++++++++++++++++++++++ 2 files changed, 117 insertions(+), 5 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 5ea577f3..be771f6d 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -46,6 +46,7 @@ import org.opendc.simulator.resources.SimResourceInterpreter import java.util.* import kotlin.coroutines.CoroutineContext import kotlin.coroutines.resume +import kotlin.coroutines.resumeWithException /** * A [Host] that is simulates virtual machines on a physical machine using [SimHypervisor]. @@ -315,10 +316,16 @@ public class SimHost( override suspend fun fail() { _state = HostState.DOWN + for (guest in guests.values) { + guest.fail() + } } override suspend fun recover() { _state = HostState.UP + for (guest in guests.values) { + guest.start() + } } /** @@ -329,7 +336,7 @@ public class SimHost( suspend fun start() { when (state) { - ServerState.TERMINATED -> { + ServerState.TERMINATED, ServerState.ERROR -> { logger.info { "User requested to start server ${server.uid}" } launch() } @@ -356,9 +363,15 @@ public class SimHost( suspend fun terminate() { stop() + machine.close() state = ServerState.DELETED } + suspend fun fail() { + stop() + state = ServerState.ERROR + } + private var job: Job? = null private suspend fun launch() = suspendCancellableCoroutine { cont -> @@ -366,16 +379,19 @@ public class SimHost( val workload = mapper.createWorkload(server) job = scope.launch { - delay(1) // TODO Introduce boot time - init() - cont.resume(Unit) + try { + delay(1) // TODO Introduce boot time + init() + cont.resume(Unit) + } catch (e: Throwable) { + cont.resumeWithException(e) + } try { machine.run(workload, mapOf("driver" to this@SimHost, "server" to server)) exit(null) } catch (cause: Throwable) { exit(cause) } finally { - machine.close() job = null } } diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index fc96cec8..93a2248a 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -185,6 +185,102 @@ internal class SimHostTest { ) } + /** + * Test failure of the host. + */ + @Test + fun testFailure() = runBlockingSimulation { + var requestedWork = 0L + var grantedWork = 0L + + val meterProvider: MeterProvider = SdkMeterProvider + .builder() + .setClock(clock.toOtelClock()) + .build() + + val interpreter = SimResourceInterpreter(coroutineContext, clock) + val host = SimHost( + uid = UUID.randomUUID(), + name = "test", + model = machineModel, + meta = emptyMap(), + coroutineContext, + interpreter, + meterProvider.get("opendc-compute-simulator"), + SimFairShareHypervisorProvider() + ) + val duration = 5 * 60L + val image = MockImage( + UUID.randomUUID(), + "", + emptyMap(), + mapOf( + "workload" to SimTraceWorkload( + sequenceOf( + SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceWorkload.Fragment(duration * 1000L, duration * 1000, 2 * 3500.0, 2), + SimTraceWorkload.Fragment(duration * 2000L, duration * 1000, 0.0, 2), + SimTraceWorkload.Fragment(duration * 3000L, duration * 1000, 2 * 183.0, 2) + ), + offset = 1 + ) + ) + ) + val flavor = MockFlavor(2, 0) + val server = MockServer(UUID.randomUUID(), "a", flavor, image) + + // Setup metric reader + val reader = CoroutineMetricReader( + this, listOf(meterProvider as MetricProducer), + object : MetricExporter { + override fun export(metrics: Collection): CompletableResultCode { + val metricsByName = metrics.associateBy { it.name } + metricsByName["cpu.work.total"]?.let { + requestedWork += it.doubleSummaryData.points.first().sum.toLong() + } + metricsByName["cpu.work.granted"]?.let { + grantedWork += it.doubleSummaryData.points.first().sum.toLong() + } + return CompletableResultCode.ofSuccess() + } + + override fun flush(): CompletableResultCode = CompletableResultCode.ofSuccess() + + override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess() + }, + exportInterval = duration * 1000L + ) + + coroutineScope { + host.spawn(server) + delay(5000L) + host.fail() + delay(5000L) + host.recover() + + suspendCancellableCoroutine { cont -> + host.addListener(object : HostListener { + override fun onStateChanged(host: Host, server: Server, newState: ServerState) { + if (newState == ServerState.TERMINATED) { + cont.resume(Unit) + } + } + }) + } + } + + host.close() + // Ensure last cycle is collected + delay(1000L * duration) + + reader.close() + + assertAll( + { assertEquals(2226039, requestedWork, "Total time does not match") }, + { assertEquals(1086039, grantedWork, "Down time does not match") }, + ) + } + private class MockFlavor( override val cpuCount: Int, override val memorySize: Long -- cgit v1.2.3 From f111081627280d4e7e1d7147c56cdce708e32433 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 14:06:39 +0200 Subject: build: Upgrade to OpenTelemetry 1.5 This change upgrades the OpenTelemetry dependency to version 1.5, which contains various breaking changes in the metrics API. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 81 ++++++++++++---------- .../org/opendc/compute/simulator/SimHostTest.kt | 19 +++-- 2 files changed, 51 insertions(+), 49 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index be771f6d..546584b6 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -22,8 +22,9 @@ package org.opendc.compute.simulator +import io.opentelemetry.api.common.Attributes import io.opentelemetry.api.metrics.Meter -import io.opentelemetry.api.metrics.common.Labels +import io.opentelemetry.semconv.resource.attributes.ResourceAttributes import kotlinx.coroutines.* import mu.KotlinLogging import org.opendc.compute.api.Flavor @@ -107,15 +108,13 @@ public class SimHost( cpuUsage: Double, cpuDemand: Double ) { - - _batch.put(_cpuWork, requestedWork.toDouble()) - _batch.put(_cpuWorkGranted, grantedWork.toDouble()) - _batch.put(_cpuWorkOvercommit, overcommittedWork.toDouble()) - _batch.put(_cpuWorkInterference, interferedWork.toDouble()) - _batch.put(_cpuUsage, cpuUsage) - _batch.put(_cpuDemand, cpuDemand) - _batch.put(_cpuPower, machine.psu.powerDraw) - _batch.record() + _totalWork.add(requestedWork.toDouble()) + _grantedWork.add(grantedWork.toDouble()) + _overcommittedWork.add(overcommittedWork.toDouble()) + _interferedWork.add(interferedWork.toDouble()) + _cpuDemand.record(cpuDemand) + _cpuUsage.record(cpuUsage) + _powerUsage.record(machine.psu.powerDraw) } } ) @@ -135,86 +134,92 @@ public class SimHost( field = value } - override val model: HostModel = HostModel(model.cpus.size, model.memory.map { it.size }.sum()) + override val model: HostModel = HostModel(model.cpus.size, model.memory.sumOf { it.size }) /** - * The number of guests on the host. + * The total number of guests. */ - private val _guests = meter.longUpDownCounterBuilder("guests.total") + private val _guests = meter.upDownCounterBuilder("guests.total") .setDescription("Number of guests") .setUnit("1") .build() - .bind(Labels.of("host", uid.toString())) + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The number of active guests on the host. */ - private val _activeGuests = meter.longUpDownCounterBuilder("guests.active") + private val _activeGuests = meter.upDownCounterBuilder("guests.active") .setDescription("Number of active guests") .setUnit("1") .build() - .bind(Labels.of("host", uid.toString())) + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The CPU usage on the host. + * The CPU demand of the host. */ - private val _cpuUsage = meter.doubleValueRecorderBuilder("cpu.usage") - .setDescription("The amount of CPU resources used by the host") + private val _cpuDemand = meter.histogramBuilder("cpu.demand") + .setDescription("The amount of CPU resources the guests would use if there were no CPU contention or CPU limits") .setUnit("MHz") .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The CPU demand on the host. + * The CPU usage of the host. */ - private val _cpuDemand = meter.doubleValueRecorderBuilder("cpu.demand") - .setDescription("The amount of CPU resources the guests would use if there were no CPU contention or CPU limits") + private val _cpuUsage = meter.histogramBuilder("cpu.usage") + .setDescription("The amount of CPU resources used by the host") .setUnit("MHz") .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The requested work for the CPU. + * The power usage of the host. */ - private val _cpuPower = meter.doubleValueRecorderBuilder("power.usage") + private val _powerUsage = meter.histogramBuilder("power.usage") .setDescription("The amount of power used by the CPU") .setUnit("W") .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The requested work for the CPU. + * The total amount of work supplied to the CPU. */ - private val _cpuWork = meter.doubleValueRecorderBuilder("cpu.work.total") + private val _totalWork = meter.counterBuilder("cpu.work.total") .setDescription("The amount of work supplied to the CPU") .setUnit("1") + .ofDoubles() .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The work actually performed by the CPU. + * The work performed by the CPU. */ - private val _cpuWorkGranted = meter.doubleValueRecorderBuilder("cpu.work.granted") + private val _grantedWork = meter.counterBuilder("cpu.work.granted") .setDescription("The amount of work performed by the CPU") .setUnit("1") + .ofDoubles() .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The work that could not be performed by the CPU due to overcommitting resource. + * The amount not performed by the CPU due to overcommitment. */ - private val _cpuWorkOvercommit = meter.doubleValueRecorderBuilder("cpu.work.overcommit") + private val _overcommittedWork = meter.counterBuilder("cpu.work.overcommit") .setDescription("The amount of work not performed by the CPU due to overcommitment") .setUnit("1") + .ofDoubles() .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** - * The work that could not be performed by the CPU due to interference. + * The amount of work not performed by the CPU due to interference. */ - private val _cpuWorkInterference = meter.doubleValueRecorderBuilder("cpu.work.interference") + private val _interferedWork = meter.counterBuilder("cpu.work.interference") .setDescription("The amount of work not performed by the CPU due to interference") .setUnit("1") + .ofDoubles() .build() - - /** - * The batch recorder used to record multiple metrics atomically. - */ - private val _batch = meter.newBatchRecorder("host", uid.toString()) + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) init { // Launch hypervisor onto machine @@ -273,8 +278,8 @@ public class SimHost( override suspend fun delete(server: Server) { val guest = guests.remove(server) ?: return - guest.terminate() _guests.add(-1) + guest.terminate() } override fun addListener(listener: HostListener) { diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 93a2248a..1ba3a9a1 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -133,17 +133,14 @@ internal class SimHostTest { object : MetricExporter { override fun export(metrics: Collection): CompletableResultCode { val metricsByName = metrics.associateBy { it.name } - val totalWork = metricsByName["cpu.work.total"] - if (totalWork != null) { - requestedWork += totalWork.doubleSummaryData.points.first().sum.toLong() + metricsByName["cpu.work.total"]?.let { + requestedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } - val grantedWorkCycle = metricsByName["cpu.work.granted"] - if (grantedWorkCycle != null) { - grantedWork += grantedWorkCycle.doubleSummaryData.points.first().sum.toLong() + metricsByName["cpu.work.granted"]?.let { + grantedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } - val overcommittedWorkCycle = metricsByName["cpu.work.overcommit"] - if (overcommittedWorkCycle != null) { - overcommittedWork += overcommittedWorkCycle.doubleSummaryData.points.first().sum.toLong() + metricsByName["cpu.work.overcommit"]?.let { + overcommittedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } return CompletableResultCode.ofSuccess() } @@ -236,10 +233,10 @@ internal class SimHostTest { override fun export(metrics: Collection): CompletableResultCode { val metricsByName = metrics.associateBy { it.name } metricsByName["cpu.work.total"]?.let { - requestedWork += it.doubleSummaryData.points.first().sum.toLong() + requestedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } metricsByName["cpu.work.granted"]?.let { - grantedWork += it.doubleSummaryData.points.first().sum.toLong() + grantedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } return CompletableResultCode.ofSuccess() } -- cgit v1.2.3 From e6dd553cf77445083f2c7632bd3b4c3611d76d0a Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 18:15:07 +0200 Subject: fix(simulator): Eliminate unnecessary double to long conversions This change eliminates unnecessary double to long conversions in the simulator. Previously, we used longs to denote the amount of work. However, in the mean time we have switched to doubles in the lower stack. --- .../main/kotlin/org/opendc/compute/simulator/SimHost.kt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 546584b6..dcc525cb 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -101,17 +101,17 @@ public class SimHost( listener = object : SimHypervisor.Listener { override fun onSliceFinish( hypervisor: SimHypervisor, - requestedWork: Long, - grantedWork: Long, - overcommittedWork: Long, - interferedWork: Long, + requestedWork: Double, + grantedWork: Double, + overcommittedWork: Double, + interferedWork: Double, cpuUsage: Double, cpuDemand: Double ) { - _totalWork.add(requestedWork.toDouble()) - _grantedWork.add(grantedWork.toDouble()) - _overcommittedWork.add(overcommittedWork.toDouble()) - _interferedWork.add(interferedWork.toDouble()) + _totalWork.add(requestedWork) + _grantedWork.add(grantedWork) + _overcommittedWork.add(overcommittedWork) + _interferedWork.add(interferedWork) _cpuDemand.record(cpuDemand) _cpuUsage.record(cpuUsage) _powerUsage.record(machine.psu.powerDraw) -- cgit v1.2.3 From b0f6402f60ddbba1aad7e198fe6757792337f4d4 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 20:46:36 +0200 Subject: refactor(compute): Measure power draw without PSU overhead This change updates the SimHost implementation to measure the power draw of the machine without PSU overhead to make the results more realistic. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index dcc525cb..20e5a9db 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -114,7 +114,7 @@ public class SimHost( _interferedWork.add(interferedWork) _cpuDemand.record(cpuDemand) _cpuUsage.record(cpuUsage) - _powerUsage.record(machine.psu.powerDraw) + _powerUsage.record(machine.powerDraw) } } ) -- cgit v1.2.3 From 9236b3cfb7be1e9d44fe60cbdd699c19c70f6411 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 17 Aug 2021 19:22:34 +0200 Subject: feat(compute): Track host up/down time This change adds new metrics for tracking the up and downtime of hosts due to failures. In addition, this change adds a test to verify whether the metrics are collected correctly. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 55 ++++++++++++++++++++++ .../org/opendc/compute/simulator/SimHostTest.kt | 10 ++++ 2 files changed, 65 insertions(+) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 20e5a9db..e12bd37b 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -71,6 +71,11 @@ public class SimHost( */ override val scope: CoroutineScope = CoroutineScope(context + Job()) + /** + * The clock instance used by the host. + */ + private val clock = interpreter.clock + /** * The logger instance of this server. */ @@ -115,6 +120,8 @@ public class SimHost( _cpuDemand.record(cpuDemand) _cpuUsage.record(cpuUsage) _powerUsage.record(machine.powerDraw) + + reportTime() } } ) @@ -221,6 +228,33 @@ public class SimHost( .build() .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + /** + * The amount of time in the system. + */ + private val _totalTime = meter.counterBuilder("host.time.total") + .setDescription("The amount of time in the system") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + + /** + * The uptime of the host. + */ + private val _upTime = meter.counterBuilder("host.time.up") + .setDescription("The uptime of the host") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + + /** + * The downtime of the host. + */ + private val _downTime = meter.counterBuilder("host.time.down") + .setDescription("The downtime of the host") + .setUnit("ms") + .build() + .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) + init { // Launch hypervisor onto machine scope.launch { @@ -238,6 +272,24 @@ public class SimHost( } } + private var _lastReport = clock.millis() + + private fun reportTime() { + if (!scope.isActive) + return + + val now = clock.millis() + val duration = now - _lastReport + + _totalTime.add(duration) + when (_state) { + HostState.UP -> _upTime.add(duration) + HostState.DOWN -> _downTime.add(duration) + } + + _lastReport = now + } + override fun canFit(server: Server): Boolean { val sufficientMemory = availableMemory > server.flavor.memorySize val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount @@ -291,6 +343,7 @@ public class SimHost( } override fun close() { + reportTime() scope.cancel() machine.close() } @@ -320,6 +373,7 @@ public class SimHost( } override suspend fun fail() { + reportTime() _state = HostState.DOWN for (guest in guests.values) { guest.fail() @@ -327,6 +381,7 @@ public class SimHost( } override suspend fun recover() { + reportTime() _state = HostState.UP for (guest in guests.values) { guest.start() diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 1ba3a9a1..0a2ced7b 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -189,6 +189,8 @@ internal class SimHostTest { fun testFailure() = runBlockingSimulation { var requestedWork = 0L var grantedWork = 0L + var totalTime = 0L + var downTime = 0L val meterProvider: MeterProvider = SdkMeterProvider .builder() @@ -238,6 +240,12 @@ internal class SimHostTest { metricsByName["cpu.work.granted"]?.let { grantedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() } + metricsByName["host.time.total"]?.let { + totalTime = it.longSumData.points.first().value + } + metricsByName["host.time.down"]?.let { + downTime = it.longSumData.points.first().value + } return CompletableResultCode.ofSuccess() } @@ -275,6 +283,8 @@ internal class SimHostTest { assertAll( { assertEquals(2226039, requestedWork, "Total time does not match") }, { assertEquals(1086039, grantedWork, "Down time does not match") }, + { assertEquals(1200001, totalTime, "Total time does not match") }, + { assertEquals(5000, downTime, "Down time does not match") }, ) } -- cgit v1.2.3 From d097d65851619483a85ce16ab56f61a726bbe756 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Aug 2021 16:55:42 +0200 Subject: fix(compute): Support overcommitted memory in SimHost This change enables host to overcommit their memory when testing whether new servers can fit on the host. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index e12bd37b..76cc7dfe 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -86,11 +86,6 @@ public class SimHost( */ private val listeners = mutableListOf() - /** - * Current total memory use of the images on this hypervisor. - */ - private var availableMemory: Long = model.memory.sumOf { it.size } - /** * The machine to run on. */ @@ -291,7 +286,7 @@ public class SimHost( } override fun canFit(server: Server): Boolean { - val sufficientMemory = availableMemory > server.flavor.memorySize + val sufficientMemory = machine.model.memory.size >= server.flavor.memorySize val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount val canFit = hypervisor.canFit(server.flavor.toMachineModel()) @@ -469,7 +464,6 @@ public class SimHost( else ServerState.ERROR - availableMemory += server.flavor.memorySize onGuestStop(this) } } -- cgit v1.2.3 From 83d7e45a6b749df0c208e56885402c66e54c4b23 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 24 Aug 2021 17:06:35 +0200 Subject: fix(compute): Start host even if it already exists on host --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 76cc7dfe..4526537d 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -294,16 +294,12 @@ public class SimHost( } override suspend fun spawn(server: Server, start: Boolean) { - // Return if the server already exists on this host - if (server in this) { - return + val guest = guests.computeIfAbsent(server) { key -> + require(canFit(key)) { "Server does not fit" } + _guests.add(1) + Guest(key, hypervisor.createMachine(key.flavor.toMachineModel(), key.name)) } - require(canFit(server)) { "Server does not fit" } - val guest = Guest(server, hypervisor.createMachine(server.flavor.toMachineModel(), server.name)) - guests[server] = guest - _guests.add(1) - if (start) { guest.start() } -- cgit v1.2.3 From 2c507c6ca4b7d809b410d351f0c1c5c3ddf7bb5c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 14:38:08 +0200 Subject: feat(compute): Track guest up/down time This change updates the SimHost implementation to track the up and downtime of hypervisor guests. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 54 +++++++++++++++++++++- .../org/opendc/compute/simulator/SimHostTest.kt | 10 ++++ 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 4526537d..9a1f05fc 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -22,6 +22,7 @@ package org.opendc.compute.simulator +import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.common.Attributes import io.opentelemetry.api.metrics.Meter import io.opentelemetry.semconv.resource.attributes.ResourceAttributes @@ -59,7 +60,7 @@ public class SimHost( override val meta: Map, context: CoroutineContext, interpreter: SimResourceInterpreter, - meter: Meter, + private val meter: Meter, hypervisor: SimHypervisorProvider, scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), @@ -282,6 +283,11 @@ public class SimHost( HostState.DOWN -> _downTime.add(duration) } + // Track time of guests + for (guest in guests.values) { + guest.reportTime() + } + _lastReport = now } @@ -385,6 +391,33 @@ public class SimHost( private inner class Guest(val server: Server, val machine: SimMachine) { var state: ServerState = ServerState.TERMINATED + /** + * The amount of time in the system. + */ + private val _totalTime = meter.counterBuilder("guest.time.total") + .setDescription("The amount of time in the system") + .setUnit("ms") + .build() + .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + + /** + * The uptime of the guest. + */ + private val _runningTime = meter.counterBuilder("guest.time.running") + .setDescription("The uptime of the guest") + .setUnit("ms") + .build() + .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + + /** + * The time the guest is in an error state. + */ + private val _errorTime = meter.counterBuilder("guest.time.error") + .setDescription("The time the guest is in an error state") + .setUnit("ms") + .build() + .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + suspend fun start() { when (state) { ServerState.TERMINATED, ServerState.ERROR -> { @@ -462,5 +495,24 @@ public class SimHost( onGuestStop(this) } + + private var _lastReport = clock.millis() + + fun reportTime() { + if (state == ServerState.DELETED) + return + + val now = clock.millis() + val duration = now - _lastReport + + _totalTime.add(duration) + when (state) { + ServerState.RUNNING -> _runningTime.add(duration) + ServerState.ERROR -> _errorTime.add(duration) + else -> {} + } + + _lastReport = now + } } } diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 0a2ced7b..31215e9a 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -191,6 +191,8 @@ internal class SimHostTest { var grantedWork = 0L var totalTime = 0L var downTime = 0L + var guestTotalTime = 0L + var guestDownTime = 0L val meterProvider: MeterProvider = SdkMeterProvider .builder() @@ -246,6 +248,12 @@ internal class SimHostTest { metricsByName["host.time.down"]?.let { downTime = it.longSumData.points.first().value } + metricsByName["guest.time.total"]?.let { + guestTotalTime = it.longSumData.points.first().value + } + metricsByName["guest.time.error"]?.let { + guestDownTime = it.longSumData.points.first().value + } return CompletableResultCode.ofSuccess() } @@ -284,7 +292,9 @@ internal class SimHostTest { { assertEquals(2226039, requestedWork, "Total time does not match") }, { assertEquals(1086039, grantedWork, "Down time does not match") }, { assertEquals(1200001, totalTime, "Total time does not match") }, + { assertEquals(1200001, guestTotalTime, "Guest total time does not match") }, { assertEquals(5000, downTime, "Down time does not match") }, + { assertEquals(5000, guestDownTime, "Guest down time does not match") }, ) } -- cgit v1.2.3 From b6a8c642b598bfb2eaaea2a8a7e6ad6702d349b6 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Aug 2021 10:29:41 +0200 Subject: fix(compute): Use correct memory size for host memory This change fixes an issue where all servers could not be scheduled due to the memory size of the host being computed incorrectly. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 9a1f05fc..a4d24740 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -292,8 +292,8 @@ public class SimHost( } override fun canFit(server: Server): Boolean { - val sufficientMemory = machine.model.memory.size >= server.flavor.memorySize - val enoughCpus = machine.model.cpus.size >= server.flavor.cpuCount + val sufficientMemory = model.memorySize >= server.flavor.memorySize + val enoughCpus = model.cpuCount >= server.flavor.cpuCount val canFit = hypervisor.canFit(server.flavor.toMachineModel()) return sufficientMemory && enoughCpus && canFit -- cgit v1.2.3 From 561f07bd6547c8273627e9c860324ab892ba5fa7 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Aug 2021 10:30:53 +0200 Subject: fix(compute): Do not allow failure of inactive guests This change fixes an issue in SimHost where guests that where inactive were also failed, causing an IllegalStateException. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index a4d24740..213d20ee 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -452,6 +452,9 @@ public class SimHost( } suspend fun fail() { + if (state != ServerState.RUNNING) { + return + } stop() state = ServerState.ERROR } -- cgit v1.2.3 From d24cc0cc9c4fe2145f0337d65e9a75f631365973 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 10 Sep 2021 10:59:44 +0200 Subject: refactor(compute): Integrate fault injection into compute simulator This change moves the fault injection logic directly into the opendc-compute-simulator module, so that it can operate at a higher abstraction. In the future, we might again split the module if we can re-use some of its logic. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 37 ++++---- .../opendc/compute/simulator/failure/HostFault.kt | 36 +++++++ .../compute/simulator/failure/HostFaultInjector.kt | 65 +++++++++++++ .../simulator/failure/StartStopHostFault.kt | 55 +++++++++++ .../simulator/failure/StochasticVictimSelector.kt | 44 +++++++++ .../compute/simulator/failure/VictimSelector.kt | 35 +++++++ .../simulator/internal/HostFaultInjectorImpl.kt | 103 +++++++++++++++++++++ 7 files changed, 356 insertions(+), 19 deletions(-) create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFault.kt create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFaultInjector.kt create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StartStopHostFault.kt create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/VictimSelector.kt create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 213d20ee..a1cc3390 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -43,7 +43,6 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerDriver import org.opendc.simulator.compute.power.SimplePowerDriver -import org.opendc.simulator.failures.FailureDomain import org.opendc.simulator.resources.SimResourceInterpreter import java.util.* import kotlin.coroutines.CoroutineContext @@ -66,11 +65,11 @@ public class SimHost( powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), private val mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), interferenceDomain: VmInterferenceDomain? = null -) : Host, FailureDomain, AutoCloseable { +) : Host, AutoCloseable { /** * The [CoroutineScope] of the host bounded by the lifecycle of the host. */ - override val scope: CoroutineScope = CoroutineScope(context + Job()) + private val scope: CoroutineScope = CoroutineScope(context + Job()) /** * The clock instance used by the host. @@ -347,6 +346,22 @@ public class SimHost( override fun toString(): String = "SimHost[uid=$uid,name=$name,model=$model]" + public suspend fun fail() { + reportTime() + _state = HostState.DOWN + for (guest in guests.values) { + guest.fail() + } + } + + public suspend fun recover() { + reportTime() + _state = HostState.UP + for (guest in guests.values) { + guest.start() + } + } + /** * Convert flavor to machine model. */ @@ -369,22 +384,6 @@ public class SimHost( listeners.forEach { it.onStateChanged(this, vm.server, vm.state) } } - override suspend fun fail() { - reportTime() - _state = HostState.DOWN - for (guest in guests.values) { - guest.fail() - } - } - - override suspend fun recover() { - reportTime() - _state = HostState.UP - for (guest in guests.values) { - guest.start() - } - } - /** * A virtual machine instance that the driver manages. */ diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFault.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFault.kt new file mode 100644 index 00000000..258ccc89 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFault.kt @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.failure + +import org.opendc.compute.simulator.SimHost +import java.time.Clock + +/** + * Interface responsible for applying the fault to a host. + */ +public interface HostFault { + /** + * Apply the fault to the specified [victims]. + */ + public suspend fun apply(clock: Clock, victims: List) +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFaultInjector.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFaultInjector.kt new file mode 100644 index 00000000..5eff439f --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/HostFaultInjector.kt @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.failure + +import org.apache.commons.math3.distribution.RealDistribution +import org.opendc.compute.simulator.SimHost +import org.opendc.compute.simulator.internal.HostFaultInjectorImpl +import java.time.Clock +import kotlin.coroutines.CoroutineContext + +/** + * An interface for stochastically injecting faults into a set of hosts. + */ +public interface HostFaultInjector : AutoCloseable { + /** + * Start fault injection. + */ + public fun start() + + /** + * Stop fault injection into the system. + */ + public override fun close() + + public companion object { + /** + * Construct a new [HostFaultInjector]. + * + * @param context The scope to run the fault injector in. + * @param clock The [Clock] to keep track of simulation time. + * @param hosts The hosts to inject the faults into. + * @param iat The inter-arrival time distribution of the failures (in hours). + * @param selector The [VictimSelector] to select the host victims. + * @param fault The type of [HostFault] to inject. + */ + public operator fun invoke( + context: CoroutineContext, + clock: Clock, + hosts: Set, + iat: RealDistribution, + selector: VictimSelector, + fault: HostFault + ): HostFaultInjector = HostFaultInjectorImpl(context, clock, hosts, iat, selector, fault) + } +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StartStopHostFault.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StartStopHostFault.kt new file mode 100644 index 00000000..fc7cebfc --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StartStopHostFault.kt @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.failure + +import kotlinx.coroutines.delay +import org.apache.commons.math3.distribution.RealDistribution +import org.opendc.compute.simulator.SimHost +import java.time.Clock +import kotlin.math.roundToLong + +/** + * A type of [HostFault] where the hosts are stopped and recover after some random amount of time. + */ +public class StartStopHostFault(private val duration: RealDistribution) : HostFault { + override suspend fun apply(clock: Clock, victims: List) { + for (host in victims) { + host.fail() + } + + val df = (duration.sample() * 1000).roundToLong() // seconds to milliseconds + + // Handle long overflow + if (clock.millis() + df <= 0) { + return + } + + delay(df) + + for (host in victims) { + host.recover() + } + } + + override fun toString(): String = "StartStopHostFault[$duration]" +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt new file mode 100644 index 00000000..87903623 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.failure + +import org.apache.commons.math3.distribution.RealDistribution +import org.opendc.compute.simulator.SimHost +import kotlin.math.roundToInt +import kotlin.random.Random + +/** + * A [VictimSelector] that stochastically selects a set of hosts to be failed. + */ +public class StochasticVictimSelector( + private val size: RealDistribution, + private val random: Random = Random(0) +) : VictimSelector { + + override fun select(hosts: Set): List { + val n = size.sample().roundToInt() + return hosts.shuffled(random).take(n) + } + + override fun toString(): String = "StochasticVictimSelector[$size]" +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/VictimSelector.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/VictimSelector.kt new file mode 100644 index 00000000..b5610284 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/VictimSelector.kt @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.failure + +import org.opendc.compute.simulator.SimHost + +/** + * Interface responsible for selecting the victim(s) for fault injection. + */ +public interface VictimSelector { + /** + * Select the hosts from [hosts] where a fault will be injected. + */ + public fun select(hosts: Set): List +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt new file mode 100644 index 00000000..6919b7fd --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.internal + +import kotlinx.coroutines.* +import org.apache.commons.math3.distribution.RealDistribution +import org.opendc.compute.simulator.SimHost +import org.opendc.compute.simulator.failure.HostFault +import org.opendc.compute.simulator.failure.HostFaultInjector +import org.opendc.compute.simulator.failure.VictimSelector +import java.time.Clock +import kotlin.coroutines.CoroutineContext +import kotlin.math.roundToLong + +/** + * Internal implementation of the [HostFaultInjector] interface. + * + * @param context The scope to run the fault injector in. + * @param clock The [Clock] to keep track of simulation time. + * @param hosts The set of hosts to inject faults into. + * @param iat The inter-arrival time distribution of the failures (in hours). + * @param selector The [VictimSelector] to select the host victims. + * @param fault The type of [HostFault] to inject. + */ +internal class HostFaultInjectorImpl( + private val context: CoroutineContext, + private val clock: Clock, + private val hosts: Set, + private val iat: RealDistribution, + private val selector: VictimSelector, + private val fault: HostFault +) : HostFaultInjector { + /** + * The scope in which the injector runs. + */ + private val scope = CoroutineScope(context + Job()) + + /** + * The [Job] that awaits the nearest fault in the system. + */ + private var job: Job? = null + + /** + * Start the fault injection into the system. + */ + override fun start() { + if (job != null) { + return + } + + job = scope.launch { + runInjector() + job = null + } + } + + /** + * Run the injection process. + */ + private suspend fun runInjector() { + while (true) { + // Make sure to convert delay from hours to milliseconds + val d = (iat.sample() * 3.6e6).roundToLong() + + // Handle long overflow + if (clock.millis() + d <= 0) { + return + } + + delay(d) + + val victims = selector.select(hosts) + fault.apply(clock, victims) + } + } + + /** + * Stop the fault injector. + */ + public override fun close() { + scope.cancel() + } +} -- cgit v1.2.3 From f142be8c2afd1f19bdec183f1169639b7cd1a472 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 10 Sep 2021 16:54:46 +0200 Subject: test(compute): Add test suite for fault injector --- .../simulator/failure/HostFaultInjectorTest.kt | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/failure/HostFaultInjectorTest.kt (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/failure/HostFaultInjectorTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/failure/HostFaultInjectorTest.kt new file mode 100644 index 00000000..f240a25f --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/failure/HostFaultInjectorTest.kt @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.failure + +import io.mockk.coVerify +import io.mockk.mockk +import kotlinx.coroutines.delay +import org.apache.commons.math3.distribution.LogNormalDistribution +import org.apache.commons.math3.random.Well19937c +import org.junit.jupiter.api.Test +import org.opendc.compute.simulator.SimHost +import org.opendc.simulator.core.runBlockingSimulation +import java.time.Clock +import java.time.Duration +import kotlin.coroutines.CoroutineContext +import kotlin.math.ln + +/** + * Test suite for [HostFaultInjector] class. + */ +internal class HostFaultInjectorTest { + /** + * Simple test case to test that nothing happens when the injector is not started. + */ + @Test + fun testInjectorNotStarted() = runBlockingSimulation { + val host = mockk(relaxUnitFun = true) + + val injector = createSimpleInjector(coroutineContext, clock, setOf(host)) + + coVerify(exactly = 0) { host.fail() } + coVerify(exactly = 0) { host.recover() } + + injector.close() + } + + /** + * Simple test case to test a start stop fault where the machine is stopped and started after some time. + */ + @Test + fun testInjectorStopsMachine() = runBlockingSimulation { + val host = mockk(relaxUnitFun = true) + + val injector = createSimpleInjector(coroutineContext, clock, setOf(host)) + + injector.start() + + delay(Duration.ofDays(55).toMillis()) + + injector.close() + + coVerify(exactly = 1) { host.fail() } + coVerify(exactly = 1) { host.recover() } + } + + /** + * Simple test case to test a start stop fault where multiple machines are stopped. + */ + @Test + fun testInjectorStopsMultipleMachines() = runBlockingSimulation { + val hosts = listOf( + mockk(relaxUnitFun = true), + mockk(relaxUnitFun = true) + ) + + val injector = createSimpleInjector(coroutineContext, clock, hosts.toSet()) + + injector.start() + + delay(Duration.ofDays(55).toMillis()) + + injector.close() + + coVerify(exactly = 1) { hosts[0].fail() } + coVerify(exactly = 1) { hosts[1].fail() } + coVerify(exactly = 1) { hosts[0].recover() } + coVerify(exactly = 1) { hosts[1].recover() } + } + + /** + * Create a simple start stop fault injector. + */ + private fun createSimpleInjector(context: CoroutineContext, clock: Clock, hosts: Set): HostFaultInjector { + val rng = Well19937c(0) + val iat = LogNormalDistribution(rng, ln(24 * 7.0), 1.03) + val selector = StochasticVictimSelector(LogNormalDistribution(rng, 1.88, 1.25)) + val fault = StartStopHostFault(LogNormalDistribution(rng, 8.89, 2.71)) + + return HostFaultInjector(context, clock, hosts, iat, selector, fault) + } +} -- cgit v1.2.3 From 5f0b6b372487d79594cf59010822e160f351e0be Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 13 Sep 2021 14:48:02 +0200 Subject: refactor(telemetry): Simplify CoroutineMetricReader This change simplifies the CoroutineMetricReader implementation by removing the seperation of reader and exporter jobs. --- .../src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 31215e9a..9fa8af34 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -46,6 +46,7 @@ import org.opendc.simulator.core.runBlockingSimulation import org.opendc.simulator.resources.SimResourceInterpreter import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.telemetry.sdk.toOtelClock +import java.time.Duration import java.util.* import kotlin.coroutines.resume @@ -149,7 +150,7 @@ internal class SimHostTest { override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess() }, - exportInterval = duration * 1000L + exportInterval = Duration.ofSeconds(duration) ) coroutineScope { @@ -261,7 +262,7 @@ internal class SimHostTest { override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess() }, - exportInterval = duration * 1000L + exportInterval = Duration.ofSeconds(duration) ) coroutineScope { -- cgit v1.2.3 From 3ca64e0110adab65526a0ccfd5b252e9f047ab10 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 14 Sep 2021 14:41:05 +0200 Subject: refactor(telemetry): Create separate MeterProvider per service/host This change refactors the telemetry implementation by creating a separate MeterProvider per service or host. This means we have to keep track of multiple metric producers, but that we can attach resource information to each of the MeterProviders like we would in a real world scenario. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 43 ++++++++++------- .../org/opendc/compute/simulator/SimHostTest.kt | 54 ++++++++-------------- 2 files changed, 46 insertions(+), 51 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index a1cc3390..be6ef11e 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -25,6 +25,7 @@ package org.opendc.compute.simulator import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.common.Attributes import io.opentelemetry.api.metrics.Meter +import io.opentelemetry.api.metrics.MeterProvider import io.opentelemetry.semconv.resource.attributes.ResourceAttributes import kotlinx.coroutines.* import mu.KotlinLogging @@ -59,7 +60,7 @@ public class SimHost( override val meta: Map, context: CoroutineContext, interpreter: SimResourceInterpreter, - private val meter: Meter, + meterProvider: MeterProvider, hypervisor: SimHypervisorProvider, scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), @@ -81,6 +82,11 @@ public class SimHost( */ private val logger = KotlinLogging.logger {} + /** + * The [Meter] to track metrics of the simulated host. + */ + private val meter = meterProvider.get("org.opendc.compute.simulator") + /** * The event listeners registered with this host. */ @@ -142,10 +148,9 @@ public class SimHost( * The total number of guests. */ private val _guests = meter.upDownCounterBuilder("guests.total") - .setDescription("Number of guests") + .setDescription("Total number of guests") .setUnit("1") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The number of active guests on the host. @@ -154,7 +159,6 @@ public class SimHost( .setDescription("Number of active guests") .setUnit("1") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The CPU demand of the host. @@ -163,7 +167,6 @@ public class SimHost( .setDescription("The amount of CPU resources the guests would use if there were no CPU contention or CPU limits") .setUnit("MHz") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The CPU usage of the host. @@ -172,7 +175,6 @@ public class SimHost( .setDescription("The amount of CPU resources used by the host") .setUnit("MHz") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The power usage of the host. @@ -181,7 +183,6 @@ public class SimHost( .setDescription("The amount of power used by the CPU") .setUnit("W") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The total amount of work supplied to the CPU. @@ -191,7 +192,6 @@ public class SimHost( .setUnit("1") .ofDoubles() .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The work performed by the CPU. @@ -201,7 +201,6 @@ public class SimHost( .setUnit("1") .ofDoubles() .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The amount not performed by the CPU due to overcommitment. @@ -211,7 +210,6 @@ public class SimHost( .setUnit("1") .ofDoubles() .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The amount of work not performed by the CPU due to interference. @@ -221,7 +219,6 @@ public class SimHost( .setUnit("1") .ofDoubles() .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The amount of time in the system. @@ -230,7 +227,6 @@ public class SimHost( .setDescription("The amount of time in the system") .setUnit("ms") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The uptime of the host. @@ -239,7 +235,6 @@ public class SimHost( .setDescription("The uptime of the host") .setUnit("ms") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) /** * The downtime of the host. @@ -248,7 +243,6 @@ public class SimHost( .setDescription("The downtime of the host") .setUnit("ms") .build() - .bind(Attributes.of(ResourceAttributes.HOST_ID, uid.toString())) init { // Launch hypervisor onto machine @@ -390,6 +384,21 @@ public class SimHost( private inner class Guest(val server: Server, val machine: SimMachine) { var state: ServerState = ServerState.TERMINATED + /** + * The attributes of the guest. + */ + val attributes: Attributes = Attributes.builder() + .put(ResourceAttributes.HOST_NAME, server.name) + .put(ResourceAttributes.HOST_ID, server.uid.toString()) + .put(ResourceAttributes.HOST_TYPE, server.flavor.name) + .put(AttributeKey.longKey("host.num_cpus"), server.flavor.cpuCount.toLong()) + .put(AttributeKey.longKey("host.mem_capacity"), server.flavor.memorySize) + .put(AttributeKey.stringArrayKey("host.labels"), server.labels.map { (k, v) -> "$k:$v" }) + .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64) + .put(ResourceAttributes.HOST_IMAGE_NAME, server.image.name) + .put(ResourceAttributes.HOST_IMAGE_ID, server.image.uid.toString()) + .build() + /** * The amount of time in the system. */ @@ -397,7 +406,7 @@ public class SimHost( .setDescription("The amount of time in the system") .setUnit("ms") .build() - .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + .bind(attributes) /** * The uptime of the guest. @@ -406,7 +415,7 @@ public class SimHost( .setDescription("The uptime of the guest") .setUnit("ms") .build() - .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + .bind(attributes) /** * The time the guest is in an error state. @@ -415,7 +424,7 @@ public class SimHost( .setDescription("The time the guest is in an error state") .setUnit("ms") .build() - .bind(Attributes.of(AttributeKey.stringKey("server.id"), server.uid.toString())) + .bind(attributes) suspend fun start() { when (state) { diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 9fa8af34..318b5a5d 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -49,6 +49,7 @@ import org.opendc.telemetry.sdk.toOtelClock import java.time.Duration import java.util.* import kotlin.coroutines.resume +import kotlin.math.roundToLong /** * Basic test-suite for the hypervisor. @@ -72,7 +73,7 @@ internal class SimHostTest { */ @Test fun testOvercommitted() = runBlockingSimulation { - var requestedWork = 0L + var totalWork = 0L var grantedWork = 0L var overcommittedWork = 0L @@ -89,7 +90,7 @@ internal class SimHostTest { meta = emptyMap(), coroutineContext, interpreter, - meterProvider.get("opendc-compute-simulator"), + meterProvider, SimFairShareHypervisorProvider() ) val duration = 5 * 60L @@ -134,15 +135,10 @@ internal class SimHostTest { object : MetricExporter { override fun export(metrics: Collection): CompletableResultCode { val metricsByName = metrics.associateBy { it.name } - metricsByName["cpu.work.total"]?.let { - requestedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() - } - metricsByName["cpu.work.granted"]?.let { - grantedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() - } - metricsByName["cpu.work.overcommit"]?.let { - overcommittedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() - } + + totalWork = metricsByName.getValue("cpu.work.total").doubleSumData.points.first().value.roundToLong() + grantedWork = metricsByName.getValue("cpu.work.granted").doubleSumData.points.first().value.roundToLong() + overcommittedWork = metricsByName.getValue("cpu.work.overcommit").doubleSumData.points.first().value.roundToLong() return CompletableResultCode.ofSuccess() } @@ -176,7 +172,7 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(4147200, requestedWork, "Requested work does not match") }, + { assertEquals(4147200, totalWork, "Requested work does not match") }, { assertEquals(2107200, grantedWork, "Granted work does not match") }, { assertEquals(2040000, overcommittedWork, "Overcommitted work does not match") }, { assertEquals(1500001, clock.millis()) } @@ -188,7 +184,7 @@ internal class SimHostTest { */ @Test fun testFailure() = runBlockingSimulation { - var requestedWork = 0L + var totalWork = 0L var grantedWork = 0L var totalTime = 0L var downTime = 0L @@ -208,7 +204,7 @@ internal class SimHostTest { meta = emptyMap(), coroutineContext, interpreter, - meterProvider.get("opendc-compute-simulator"), + meterProvider, SimFairShareHypervisorProvider() ) val duration = 5 * 60L @@ -237,24 +233,14 @@ internal class SimHostTest { object : MetricExporter { override fun export(metrics: Collection): CompletableResultCode { val metricsByName = metrics.associateBy { it.name } - metricsByName["cpu.work.total"]?.let { - requestedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() - } - metricsByName["cpu.work.granted"]?.let { - grantedWork = it.doubleSumData.points.sumOf { point -> point.value }.toLong() - } - metricsByName["host.time.total"]?.let { - totalTime = it.longSumData.points.first().value - } - metricsByName["host.time.down"]?.let { - downTime = it.longSumData.points.first().value - } - metricsByName["guest.time.total"]?.let { - guestTotalTime = it.longSumData.points.first().value - } - metricsByName["guest.time.error"]?.let { - guestDownTime = it.longSumData.points.first().value - } + + totalWork = metricsByName.getValue("cpu.work.total").doubleSumData.points.first().value.roundToLong() + grantedWork = metricsByName.getValue("cpu.work.granted").doubleSumData.points.first().value.roundToLong() + totalTime = metricsByName.getValue("host.time.total").longSumData.points.first().value + downTime = metricsByName.getValue("host.time.down").longSumData.points.first().value + guestTotalTime = metricsByName.getValue("guest.time.total").longSumData.points.first().value + guestDownTime = metricsByName.getValue("guest.time.error").longSumData.points.first().value + return CompletableResultCode.ofSuccess() } @@ -290,8 +276,8 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(2226039, requestedWork, "Total time does not match") }, - { assertEquals(1086039, grantedWork, "Down time does not match") }, + { assertEquals(2226040, totalWork, "Total time does not match") }, + { assertEquals(1086040, grantedWork, "Down time does not match") }, { assertEquals(1200001, totalTime, "Total time does not match") }, { assertEquals(1200001, guestTotalTime, "Guest total time does not match") }, { assertEquals(5000, downTime, "Down time does not match") }, -- cgit v1.2.3 From 0d8bccc68705d036fbf60f312d9c34ca4392c6b2 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 7 Sep 2021 17:30:46 +0200 Subject: refactor(telemetry): Standardize SimHost metrics This change standardizes the metrics emitted by SimHost instances and their guests based on the OpenTelemetry semantic conventions. We now also report CPU time as opposed to CPU work as this metric is more commonly used. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 504 ++++++++++----------- .../org/opendc/compute/simulator/internal/Guest.kt | 305 +++++++++++++ .../compute/simulator/internal/GuestListener.kt | 38 ++ .../org/opendc/compute/simulator/SimHostTest.kt | 117 ++--- 4 files changed, 636 insertions(+), 328 deletions(-) create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt create mode 100644 opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/GuestListener.kt (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index be6ef11e..793db907 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -26,13 +26,16 @@ import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.common.Attributes import io.opentelemetry.api.metrics.Meter import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.semconv.resource.attributes.ResourceAttributes +import io.opentelemetry.api.metrics.ObservableDoubleMeasurement +import io.opentelemetry.api.metrics.ObservableLongMeasurement import kotlinx.coroutines.* import mu.KotlinLogging import org.opendc.compute.api.Flavor import org.opendc.compute.api.Server import org.opendc.compute.api.ServerState import org.opendc.compute.service.driver.* +import org.opendc.compute.simulator.internal.Guest +import org.opendc.compute.simulator.internal.GuestListener import org.opendc.simulator.compute.* import org.opendc.simulator.compute.kernel.SimHypervisor import org.opendc.simulator.compute.kernel.SimHypervisorProvider @@ -44,11 +47,11 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerDriver import org.opendc.simulator.compute.power.SimplePowerDriver +import org.opendc.simulator.resources.SimResourceDistributorMaxMin import org.opendc.simulator.resources.SimResourceInterpreter import java.util.* import kotlin.coroutines.CoroutineContext -import kotlin.coroutines.resume -import kotlin.coroutines.resumeWithException +import kotlin.math.roundToLong /** * A [Host] that is simulates virtual machines on a physical machine using [SimHypervisor]. @@ -100,32 +103,29 @@ public class SimHost( /** * The hypervisor to run multiple workloads. */ - public val hypervisor: SimHypervisor = hypervisor.create( + private val hypervisor: SimHypervisor = hypervisor.create( interpreter, scalingGovernor = scalingGovernor, interferenceDomain = interferenceDomain, listener = object : SimHypervisor.Listener { override fun onSliceFinish( hypervisor: SimHypervisor, - requestedWork: Double, + totalWork: Double, grantedWork: Double, overcommittedWork: Double, interferedWork: Double, cpuUsage: Double, cpuDemand: Double ) { - _totalWork.add(requestedWork) - _grantedWork.add(grantedWork) - _overcommittedWork.add(overcommittedWork) - _interferedWork.add(interferedWork) - _cpuDemand.record(cpuDemand) - _cpuUsage.record(cpuUsage) - _powerUsage.record(machine.powerDraw) - - reportTime() + _cpuDemand = cpuDemand + _cpuUsage = cpuUsage + + collectTime() } } ) + private var _cpuUsage = 0.0 + private var _cpuDemand = 0.0 /** * The virtual machines running on the hypervisor. @@ -145,109 +145,23 @@ public class SimHost( override val model: HostModel = HostModel(model.cpus.size, model.memory.sumOf { it.size }) /** - * The total number of guests. - */ - private val _guests = meter.upDownCounterBuilder("guests.total") - .setDescription("Total number of guests") - .setUnit("1") - .build() - - /** - * The number of active guests on the host. - */ - private val _activeGuests = meter.upDownCounterBuilder("guests.active") - .setDescription("Number of active guests") - .setUnit("1") - .build() - - /** - * The CPU demand of the host. - */ - private val _cpuDemand = meter.histogramBuilder("cpu.demand") - .setDescription("The amount of CPU resources the guests would use if there were no CPU contention or CPU limits") - .setUnit("MHz") - .build() - - /** - * The CPU usage of the host. - */ - private val _cpuUsage = meter.histogramBuilder("cpu.usage") - .setDescription("The amount of CPU resources used by the host") - .setUnit("MHz") - .build() - - /** - * The power usage of the host. - */ - private val _powerUsage = meter.histogramBuilder("power.usage") - .setDescription("The amount of power used by the CPU") - .setUnit("W") - .build() - - /** - * The total amount of work supplied to the CPU. - */ - private val _totalWork = meter.counterBuilder("cpu.work.total") - .setDescription("The amount of work supplied to the CPU") - .setUnit("1") - .ofDoubles() - .build() - - /** - * The work performed by the CPU. - */ - private val _grantedWork = meter.counterBuilder("cpu.work.granted") - .setDescription("The amount of work performed by the CPU") - .setUnit("1") - .ofDoubles() - .build() - - /** - * The amount not performed by the CPU due to overcommitment. - */ - private val _overcommittedWork = meter.counterBuilder("cpu.work.overcommit") - .setDescription("The amount of work not performed by the CPU due to overcommitment") - .setUnit("1") - .ofDoubles() - .build() - - /** - * The amount of work not performed by the CPU due to interference. - */ - private val _interferedWork = meter.counterBuilder("cpu.work.interference") - .setDescription("The amount of work not performed by the CPU due to interference") - .setUnit("1") - .ofDoubles() - .build() - - /** - * The amount of time in the system. + * The [GuestListener] that listens for guest events. */ - private val _totalTime = meter.counterBuilder("host.time.total") - .setDescription("The amount of time in the system") - .setUnit("ms") - .build() - - /** - * The uptime of the host. - */ - private val _upTime = meter.counterBuilder("host.time.up") - .setDescription("The uptime of the host") - .setUnit("ms") - .build() + private val guestListener = object : GuestListener { + override fun onStart(guest: Guest) { + listeners.forEach { it.onStateChanged(this@SimHost, guest.server, guest.state) } + } - /** - * The downtime of the host. - */ - private val _downTime = meter.counterBuilder("host.time.down") - .setDescription("The downtime of the host") - .setUnit("ms") - .build() + override fun onStop(guest: Guest) { + listeners.forEach { it.onStateChanged(this@SimHost, guest.server, guest.state) } + } + } init { // Launch hypervisor onto machine scope.launch { try { + _bootTime = clock.millis() _state = HostState.UP machine.run(this@SimHost.hypervisor, emptyMap()) } catch (_: CancellationException) { @@ -259,29 +173,48 @@ public class SimHost( _state = HostState.DOWN } } - } - - private var _lastReport = clock.millis() - - private fun reportTime() { - if (!scope.isActive) - return - - val now = clock.millis() - val duration = now - _lastReport - - _totalTime.add(duration) - when (_state) { - HostState.UP -> _upTime.add(duration) - HostState.DOWN -> _downTime.add(duration) - } - // Track time of guests - for (guest in guests.values) { - guest.reportTime() - } - - _lastReport = now + meter.upDownCounterBuilder("system.guests") + .setDescription("Number of guests on this host") + .setUnit("1") + .buildWithCallback(::collectGuests) + meter.gaugeBuilder("system.cpu.limit") + .setDescription("Amount of CPU resources available to the host") + .buildWithCallback(::collectCpuLimit) + meter.gaugeBuilder("system.cpu.demand") + .setDescription("Amount of CPU resources the guests would use if there were no CPU contention or CPU limits") + .setUnit("MHz") + .buildWithCallback { result -> result.observe(_cpuDemand) } + meter.gaugeBuilder("system.cpu.usage") + .setDescription("Amount of CPU resources used by the host") + .setUnit("MHz") + .buildWithCallback { result -> result.observe(_cpuUsage) } + meter.gaugeBuilder("system.cpu.utilization") + .setDescription("Utilization of the CPU resources of the host") + .setUnit("%") + .buildWithCallback { result -> result.observe(_cpuUsage / _cpuLimit) } + meter.counterBuilder("system.cpu.time") + .setDescription("Amount of CPU time spent by the host") + .setUnit("s") + .buildWithCallback(::collectCpuTime) + meter.gaugeBuilder("system.power.usage") + .setDescription("Power usage of the host ") + .setUnit("W") + .buildWithCallback { result -> result.observe(machine.powerDraw) } + meter.counterBuilder("system.power.total") + .setDescription("Amount of energy used by the CPU") + .setUnit("J") + .ofDoubles() + .buildWithCallback(::collectPowerTotal) + meter.counterBuilder("system.time") + .setDescription("The uptime of the host") + .setUnit("s") + .buildWithCallback(::collectTime) + meter.gaugeBuilder("system.time.boot") + .setDescription("The boot time of the host") + .setUnit("1") + .ofLongs() + .buildWithCallback(::collectBootTime) } override fun canFit(server: Server): Boolean { @@ -295,8 +228,17 @@ public class SimHost( override suspend fun spawn(server: Server, start: Boolean) { val guest = guests.computeIfAbsent(server) { key -> require(canFit(key)) { "Server does not fit" } - _guests.add(1) - Guest(key, hypervisor.createMachine(key.flavor.toMachineModel(), key.name)) + + val machine = hypervisor.createMachine(key.flavor.toMachineModel(), key.name) + Guest( + scope.coroutineContext, + clock, + this, + mapper, + guestListener, + server, + machine + ) } if (start) { @@ -320,7 +262,6 @@ public class SimHost( override suspend fun delete(server: Server) { val guest = guests.remove(server) ?: return - _guests.add(-1) guest.terminate() } @@ -333,7 +274,7 @@ public class SimHost( } override fun close() { - reportTime() + reset() scope.cancel() machine.close() } @@ -341,21 +282,34 @@ public class SimHost( override fun toString(): String = "SimHost[uid=$uid,name=$name,model=$model]" public suspend fun fail() { - reportTime() - _state = HostState.DOWN + reset() + for (guest in guests.values) { guest.fail() } } public suspend fun recover() { - reportTime() + collectTime() _state = HostState.UP + _bootTime = clock.millis() + for (guest in guests.values) { guest.start() } } + /** + * Reset the machine. + */ + private fun reset() { + collectTime() + + _state = HostState.DOWN + _cpuUsage = 0.0 + _cpuDemand = 0.0 + } + /** * Convert flavor to machine model. */ @@ -368,162 +322,168 @@ public class SimHost( return MachineModel(processingUnits, memoryUnits) } - private fun onGuestStart(vm: Guest) { - _activeGuests.add(1) - listeners.forEach { it.onStateChanged(this, vm.server, vm.state) } - } + private val STATE_KEY = AttributeKey.stringKey("state") - private fun onGuestStop(vm: Guest) { - _activeGuests.add(-1) - listeners.forEach { it.onStateChanged(this, vm.server, vm.state) } - } + private val terminatedState = Attributes.of(STATE_KEY, "terminated") + private val runningState = Attributes.of(STATE_KEY, "running") + private val errorState = Attributes.of(STATE_KEY, "error") + private val invalidState = Attributes.of(STATE_KEY, "invalid") /** - * A virtual machine instance that the driver manages. + * Helper function to collect the guest counts on this host. */ - private inner class Guest(val server: Server, val machine: SimMachine) { - var state: ServerState = ServerState.TERMINATED - - /** - * The attributes of the guest. - */ - val attributes: Attributes = Attributes.builder() - .put(ResourceAttributes.HOST_NAME, server.name) - .put(ResourceAttributes.HOST_ID, server.uid.toString()) - .put(ResourceAttributes.HOST_TYPE, server.flavor.name) - .put(AttributeKey.longKey("host.num_cpus"), server.flavor.cpuCount.toLong()) - .put(AttributeKey.longKey("host.mem_capacity"), server.flavor.memorySize) - .put(AttributeKey.stringArrayKey("host.labels"), server.labels.map { (k, v) -> "$k:$v" }) - .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64) - .put(ResourceAttributes.HOST_IMAGE_NAME, server.image.name) - .put(ResourceAttributes.HOST_IMAGE_ID, server.image.uid.toString()) - .build() - - /** - * The amount of time in the system. - */ - private val _totalTime = meter.counterBuilder("guest.time.total") - .setDescription("The amount of time in the system") - .setUnit("ms") - .build() - .bind(attributes) - - /** - * The uptime of the guest. - */ - private val _runningTime = meter.counterBuilder("guest.time.running") - .setDescription("The uptime of the guest") - .setUnit("ms") - .build() - .bind(attributes) - - /** - * The time the guest is in an error state. - */ - private val _errorTime = meter.counterBuilder("guest.time.error") - .setDescription("The time the guest is in an error state") - .setUnit("ms") - .build() - .bind(attributes) - - suspend fun start() { - when (state) { - ServerState.TERMINATED, ServerState.ERROR -> { - logger.info { "User requested to start server ${server.uid}" } - launch() - } - ServerState.RUNNING -> return - ServerState.DELETED -> { - logger.warn { "User tried to start terminated server" } - throw IllegalArgumentException("Server is terminated") - } - else -> assert(false) { "Invalid state transition" } + private fun collectGuests(result: ObservableLongMeasurement) { + var terminated = 0L + var running = 0L + var error = 0L + var invalid = 0L + + for ((_, guest) in guests) { + when (guest.state) { + ServerState.TERMINATED -> terminated++ + ServerState.RUNNING -> running++ + ServerState.ERROR -> error++ + else -> invalid++ } } - suspend fun stop() { - when (state) { - ServerState.RUNNING, ServerState.ERROR -> { - val job = job ?: throw IllegalStateException("Server should be active") - job.cancel() - job.join() - } - ServerState.TERMINATED, ServerState.DELETED -> return - else -> assert(false) { "Invalid state transition" } - } - } + result.observe(terminated, terminatedState) + result.observe(running, runningState) + result.observe(error, errorState) + result.observe(invalid, invalidState) + } - suspend fun terminate() { - stop() - machine.close() - state = ServerState.DELETED + private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } + + /** + * Helper function to collect the CPU limits of a machine. + */ + private fun collectCpuLimit(result: ObservableDoubleMeasurement) { + result.observe(_cpuLimit) + + for (guest in guests.values) { + guest.collectCpuLimit(result) } + } - suspend fun fail() { - if (state != ServerState.RUNNING) { - return - } - stop() - state = ServerState.ERROR + private var _lastCpuTimeCallback = clock.millis() + + /** + * Helper function to track the CPU time of a machine. + */ + private fun collectCpuTime(result: ObservableLongMeasurement) { + val now = clock.millis() + val duration = now - _lastCpuTimeCallback + + try { + collectCpuTime(duration, result) + } finally { + _lastCpuTimeCallback = now } + } - private var job: Job? = null - - private suspend fun launch() = suspendCancellableCoroutine { cont -> - assert(job == null) { "Concurrent job running" } - val workload = mapper.createWorkload(server) - - job = scope.launch { - try { - delay(1) // TODO Introduce boot time - init() - cont.resume(Unit) - } catch (e: Throwable) { - cont.resumeWithException(e) - } - try { - machine.run(workload, mapOf("driver" to this@SimHost, "server" to server)) - exit(null) - } catch (cause: Throwable) { - exit(cause) - } finally { - job = null - } - } + private val _activeState = Attributes.of(STATE_KEY, "active") + private val _stealState = Attributes.of(STATE_KEY, "steal") + private val _lostState = Attributes.of(STATE_KEY, "lost") + private val _idleState = Attributes.of(STATE_KEY, "idle") + private var _totalTime = 0.0 + + /** + * Helper function to track the CPU time of a machine. + */ + private fun collectCpuTime(duration: Long, result: ObservableLongMeasurement) { + val coreCount = this.model.cpuCount + val d = coreCount / _cpuLimit + + val counters = hypervisor.counters + val grantedWork = counters.actual + val overcommittedWork = counters.overcommit + val interferedWork = (counters as? SimResourceDistributorMaxMin.Counters)?.interference ?: 0.0 + + _totalTime += (duration / 1000.0) * coreCount + val activeTime = (grantedWork * d).roundToLong() + val idleTime = (_totalTime - grantedWork * d).roundToLong() + val stealTime = (overcommittedWork * d).roundToLong() + val lostTime = (interferedWork * d).roundToLong() + + result.observe(activeTime, _activeState) + result.observe(idleTime, _idleState) + result.observe(stealTime, _stealState) + result.observe(lostTime, _lostState) + + for (guest in guests.values) { + guest.collectCpuTime(duration, result) } + } + + private var _lastPowerCallback = clock.millis() + private var _totalPower = 0.0 - private fun init() { - state = ServerState.RUNNING - onGuestStart(this) + /** + * Helper function to collect the total power usage of the machine. + */ + private fun collectPowerTotal(result: ObservableDoubleMeasurement) { + val now = clock.millis() + val duration = now - _lastPowerCallback + + _totalPower += duration / 1000.0 * machine.powerDraw + result.observe(_totalPower) + + _lastPowerCallback = now + } + + private var _lastReport = clock.millis() + + /** + * Helper function to track the uptime of a machine. + */ + private fun collectTime(result: ObservableLongMeasurement? = null) { + val now = clock.millis() + val duration = now - _lastReport + + try { + collectTime(duration, result) + } finally { + _lastReport = now } + } - private fun exit(cause: Throwable?) { - state = - if (cause == null) - ServerState.TERMINATED - else - ServerState.ERROR + private var _uptime = 0L + private var _downtime = 0L + private val _upState = Attributes.of(STATE_KEY, "up") + private val _downState = Attributes.of(STATE_KEY, "down") - onGuestStop(this) + /** + * Helper function to track the uptime of a machine. + */ + private fun collectTime(duration: Long, result: ObservableLongMeasurement? = null) { + if (state == HostState.UP) { + _uptime += duration + } else if (state == HostState.DOWN && scope.isActive) { + // Only increment downtime if the machine is in a failure state + _downtime += duration } - private var _lastReport = clock.millis() + result?.observe(_uptime, _upState) + result?.observe(_downtime, _downState) - fun reportTime() { - if (state == ServerState.DELETED) - return + for (guest in guests.values) { + guest.collectUptime(duration, result) + } + } - val now = clock.millis() - val duration = now - _lastReport + private var _bootTime = Long.MIN_VALUE - _totalTime.add(duration) - when (state) { - ServerState.RUNNING -> _runningTime.add(duration) - ServerState.ERROR -> _errorTime.add(duration) - else -> {} - } + /** + * Helper function to track the boot time of a machine. + */ + private fun collectBootTime(result: ObservableLongMeasurement? = null) { + if (_bootTime != Long.MIN_VALUE) { + result?.observe(_bootTime) + } - _lastReport = now + for (guest in guests.values) { + guest.collectBootTime(result) } } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt new file mode 100644 index 00000000..90562e2f --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.internal + +import io.opentelemetry.api.common.AttributeKey +import io.opentelemetry.api.common.Attributes +import io.opentelemetry.api.metrics.ObservableDoubleMeasurement +import io.opentelemetry.api.metrics.ObservableLongMeasurement +import io.opentelemetry.semconv.resource.attributes.ResourceAttributes +import kotlinx.coroutines.* +import mu.KotlinLogging +import org.opendc.compute.api.Server +import org.opendc.compute.api.ServerState +import org.opendc.compute.simulator.SimHost +import org.opendc.compute.simulator.SimWorkloadMapper +import org.opendc.simulator.compute.SimAbstractMachine +import org.opendc.simulator.compute.SimMachine +import org.opendc.simulator.compute.workload.SimWorkload +import java.time.Clock +import kotlin.coroutines.CoroutineContext +import kotlin.math.roundToLong + +/** + * A virtual machine instance that is managed by a [SimHost]. + */ +internal class Guest( + context: CoroutineContext, + private val clock: Clock, + val host: SimHost, + private val mapper: SimWorkloadMapper, + private val listener: GuestListener, + val server: Server, + val machine: SimMachine +) { + /** + * The [CoroutineScope] of the guest. + */ + private val scope: CoroutineScope = CoroutineScope(context + Job()) + + /** + * The logger instance of this guest. + */ + private val logger = KotlinLogging.logger {} + + /** + * The state of the [Guest]. + * + * [ServerState.PROVISIONING] is an invalid value for a guest, since it applies before the host is selected for + * a server. + */ + var state: ServerState = ServerState.TERMINATED + + /** + * The attributes of the guest. + */ + val attributes: Attributes = Attributes.builder() + .put(ResourceAttributes.HOST_NAME, server.name) + .put(ResourceAttributes.HOST_ID, server.uid.toString()) + .put(ResourceAttributes.HOST_TYPE, server.flavor.name) + .put(AttributeKey.longKey("host.num_cpus"), server.flavor.cpuCount.toLong()) + .put(AttributeKey.longKey("host.mem_capacity"), server.flavor.memorySize) + .put(AttributeKey.stringArrayKey("host.labels"), server.labels.map { (k, v) -> "$k:$v" }) + .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64) + .put(ResourceAttributes.HOST_IMAGE_NAME, server.image.name) + .put(ResourceAttributes.HOST_IMAGE_ID, server.image.uid.toString()) + .build() + + /** + * Start the guest. + */ + suspend fun start() { + when (state) { + ServerState.TERMINATED, ServerState.ERROR -> { + logger.info { "User requested to start server ${server.uid}" } + doStart() + } + ServerState.RUNNING -> return + ServerState.DELETED -> { + logger.warn { "User tried to start terminated server" } + throw IllegalArgumentException("Server is terminated") + } + else -> assert(false) { "Invalid state transition" } + } + } + + /** + * Stop the guest. + */ + suspend fun stop() { + when (state) { + ServerState.RUNNING -> doStop(ServerState.TERMINATED) + ServerState.ERROR -> doRecover() + ServerState.TERMINATED, ServerState.DELETED -> return + else -> assert(false) { "Invalid state transition" } + } + } + + /** + * Terminate the guest. + * + * This operation will stop the guest if it is running on the host and remove all resources associated with the + * guest. + */ + suspend fun terminate() { + stop() + + state = ServerState.DELETED + + machine.close() + scope.cancel() + } + + /** + * Fail the guest if it is active. + * + * This operation forcibly stops the guest and puts the server into an error state. + */ + suspend fun fail() { + if (state != ServerState.RUNNING) { + return + } + + doStop(ServerState.ERROR) + } + + /** + * The [Job] representing the current active virtual machine instance or `null` if no virtual machine is active. + */ + private var job: Job? = null + + /** + * Launch the guest on the simulated + */ + private suspend fun doStart() { + assert(job == null) { "Concurrent job running" } + val workload = mapper.createWorkload(server) + + val job = scope.launch { runMachine(workload) } + this.job = job + + state = ServerState.RUNNING + onStart() + + job.invokeOnCompletion { cause -> + this.job = null + onStop(if (cause != null && cause !is CancellationException) ServerState.ERROR else ServerState.TERMINATED) + } + } + + /** + * Attempt to stop the server and put it into [target] state. + */ + private suspend fun doStop(target: ServerState) { + assert(job != null) { "Invalid job state" } + val job = job ?: return + job.cancel() + job.join() + + state = target + } + + /** + * Attempt to recover from an error state. + */ + private fun doRecover() { + state = ServerState.TERMINATED + } + + /** + * Run the process that models the virtual machine lifecycle as a coroutine. + */ + private suspend fun runMachine(workload: SimWorkload) { + delay(1) // TODO Introduce model for boot time + machine.run(workload, mapOf("driver" to host, "server" to server)) + } + + /** + * This method is invoked when the guest was started on the host and has booted into a running state. + */ + private fun onStart() { + _bootTime = clock.millis() + state = ServerState.RUNNING + listener.onStart(this) + } + + /** + * This method is invoked when the guest stopped. + */ + private fun onStop(target: ServerState) { + state = target + listener.onStop(this) + } + + private val STATE_KEY = AttributeKey.stringKey("state") + + private var _uptime = 0L + private var _downtime = 0L + private val _upState = Attributes.builder() + .putAll(attributes) + .put(STATE_KEY, "up") + .build() + private val _downState = Attributes.builder() + .putAll(attributes) + .put(STATE_KEY, "down") + .build() + + /** + * Helper function to track the uptime of the guest. + */ + fun collectUptime(duration: Long, result: ObservableLongMeasurement? = null) { + if (state == ServerState.RUNNING) { + _uptime += duration + } else if (state == ServerState.ERROR) { + _downtime += duration + } + + result?.observe(_uptime, _upState) + result?.observe(_downtime, _downState) + } + + private var _bootTime = Long.MIN_VALUE + + /** + * Helper function to track the boot time of the guest. + */ + fun collectBootTime(result: ObservableLongMeasurement? = null) { + if (_bootTime != Long.MIN_VALUE) { + result?.observe(_bootTime) + } + } + + private val _activeState = Attributes.builder() + .putAll(attributes) + .put(STATE_KEY, "active") + .build() + private val _stealState = Attributes.builder() + .putAll(attributes) + .put(STATE_KEY, "steal") + .build() + private val _lostState = Attributes.builder() + .putAll(attributes) + .put(STATE_KEY, "lost") + .build() + private val _idleState = Attributes.builder() + .putAll(attributes) + .put(STATE_KEY, "idle") + .build() + private var _totalTime = 0.0 + + /** + * Helper function to track the CPU time of a machine. + */ + fun collectCpuTime(duration: Long, result: ObservableLongMeasurement) { + val coreCount = server.flavor.cpuCount + val d = coreCount / _cpuLimit + + var grantedWork = 0.0 + var overcommittedWork = 0.0 + + for (cpu in (machine as SimAbstractMachine).cpus) { + val counters = cpu.counters + grantedWork += counters.actual + overcommittedWork += counters.overcommit + } + + _totalTime += (duration / 1000.0) * coreCount + val activeTime = (grantedWork * d).roundToLong() + val idleTime = (_totalTime - grantedWork * d).roundToLong() + val stealTime = (overcommittedWork * d).roundToLong() + + result.observe(activeTime, _activeState) + result.observe(idleTime, _idleState) + result.observe(stealTime, _stealState) + result.observe(0, _lostState) + } + + private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } + + /** + * Helper function to collect the CPU limits of a machine. + */ + fun collectCpuLimit(result: ObservableDoubleMeasurement) { + result.observe(_cpuLimit, attributes) + } +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/GuestListener.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/GuestListener.kt new file mode 100644 index 00000000..e6d0fdad --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/GuestListener.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.internal + +/** + * Helper interface to listen for [Guest] events. + */ +internal interface GuestListener { + /** + * This method is invoked when the guest machine is running. + */ + fun onStart(guest: Guest) + + /** + * This method is invoked when the guest machine is stopped. + */ + fun onStop(guest: Guest) +} diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 318b5a5d..9c879e5e 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -23,11 +23,9 @@ package org.opendc.compute.simulator import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.sdk.common.CompletableResultCode import io.opentelemetry.sdk.metrics.SdkMeterProvider -import io.opentelemetry.sdk.metrics.data.MetricData -import io.opentelemetry.sdk.metrics.export.MetricExporter import io.opentelemetry.sdk.metrics.export.MetricProducer +import io.opentelemetry.sdk.resources.Resource import kotlinx.coroutines.* import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.BeforeEach @@ -44,17 +42,20 @@ import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.workload.SimTraceWorkload import org.opendc.simulator.core.runBlockingSimulation import org.opendc.simulator.resources.SimResourceInterpreter +import org.opendc.telemetry.compute.ComputeMetricExporter +import org.opendc.telemetry.compute.ComputeMonitor +import org.opendc.telemetry.compute.HOST_ID +import org.opendc.telemetry.compute.table.HostData +import org.opendc.telemetry.compute.table.ServerData import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.telemetry.sdk.toOtelClock import java.time.Duration import java.util.* import kotlin.coroutines.resume -import kotlin.math.roundToLong /** * Basic test-suite for the hypervisor. */ -@OptIn(ExperimentalCoroutinesApi::class) internal class SimHostTest { private lateinit var machineModel: MachineModel @@ -73,18 +74,23 @@ internal class SimHostTest { */ @Test fun testOvercommitted() = runBlockingSimulation { - var totalWork = 0L - var grantedWork = 0L - var overcommittedWork = 0L + var idleTime = 0L + var activeTime = 0L + var stealTime = 0L + val hostId = UUID.randomUUID() + val hostResource = Resource.builder() + .put(HOST_ID, hostId.toString()) + .build() val meterProvider: MeterProvider = SdkMeterProvider .builder() + .setResource(hostResource) .setClock(clock.toOtelClock()) .build() val interpreter = SimResourceInterpreter(coroutineContext, clock) val virtDriver = SimHost( - uid = UUID.randomUUID(), + uid = hostId, name = "test", model = machineModel, meta = emptyMap(), @@ -132,20 +138,16 @@ internal class SimHostTest { // Setup metric reader val reader = CoroutineMetricReader( this, listOf(meterProvider as MetricProducer), - object : MetricExporter { - override fun export(metrics: Collection): CompletableResultCode { - val metricsByName = metrics.associateBy { it.name } - - totalWork = metricsByName.getValue("cpu.work.total").doubleSumData.points.first().value.roundToLong() - grantedWork = metricsByName.getValue("cpu.work.granted").doubleSumData.points.first().value.roundToLong() - overcommittedWork = metricsByName.getValue("cpu.work.overcommit").doubleSumData.points.first().value.roundToLong() - return CompletableResultCode.ofSuccess() + ComputeMetricExporter( + clock, + object : ComputeMonitor { + override fun record(data: HostData) { + activeTime += data.cpuActiveTime + idleTime += data.cpuIdleTime + stealTime += data.cpuStealTime + } } - - override fun flush(): CompletableResultCode = CompletableResultCode.ofSuccess() - - override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess() - }, + ), exportInterval = Duration.ofSeconds(duration) ) @@ -172,9 +174,9 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(4147200, totalWork, "Requested work does not match") }, - { assertEquals(2107200, grantedWork, "Granted work does not match") }, - { assertEquals(2040000, overcommittedWork, "Overcommitted work does not match") }, + { assertEquals(659, activeTime, "Active time does not match") }, + { assertEquals(2342, idleTime, "Idle time does not match") }, + { assertEquals(638, stealTime, "Steal time does not match") }, { assertEquals(1500001, clock.millis()) } ) } @@ -184,21 +186,26 @@ internal class SimHostTest { */ @Test fun testFailure() = runBlockingSimulation { - var totalWork = 0L - var grantedWork = 0L - var totalTime = 0L - var downTime = 0L - var guestTotalTime = 0L - var guestDownTime = 0L - + var activeTime = 0L + var idleTime = 0L + var uptime = 0L + var downtime = 0L + var guestUptime = 0L + var guestDowntime = 0L + + val hostId = UUID.randomUUID() + val hostResource = Resource.builder() + .put(HOST_ID, hostId.toString()) + .build() val meterProvider: MeterProvider = SdkMeterProvider .builder() + .setResource(hostResource) .setClock(clock.toOtelClock()) .build() val interpreter = SimResourceInterpreter(coroutineContext, clock) val host = SimHost( - uid = UUID.randomUUID(), + uid = hostId, name = "test", model = machineModel, meta = emptyMap(), @@ -230,24 +237,22 @@ internal class SimHostTest { // Setup metric reader val reader = CoroutineMetricReader( this, listOf(meterProvider as MetricProducer), - object : MetricExporter { - override fun export(metrics: Collection): CompletableResultCode { - val metricsByName = metrics.associateBy { it.name } - - totalWork = metricsByName.getValue("cpu.work.total").doubleSumData.points.first().value.roundToLong() - grantedWork = metricsByName.getValue("cpu.work.granted").doubleSumData.points.first().value.roundToLong() - totalTime = metricsByName.getValue("host.time.total").longSumData.points.first().value - downTime = metricsByName.getValue("host.time.down").longSumData.points.first().value - guestTotalTime = metricsByName.getValue("guest.time.total").longSumData.points.first().value - guestDownTime = metricsByName.getValue("guest.time.error").longSumData.points.first().value - - return CompletableResultCode.ofSuccess() - } - - override fun flush(): CompletableResultCode = CompletableResultCode.ofSuccess() + ComputeMetricExporter( + clock, + object : ComputeMonitor { + override fun record(data: HostData) { + activeTime += data.cpuActiveTime + idleTime += data.cpuIdleTime + uptime += data.uptime + downtime += data.downtime + } - override fun shutdown(): CompletableResultCode = CompletableResultCode.ofSuccess() - }, + override fun record(data: ServerData) { + guestUptime += data.uptime + guestDowntime += data.downtime + } + } + ), exportInterval = Duration.ofSeconds(duration) ) @@ -276,12 +281,12 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(2226040, totalWork, "Total time does not match") }, - { assertEquals(1086040, grantedWork, "Down time does not match") }, - { assertEquals(1200001, totalTime, "Total time does not match") }, - { assertEquals(1200001, guestTotalTime, "Guest total time does not match") }, - { assertEquals(5000, downTime, "Down time does not match") }, - { assertEquals(5000, guestDownTime, "Guest down time does not match") }, + { assertEquals(2661, idleTime, "Idle time does not match") }, + { assertEquals(339, activeTime, "Active time does not match") }, + { assertEquals(1195001, uptime, "Uptime does not match") }, + { assertEquals(5000, downtime, "Downtime does not match") }, + { assertEquals(1195000, guestUptime, "Guest uptime does not match") }, + { assertEquals(5000, guestDowntime, "Guest downtime does not match") }, ) } -- cgit v1.2.3 From 3d68aaf17fc8836f7b2bde0d874929cb58c9a8ca Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 13 Sep 2021 14:35:29 +0200 Subject: perf(compute): Add option for optimizing SimHost simulation This change adds an option for optimizing SimHost simulation by combining all the CPUs of a machine into a single large CPU. For most workloads, this does not significantly affect the simulation results, but does improve the simulation time by a lot. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 24 ++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 793db907..ff55c585 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -68,7 +68,8 @@ public class SimHost( scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), private val mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), - interferenceDomain: VmInterferenceDomain? = null + interferenceDomain: VmInterferenceDomain? = null, + private val optimize: Boolean = false ) : Host, AutoCloseable { /** * The [CoroutineScope] of the host bounded by the lifecycle of the host. @@ -98,7 +99,7 @@ public class SimHost( /** * The machine to run on. */ - public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model, powerDriver) + public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model.optimize(), powerDriver) /** * The hypervisor to run multiple workloads. @@ -319,6 +320,25 @@ public class SimHost( val processingUnits = (0 until cpuCount).map { originalCpu.copy(id = it, node = processingNode) } val memoryUnits = listOf(MemoryUnit("Generic", "Generic", 3200.0, memorySize)) + return MachineModel(processingUnits, memoryUnits).optimize() + } + + /** + * Optimize the [MachineModel] for simulation. + */ + private fun MachineModel.optimize(): MachineModel { + if (!optimize) { + return this + } + + val originalCpu = cpus[0] + val freq = cpus.sumOf { it.frequency } + val processingNode = originalCpu.node.copy(coreCount = 1) + val processingUnits = listOf(originalCpu.copy(frequency = freq, node = processingNode)) + + val memorySize = memory.sumOf { it.size } + val memoryUnits = listOf(MemoryUnit("Generic", "Generic", 3200.0, memorySize)) + return MachineModel(processingUnits, memoryUnits) } -- cgit v1.2.3 From b14df2a0924774c5aed15cedeb1027abf8ee5361 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 16 Sep 2021 16:52:00 +0200 Subject: refactor(capelin): Make workload sampling model extensible This change updates the workload sampling implementation to be more flexible in the way the workload is constructed. Users can now sample multiple workloads at the same time using multiple samplers and use them as a single workload to simulate. --- .../org/opendc/compute/simulator/failure/StochasticVictimSelector.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt index 87903623..fcd9dd7e 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/failure/StochasticVictimSelector.kt @@ -24,8 +24,8 @@ package org.opendc.compute.simulator.failure import org.apache.commons.math3.distribution.RealDistribution import org.opendc.compute.simulator.SimHost +import java.util.* import kotlin.math.roundToInt -import kotlin.random.Random /** * A [VictimSelector] that stochastically selects a set of hosts to be failed. -- cgit v1.2.3 From 30cd010f1f98262aa7f264bb3c3eb6028b8495c5 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 22 Sep 2021 12:43:01 +0200 Subject: refactor(telemetry): Do not require clock for ComputeMetricExporter This change drops the requirement for a clock parameter when constructing a ComputeMetricExporter, since it will now derive the timestamp from the recorded metrics. --- .../org/opendc/compute/simulator/SimHostTest.kt | 41 +++++++++------------- 1 file changed, 17 insertions(+), 24 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 9c879e5e..e75c31a0 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -43,7 +43,6 @@ import org.opendc.simulator.compute.workload.SimTraceWorkload import org.opendc.simulator.core.runBlockingSimulation import org.opendc.simulator.resources.SimResourceInterpreter import org.opendc.telemetry.compute.ComputeMetricExporter -import org.opendc.telemetry.compute.ComputeMonitor import org.opendc.telemetry.compute.HOST_ID import org.opendc.telemetry.compute.table.HostData import org.opendc.telemetry.compute.table.ServerData @@ -138,16 +137,13 @@ internal class SimHostTest { // Setup metric reader val reader = CoroutineMetricReader( this, listOf(meterProvider as MetricProducer), - ComputeMetricExporter( - clock, - object : ComputeMonitor { - override fun record(data: HostData) { - activeTime += data.cpuActiveTime - idleTime += data.cpuIdleTime - stealTime += data.cpuStealTime - } + object : ComputeMetricExporter() { + override fun record(data: HostData) { + activeTime += data.cpuActiveTime + idleTime += data.cpuIdleTime + stealTime += data.cpuStealTime } - ), + }, exportInterval = Duration.ofSeconds(duration) ) @@ -237,22 +233,19 @@ internal class SimHostTest { // Setup metric reader val reader = CoroutineMetricReader( this, listOf(meterProvider as MetricProducer), - ComputeMetricExporter( - clock, - object : ComputeMonitor { - override fun record(data: HostData) { - activeTime += data.cpuActiveTime - idleTime += data.cpuIdleTime - uptime += data.uptime - downtime += data.downtime - } + object : ComputeMetricExporter() { + override fun record(data: HostData) { + activeTime += data.cpuActiveTime + idleTime += data.cpuIdleTime + uptime += data.uptime + downtime += data.downtime + } - override fun record(data: ServerData) { - guestUptime += data.uptime - guestDowntime += data.downtime - } + override fun record(data: ServerData) { + guestUptime += data.uptime + guestDowntime += data.downtime } - ), + }, exportInterval = Duration.ofSeconds(duration) ) -- cgit v1.2.3 From 3b928a66d992ec32ff28bfd4c11b0c7b2ac631ca Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 23 Sep 2021 14:44:46 +0200 Subject: fix(compute): Do not recover guests in non-error state --- .../main/kotlin/org/opendc/compute/simulator/SimHost.kt | 4 ++-- .../kotlin/org/opendc/compute/simulator/internal/Guest.kt | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index ff55c585..fdb3f1dc 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -262,7 +262,7 @@ public class SimHost( } override suspend fun delete(server: Server) { - val guest = guests.remove(server) ?: return + val guest = guests[server] ?: return guest.terminate() } @@ -296,7 +296,7 @@ public class SimHost( _bootTime = clock.millis() for (guest in guests.values) { - guest.start() + guest.recover() } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index 90562e2f..7f33154a 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -96,8 +96,8 @@ internal class Guest( } ServerState.RUNNING -> return ServerState.DELETED -> { - logger.warn { "User tried to start terminated server" } - throw IllegalArgumentException("Server is terminated") + logger.warn { "User tried to start deleted server" } + throw IllegalArgumentException("Server is deleted") } else -> assert(false) { "Invalid state transition" } } @@ -143,6 +143,17 @@ internal class Guest( doStop(ServerState.ERROR) } + /** + * Recover the guest if it is in an error state. + */ + suspend fun recover() { + if (state != ServerState.ERROR) { + return + } + + doStart() + } + /** * The [Job] representing the current active virtual machine instance or `null` if no virtual machine is active. */ -- cgit v1.2.3 From d2f15fd7fd16922c11b0dcaa8807e8a321859773 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 29 Sep 2021 22:05:03 +0200 Subject: refactor(simulator): Merge distributor and aggregator into switch This change removes the distributor and aggregator interfaces in favour of a single switch interface. Since the switch interface is as powerful as both the distributor and aggregator, we don't need the latter two. --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index fdb3f1dc..a8b8afe9 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -47,7 +47,6 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerDriver import org.opendc.simulator.compute.power.SimplePowerDriver -import org.opendc.simulator.resources.SimResourceDistributorMaxMin import org.opendc.simulator.resources.SimResourceInterpreter import java.util.* import kotlin.coroutines.CoroutineContext @@ -418,7 +417,7 @@ public class SimHost( val counters = hypervisor.counters val grantedWork = counters.actual val overcommittedWork = counters.overcommit - val interferedWork = (counters as? SimResourceDistributorMaxMin.Counters)?.interference ?: 0.0 + val interferedWork = counters.interference _totalTime += (duration / 1000.0) * coreCount val activeTime = (grantedWork * d).roundToLong() -- cgit v1.2.3 From 4cc1d40d421c8736f8b21b360b61d6b065158b7a Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 29 Sep 2021 23:56:16 +0200 Subject: refactor(simulator): Migrate to flow-based simulation This change renames the `opendc-simulator-resources` module into the `opendc-simulator-flow` module to indicate that the core simulation model of OpenDC is based around modelling and simulating flows. Previously, the distinction between resource consumer and provider, and input and output caused some confusion. By switching to a flow-based model, this distinction is now clear (as in, the water flows from source to consumer/sink). --- .../src/main/kotlin/org/opendc/compute/simulator/SimHost.kt | 10 +++++----- .../main/kotlin/org/opendc/compute/simulator/internal/Guest.kt | 2 +- .../opendc/compute/simulator/internal/HostFaultInjectorImpl.kt | 2 +- .../test/kotlin/org/opendc/compute/simulator/SimHostTest.kt | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index a8b8afe9..f499927d 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -47,7 +47,7 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.power.ConstantPowerModel import org.opendc.simulator.compute.power.PowerDriver import org.opendc.simulator.compute.power.SimplePowerDriver -import org.opendc.simulator.resources.SimResourceInterpreter +import org.opendc.simulator.flow.FlowEngine import java.util.* import kotlin.coroutines.CoroutineContext import kotlin.math.roundToLong @@ -61,7 +61,7 @@ public class SimHost( model: MachineModel, override val meta: Map, context: CoroutineContext, - interpreter: SimResourceInterpreter, + engine: FlowEngine, meterProvider: MeterProvider, hypervisor: SimHypervisorProvider, scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), @@ -78,7 +78,7 @@ public class SimHost( /** * The clock instance used by the host. */ - private val clock = interpreter.clock + private val clock = engine.clock /** * The logger instance of this server. @@ -98,13 +98,13 @@ public class SimHost( /** * The machine to run on. */ - public val machine: SimBareMetalMachine = SimBareMetalMachine(interpreter, model.optimize(), powerDriver) + public val machine: SimBareMetalMachine = SimBareMetalMachine(engine, model.optimize(), powerDriver) /** * The hypervisor to run multiple workloads. */ private val hypervisor: SimHypervisor = hypervisor.create( - interpreter, + engine, scalingGovernor = scalingGovernor, interferenceDomain = interferenceDomain, listener = object : SimHypervisor.Listener { diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index 7f33154a..eda76ba0 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -198,7 +198,7 @@ internal class Guest( } /** - * Run the process that models the virtual machine lifecycle as a coroutine. + * Converge the process that models the virtual machine lifecycle as a coroutine. */ private suspend fun runMachine(workload: SimWorkload) { delay(1) // TODO Introduce model for boot time diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt index 6919b7fd..7d46e626 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/HostFaultInjectorImpl.kt @@ -75,7 +75,7 @@ internal class HostFaultInjectorImpl( } /** - * Run the injection process. + * Converge the injection process. */ private suspend fun runInjector() { while (true) { diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index e75c31a0..d2293be7 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -41,7 +41,7 @@ import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.workload.SimTraceWorkload import org.opendc.simulator.core.runBlockingSimulation -import org.opendc.simulator.resources.SimResourceInterpreter +import org.opendc.simulator.flow.FlowEngine import org.opendc.telemetry.compute.ComputeMetricExporter import org.opendc.telemetry.compute.HOST_ID import org.opendc.telemetry.compute.table.HostData @@ -87,14 +87,14 @@ internal class SimHostTest { .setClock(clock.toOtelClock()) .build() - val interpreter = SimResourceInterpreter(coroutineContext, clock) + val engine = FlowEngine(coroutineContext, clock) val virtDriver = SimHost( uid = hostId, name = "test", model = machineModel, meta = emptyMap(), coroutineContext, - interpreter, + engine, meterProvider, SimFairShareHypervisorProvider() ) @@ -199,14 +199,14 @@ internal class SimHostTest { .setClock(clock.toOtelClock()) .build() - val interpreter = SimResourceInterpreter(coroutineContext, clock) + val engine = FlowEngine(coroutineContext, clock) val host = SimHost( uid = hostId, name = "test", model = machineModel, meta = emptyMap(), coroutineContext, - interpreter, + engine, meterProvider, SimFairShareHypervisorProvider() ) -- cgit v1.2.3 From 081221684fb826ab5a00c1d8cc5a9886b9e2203c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 1 Oct 2021 22:04:35 +0200 Subject: feat(simulator): Expose CPU time counters directly on hypervisor This change adds a new interface to the SimHypervisor interface that exposes the CPU time counters directly. These are derived from the flow counters and will be used by SimHost to expose them via telemetry. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 196 ++++++++------------- .../org/opendc/compute/simulator/internal/Guest.kt | 51 ++---- .../org/opendc/compute/simulator/SimHostTest.kt | 20 +-- 3 files changed, 105 insertions(+), 162 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index f499927d..4b96872b 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -50,7 +50,6 @@ import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.flow.FlowEngine import java.util.* import kotlin.coroutines.CoroutineContext -import kotlin.math.roundToLong /** * A [Host] that is simulates virtual machines on a physical machine using [SimHypervisor]. @@ -63,7 +62,7 @@ public class SimHost( context: CoroutineContext, engine: FlowEngine, meterProvider: MeterProvider, - hypervisor: SimHypervisorProvider, + hypervisorProvider: SimHypervisorProvider, scalingGovernor: ScalingGovernor = PerformanceScalingGovernor(), powerDriver: PowerDriver = SimplePowerDriver(ConstantPowerModel(0.0)), private val mapper: SimWorkloadMapper = SimMetaWorkloadMapper(), @@ -103,29 +102,8 @@ public class SimHost( /** * The hypervisor to run multiple workloads. */ - private val hypervisor: SimHypervisor = hypervisor.create( - engine, - scalingGovernor = scalingGovernor, - interferenceDomain = interferenceDomain, - listener = object : SimHypervisor.Listener { - override fun onSliceFinish( - hypervisor: SimHypervisor, - totalWork: Double, - grantedWork: Double, - overcommittedWork: Double, - interferedWork: Double, - cpuUsage: Double, - cpuDemand: Double - ) { - _cpuDemand = cpuDemand - _cpuUsage = cpuUsage - - collectTime() - } - } - ) - private var _cpuUsage = 0.0 - private var _cpuDemand = 0.0 + private val hypervisor: SimHypervisor = hypervisorProvider + .create(engine, scalingGovernor = scalingGovernor, interferenceDomain = interferenceDomain) /** * The virtual machines running on the hypervisor. @@ -157,22 +135,13 @@ public class SimHost( } } + /** + * The [Job] that represents the machine running the hypervisor. + */ + private var _job: Job? = null + init { - // Launch hypervisor onto machine - scope.launch { - try { - _bootTime = clock.millis() - _state = HostState.UP - machine.run(this@SimHost.hypervisor, emptyMap()) - } catch (_: CancellationException) { - // Ignored - } catch (cause: Throwable) { - logger.error(cause) { "Host failed" } - throw cause - } finally { - _state = HostState.DOWN - } - } + launch() meter.upDownCounterBuilder("system.guests") .setDescription("Number of guests on this host") @@ -184,15 +153,15 @@ public class SimHost( meter.gaugeBuilder("system.cpu.demand") .setDescription("Amount of CPU resources the guests would use if there were no CPU contention or CPU limits") .setUnit("MHz") - .buildWithCallback { result -> result.observe(_cpuDemand) } + .buildWithCallback { result -> result.observe(hypervisor.cpuDemand) } meter.gaugeBuilder("system.cpu.usage") .setDescription("Amount of CPU resources used by the host") .setUnit("MHz") - .buildWithCallback { result -> result.observe(_cpuUsage) } + .buildWithCallback { result -> result.observe(hypervisor.cpuUsage) } meter.gaugeBuilder("system.cpu.utilization") .setDescription("Utilization of the CPU resources of the host") .setUnit("%") - .buildWithCallback { result -> result.observe(_cpuUsage / _cpuLimit) } + .buildWithCallback { result -> result.observe(hypervisor.cpuUsage / _cpuLimit) } meter.counterBuilder("system.cpu.time") .setDescription("Amount of CPU time spent by the host") .setUnit("s") @@ -200,16 +169,16 @@ public class SimHost( meter.gaugeBuilder("system.power.usage") .setDescription("Power usage of the host ") .setUnit("W") - .buildWithCallback { result -> result.observe(machine.powerDraw) } + .buildWithCallback { result -> result.observe(machine.powerUsage) } meter.counterBuilder("system.power.total") .setDescription("Amount of energy used by the CPU") .setUnit("J") .ofDoubles() - .buildWithCallback(::collectPowerTotal) + .buildWithCallback { result -> result.observe(machine.energyUsage) } meter.counterBuilder("system.time") .setDescription("The uptime of the host") .setUnit("s") - .buildWithCallback(::collectTime) + .buildWithCallback(::collectUptime) meter.gaugeBuilder("system.time.boot") .setDescription("The boot time of the host") .setUnit("1") @@ -290,24 +259,55 @@ public class SimHost( } public suspend fun recover() { - collectTime() - _state = HostState.UP - _bootTime = clock.millis() + updateUptime() + + launch() + + // Wait for the hypervisor to launch before recovering the guests + yield() for (guest in guests.values) { guest.recover() } } + /** + * Launch the hypervisor. + */ + private fun launch() { + check(_job == null) { "Concurrent hypervisor running" } + + // Launch hypervisor onto machine + _job = scope.launch { + try { + _bootTime = clock.millis() + _state = HostState.UP + machine.run(hypervisor, emptyMap()) + } catch (_: CancellationException) { + // Ignored + } catch (cause: Throwable) { + logger.error(cause) { "Host failed" } + throw cause + } finally { + _state = HostState.DOWN + } + } + } + /** * Reset the machine. */ private fun reset() { - collectTime() + updateUptime() + + // Stop the hypervisor + val job = _job + if (job != null) { + job.cancel() + _job = null + } _state = HostState.DOWN - _cpuUsage = 0.0 - _cpuDemand = 0.0 } /** @@ -385,85 +385,46 @@ public class SimHost( } } - private var _lastCpuTimeCallback = clock.millis() - - /** - * Helper function to track the CPU time of a machine. - */ - private fun collectCpuTime(result: ObservableLongMeasurement) { - val now = clock.millis() - val duration = now - _lastCpuTimeCallback - - try { - collectCpuTime(duration, result) - } finally { - _lastCpuTimeCallback = now - } - } - private val _activeState = Attributes.of(STATE_KEY, "active") private val _stealState = Attributes.of(STATE_KEY, "steal") private val _lostState = Attributes.of(STATE_KEY, "lost") private val _idleState = Attributes.of(STATE_KEY, "idle") - private var _totalTime = 0.0 /** * Helper function to track the CPU time of a machine. */ - private fun collectCpuTime(duration: Long, result: ObservableLongMeasurement) { - val coreCount = this.model.cpuCount - val d = coreCount / _cpuLimit - + private fun collectCpuTime(result: ObservableLongMeasurement) { val counters = hypervisor.counters - val grantedWork = counters.actual - val overcommittedWork = counters.overcommit - val interferedWork = counters.interference - _totalTime += (duration / 1000.0) * coreCount - val activeTime = (grantedWork * d).roundToLong() - val idleTime = (_totalTime - grantedWork * d).roundToLong() - val stealTime = (overcommittedWork * d).roundToLong() - val lostTime = (interferedWork * d).roundToLong() - - result.observe(activeTime, _activeState) - result.observe(idleTime, _idleState) - result.observe(stealTime, _stealState) - result.observe(lostTime, _lostState) + result.observe(counters.cpuActiveTime / 1000L, _activeState) + result.observe(counters.cpuIdleTime / 1000L, _idleState) + result.observe(counters.cpuStealTime / 1000L, _stealState) + result.observe(counters.cpuLostTime / 1000L, _lostState) for (guest in guests.values) { - guest.collectCpuTime(duration, result) + guest.collectCpuTime(result) } } - private var _lastPowerCallback = clock.millis() - private var _totalPower = 0.0 - - /** - * Helper function to collect the total power usage of the machine. - */ - private fun collectPowerTotal(result: ObservableDoubleMeasurement) { - val now = clock.millis() - val duration = now - _lastPowerCallback - - _totalPower += duration / 1000.0 * machine.powerDraw - result.observe(_totalPower) - - _lastPowerCallback = now - } - private var _lastReport = clock.millis() /** * Helper function to track the uptime of a machine. */ - private fun collectTime(result: ObservableLongMeasurement? = null) { + private fun updateUptime() { val now = clock.millis() val duration = now - _lastReport + _lastReport = now - try { - collectTime(duration, result) - } finally { - _lastReport = now + if (_state == HostState.UP) { + _uptime += duration + } else if (_state == HostState.DOWN && scope.isActive) { + // Only increment downtime if the machine is in a failure state + _downtime += duration + } + + for (guest in guests.values) { + guest.updateUptime(duration) } } @@ -475,19 +436,14 @@ public class SimHost( /** * Helper function to track the uptime of a machine. */ - private fun collectTime(duration: Long, result: ObservableLongMeasurement? = null) { - if (state == HostState.UP) { - _uptime += duration - } else if (state == HostState.DOWN && scope.isActive) { - // Only increment downtime if the machine is in a failure state - _downtime += duration - } + private fun collectUptime(result: ObservableLongMeasurement) { + updateUptime() - result?.observe(_uptime, _upState) - result?.observe(_downtime, _downState) + result.observe(_uptime, _upState) + result.observe(_downtime, _downState) for (guest in guests.values) { - guest.collectUptime(duration, result) + guest.collectUptime(result) } } @@ -496,9 +452,9 @@ public class SimHost( /** * Helper function to track the boot time of a machine. */ - private fun collectBootTime(result: ObservableLongMeasurement? = null) { + private fun collectBootTime(result: ObservableLongMeasurement) { if (_bootTime != Long.MIN_VALUE) { - result?.observe(_bootTime) + result.observe(_bootTime) } for (guest in guests.values) { diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index eda76ba0..3ac165c8 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -33,12 +33,10 @@ import org.opendc.compute.api.Server import org.opendc.compute.api.ServerState import org.opendc.compute.simulator.SimHost import org.opendc.compute.simulator.SimWorkloadMapper -import org.opendc.simulator.compute.SimAbstractMachine -import org.opendc.simulator.compute.SimMachine +import org.opendc.simulator.compute.kernel.SimVirtualMachine import org.opendc.simulator.compute.workload.SimWorkload import java.time.Clock import kotlin.coroutines.CoroutineContext -import kotlin.math.roundToLong /** * A virtual machine instance that is managed by a [SimHost]. @@ -50,7 +48,7 @@ internal class Guest( private val mapper: SimWorkloadMapper, private val listener: GuestListener, val server: Server, - val machine: SimMachine + val machine: SimVirtualMachine ) { /** * The [CoroutineScope] of the guest. @@ -236,17 +234,22 @@ internal class Guest( .build() /** - * Helper function to track the uptime of the guest. + * Helper function to track the uptime and downtime of the guest. */ - fun collectUptime(duration: Long, result: ObservableLongMeasurement? = null) { + fun updateUptime(duration: Long) { if (state == ServerState.RUNNING) { _uptime += duration } else if (state == ServerState.ERROR) { _downtime += duration } + } - result?.observe(_uptime, _upState) - result?.observe(_downtime, _downState) + /** + * Helper function to track the uptime of the guest. + */ + fun collectUptime(result: ObservableLongMeasurement) { + result.observe(_uptime, _upState) + result.observe(_downtime, _downState) } private var _bootTime = Long.MIN_VALUE @@ -254,9 +257,9 @@ internal class Guest( /** * Helper function to track the boot time of the guest. */ - fun collectBootTime(result: ObservableLongMeasurement? = null) { + fun collectBootTime(result: ObservableLongMeasurement) { if (_bootTime != Long.MIN_VALUE) { - result?.observe(_bootTime) + result.observe(_bootTime) } } @@ -276,33 +279,17 @@ internal class Guest( .putAll(attributes) .put(STATE_KEY, "idle") .build() - private var _totalTime = 0.0 /** * Helper function to track the CPU time of a machine. */ - fun collectCpuTime(duration: Long, result: ObservableLongMeasurement) { - val coreCount = server.flavor.cpuCount - val d = coreCount / _cpuLimit - - var grantedWork = 0.0 - var overcommittedWork = 0.0 - - for (cpu in (machine as SimAbstractMachine).cpus) { - val counters = cpu.counters - grantedWork += counters.actual - overcommittedWork += counters.overcommit - } - - _totalTime += (duration / 1000.0) * coreCount - val activeTime = (grantedWork * d).roundToLong() - val idleTime = (_totalTime - grantedWork * d).roundToLong() - val stealTime = (overcommittedWork * d).roundToLong() + fun collectCpuTime(result: ObservableLongMeasurement) { + val counters = machine.counters - result.observe(activeTime, _activeState) - result.observe(idleTime, _idleState) - result.observe(stealTime, _stealState) - result.observe(0, _lostState) + result.observe(counters.cpuActiveTime / 1000, _activeState) + result.observe(counters.cpuIdleTime / 1000, _idleState) + result.observe(counters.cpuStealTime / 1000, _stealState) + result.observe(counters.cpuLostTime / 1000, _lostState) } private val _cpuLimit = machine.model.cpus.sumOf { it.frequency } diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index d2293be7..26089b6d 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -170,9 +170,9 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(659, activeTime, "Active time does not match") }, - { assertEquals(2342, idleTime, "Idle time does not match") }, - { assertEquals(638, stealTime, "Steal time does not match") }, + { assertEquals(658, activeTime, "Active time does not match") }, + { assertEquals(1741, idleTime, "Idle time does not match") }, + { assertEquals(637, stealTime, "Steal time does not match") }, { assertEquals(1500001, clock.millis()) } ) } @@ -253,7 +253,7 @@ internal class SimHostTest { host.spawn(server) delay(5000L) host.fail() - delay(5000L) + delay(duration * 1000) host.recover() suspendCancellableCoroutine { cont -> @@ -274,12 +274,12 @@ internal class SimHostTest { reader.close() assertAll( - { assertEquals(2661, idleTime, "Idle time does not match") }, - { assertEquals(339, activeTime, "Active time does not match") }, - { assertEquals(1195001, uptime, "Uptime does not match") }, - { assertEquals(5000, downtime, "Downtime does not match") }, - { assertEquals(1195000, guestUptime, "Guest uptime does not match") }, - { assertEquals(5000, guestDowntime, "Guest downtime does not match") }, + { assertEquals(1175, idleTime, "Idle time does not match") }, + { assertEquals(624, activeTime, "Active time does not match") }, + { assertEquals(900001, uptime, "Uptime does not match") }, + { assertEquals(300000, downtime, "Downtime does not match") }, + { assertEquals(900000, guestUptime, "Guest uptime does not match") }, + { assertEquals(300000, guestDowntime, "Guest downtime does not match") }, ) } -- cgit v1.2.3 From 012fe8fa9be1676b8eef0cce795738a00c4260c0 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sat, 2 Oct 2021 13:08:06 +0200 Subject: perf(compute): Optimize telemetry collection This change optimizes the telemetry collection in the SimHost class. Previously, there was significant overhead in collecting the metrics of this and associated classes due large `Attributes` object that did not cache accesses to `hashCode()`. We now wrap this object and manually cache the hash code. --- .../kotlin/org/opendc/compute/simulator/SimHost.kt | 45 ++++++---- .../org/opendc/compute/simulator/internal/Guest.kt | 97 ++++++++++++++++------ 2 files changed, 102 insertions(+), 40 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt index 4b96872b..b9d02185 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt @@ -109,6 +109,7 @@ public class SimHost( * The virtual machines running on the hypervisor. */ private val guests = HashMap() + private val _guests = mutableListOf() override val state: HostState get() = _state @@ -199,7 +200,7 @@ public class SimHost( require(canFit(key)) { "Server does not fit" } val machine = hypervisor.createMachine(key.flavor.toMachineModel(), key.name) - Guest( + val newGuest = Guest( scope.coroutineContext, clock, this, @@ -208,6 +209,9 @@ public class SimHost( server, machine ) + + _guests.add(newGuest) + newGuest } if (start) { @@ -231,7 +235,7 @@ public class SimHost( override suspend fun delete(server: Server) { val guest = guests[server] ?: return - guest.terminate() + guest.delete() } override fun addListener(listener: HostListener) { @@ -253,7 +257,7 @@ public class SimHost( public suspend fun fail() { reset() - for (guest in guests.values) { + for (guest in _guests) { guest.fail() } } @@ -266,7 +270,7 @@ public class SimHost( // Wait for the hypervisor to launch before recovering the guests yield() - for (guest in guests.values) { + for (guest in _guests) { guest.recover() } } @@ -357,11 +361,17 @@ public class SimHost( var error = 0L var invalid = 0L - for ((_, guest) in guests) { + val guests = _guests.listIterator() + for (guest in guests) { when (guest.state) { ServerState.TERMINATED -> terminated++ ServerState.RUNNING -> running++ ServerState.ERROR -> error++ + ServerState.DELETED -> { + // Remove guests that have been deleted + this.guests.remove(guest.server) + guests.remove() + } else -> invalid++ } } @@ -380,8 +390,9 @@ public class SimHost( private fun collectCpuLimit(result: ObservableDoubleMeasurement) { result.observe(_cpuLimit) - for (guest in guests.values) { - guest.collectCpuLimit(result) + val guests = _guests + for (i in guests.indices) { + guests[i].collectCpuLimit(result) } } @@ -401,8 +412,9 @@ public class SimHost( result.observe(counters.cpuStealTime / 1000L, _stealState) result.observe(counters.cpuLostTime / 1000L, _lostState) - for (guest in guests.values) { - guest.collectCpuTime(result) + val guests = _guests + for (i in guests.indices) { + guests[i].collectCpuTime(result) } } @@ -423,8 +435,9 @@ public class SimHost( _downtime += duration } - for (guest in guests.values) { - guest.updateUptime(duration) + val guests = _guests + for (i in guests.indices) { + guests[i].updateUptime(duration) } } @@ -442,8 +455,9 @@ public class SimHost( result.observe(_uptime, _upState) result.observe(_downtime, _downState) - for (guest in guests.values) { - guest.collectUptime(result) + val guests = _guests + for (i in guests.indices) { + guests[i].collectUptime(result) } } @@ -457,8 +471,9 @@ public class SimHost( result.observe(_bootTime) } - for (guest in guests.values) { - guest.collectBootTime(result) + val guests = _guests + for (i in guests.indices) { + guests[i].collectBootTime(result) } } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index 3ac165c8..5ea1860d 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -24,6 +24,7 @@ package org.opendc.compute.simulator.internal import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.api.common.Attributes +import io.opentelemetry.api.common.AttributesBuilder import io.opentelemetry.api.metrics.ObservableDoubleMeasurement import io.opentelemetry.api.metrics.ObservableLongMeasurement import io.opentelemetry.semconv.resource.attributes.ResourceAttributes @@ -71,17 +72,7 @@ internal class Guest( /** * The attributes of the guest. */ - val attributes: Attributes = Attributes.builder() - .put(ResourceAttributes.HOST_NAME, server.name) - .put(ResourceAttributes.HOST_ID, server.uid.toString()) - .put(ResourceAttributes.HOST_TYPE, server.flavor.name) - .put(AttributeKey.longKey("host.num_cpus"), server.flavor.cpuCount.toLong()) - .put(AttributeKey.longKey("host.mem_capacity"), server.flavor.memorySize) - .put(AttributeKey.stringArrayKey("host.labels"), server.labels.map { (k, v) -> "$k:$v" }) - .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64) - .put(ResourceAttributes.HOST_IMAGE_NAME, server.image.name) - .put(ResourceAttributes.HOST_IMAGE_ID, server.image.uid.toString()) - .build() + val attributes: Attributes = GuestAttributes(this) /** * Start the guest. @@ -114,12 +105,12 @@ internal class Guest( } /** - * Terminate the guest. + * Delete the guest. * * This operation will stop the guest if it is running on the host and remove all resources associated with the * guest. */ - suspend fun terminate() { + suspend fun delete() { stop() state = ServerState.DELETED @@ -224,12 +215,10 @@ internal class Guest( private var _uptime = 0L private var _downtime = 0L - private val _upState = Attributes.builder() - .putAll(attributes) + private val _upState = attributes.toBuilder() .put(STATE_KEY, "up") .build() - private val _downState = Attributes.builder() - .putAll(attributes) + private val _downState = attributes.toBuilder() .put(STATE_KEY, "down") .build() @@ -263,20 +252,16 @@ internal class Guest( } } - private val _activeState = Attributes.builder() - .putAll(attributes) + private val _activeState = attributes.toBuilder() .put(STATE_KEY, "active") .build() - private val _stealState = Attributes.builder() - .putAll(attributes) + private val _stealState = attributes.toBuilder() .put(STATE_KEY, "steal") .build() - private val _lostState = Attributes.builder() - .putAll(attributes) + private val _lostState = attributes.toBuilder() .put(STATE_KEY, "lost") .build() - private val _idleState = Attributes.builder() - .putAll(attributes) + private val _idleState = attributes.toBuilder() .put(STATE_KEY, "idle") .build() @@ -300,4 +285,66 @@ internal class Guest( fun collectCpuLimit(result: ObservableDoubleMeasurement) { result.observe(_cpuLimit, attributes) } + + /** + * An optimized [Attributes] implementation. + */ + private class GuestAttributes(private val uid: String, private val attributes: Attributes) : Attributes by attributes { + /** + * Construct a [GuestAttributes] instance from a [Guest]. + */ + constructor(guest: Guest) : this( + guest.server.uid.toString(), + Attributes.builder() + .put(ResourceAttributes.HOST_NAME, guest.server.name) + .put(ResourceAttributes.HOST_ID, guest.server.uid.toString()) + .put(ResourceAttributes.HOST_TYPE, guest.server.flavor.name) + .put(AttributeKey.longKey("host.num_cpus"), guest.server.flavor.cpuCount.toLong()) + .put(AttributeKey.longKey("host.mem_capacity"), guest.server.flavor.memorySize) + .put(AttributeKey.stringArrayKey("host.labels"), guest.server.labels.map { (k, v) -> "$k:$v" }) + .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64) + .put(ResourceAttributes.HOST_IMAGE_NAME, guest.server.image.name) + .put(ResourceAttributes.HOST_IMAGE_ID, guest.server.image.uid.toString()) + .build() + ) + + override fun get(key: AttributeKey): T? { + // Optimize access to the HOST_ID key which is accessed quite often + if (key == ResourceAttributes.HOST_ID) { + @Suppress("UNCHECKED_CAST") + return uid as T? + } + return attributes.get(key) + } + + override fun toBuilder(): AttributesBuilder { + val delegate = attributes.toBuilder() + return object : AttributesBuilder { + + override fun putAll(attributes: Attributes): AttributesBuilder { + delegate.putAll(attributes) + return this + } + + override fun put(key: AttributeKey, value: Int): AttributesBuilder { + delegate.put(key, value) + return this + } + + override fun put(key: AttributeKey, value: T): AttributesBuilder { + delegate.put(key, value) + return this + } + + override fun build(): Attributes = GuestAttributes(uid, delegate.build()) + } + } + + override fun equals(other: Any?): Boolean = attributes == other + + // Cache hash code + private val _hash = attributes.hashCode() + + override fun hashCode(): Int = _hash + } } -- cgit v1.2.3 From 94fa3d33d4ef77aca5e70cc7f91ae9dca71d25e7 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 6 Oct 2021 13:12:18 +0200 Subject: perf(simulator): Optimize SimTraceWorkload This change improves the performance of the SimTraceWorkload class by changing the way trace fragments are read and processed by the CPU consumers. --- .../org/opendc/compute/simulator/SimHostTest.kt | 32 ++++++++++++---------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'opendc-compute/opendc-compute-simulator/src') diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt index 26089b6d..a0ff9228 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt @@ -39,6 +39,8 @@ import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit +import org.opendc.simulator.compute.workload.SimTrace +import org.opendc.simulator.compute.workload.SimTraceFragment import org.opendc.simulator.compute.workload.SimTraceWorkload import org.opendc.simulator.core.runBlockingSimulation import org.opendc.simulator.flow.FlowEngine @@ -105,11 +107,11 @@ internal class SimHostTest { emptyMap(), mapOf( "workload" to SimTraceWorkload( - sequenceOf( - SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), - SimTraceWorkload.Fragment(duration * 1000, duration * 1000, 2 * 3500.0, 2), - SimTraceWorkload.Fragment(duration * 2000, duration * 1000, 0.0, 2), - SimTraceWorkload.Fragment(duration * 3000, duration * 1000, 2 * 183.0, 2) + SimTrace.ofFragments( + SimTraceFragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceFragment(duration * 1000, duration * 1000, 2 * 3500.0, 2), + SimTraceFragment(duration * 2000, duration * 1000, 0.0, 2), + SimTraceFragment(duration * 3000, duration * 1000, 2 * 183.0, 2) ), offset = 1 ) @@ -121,11 +123,11 @@ internal class SimHostTest { emptyMap(), mapOf( "workload" to SimTraceWorkload( - sequenceOf( - SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), - SimTraceWorkload.Fragment(duration * 1000, duration * 1000, 2 * 3100.0, 2), - SimTraceWorkload.Fragment(duration * 2000, duration * 1000, 0.0, 2), - SimTraceWorkload.Fragment(duration * 3000, duration * 1000, 2 * 73.0, 2) + SimTrace.ofFragments( + SimTraceFragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceFragment(duration * 1000, duration * 1000, 2 * 3100.0, 2), + SimTraceFragment(duration * 2000, duration * 1000, 0.0, 2), + SimTraceFragment(duration * 3000, duration * 1000, 2 * 73.0, 2) ), offset = 1 ) @@ -217,11 +219,11 @@ internal class SimHostTest { emptyMap(), mapOf( "workload" to SimTraceWorkload( - sequenceOf( - SimTraceWorkload.Fragment(0, duration * 1000, 2 * 28.0, 2), - SimTraceWorkload.Fragment(duration * 1000L, duration * 1000, 2 * 3500.0, 2), - SimTraceWorkload.Fragment(duration * 2000L, duration * 1000, 0.0, 2), - SimTraceWorkload.Fragment(duration * 3000L, duration * 1000, 2 * 183.0, 2) + SimTrace.ofFragments( + SimTraceFragment(0, duration * 1000, 2 * 28.0, 2), + SimTraceFragment(duration * 1000L, duration * 1000, 2 * 3500.0, 2), + SimTraceFragment(duration * 2000L, duration * 1000, 0.0, 2), + SimTraceFragment(duration * 3000L, duration * 1000, 2 * 183.0, 2) ), offset = 1 ) -- cgit v1.2.3