diff options
| author | Georgios Andreadis <g.andreadis@student.tudelft.nl> | 2020-04-10 23:21:21 +0200 |
|---|---|---|
| committer | Georgios Andreadis <g.andreadis@student.tudelft.nl> | 2020-04-10 23:21:21 +0200 |
| commit | 5310dd64606f398bddcaef87f26eee94b663b39d (patch) | |
| tree | 8cb4c2a4643962d43b83bb8804f84611d5196d18 | |
| parent | ebcda5df6a858256d2022f091d10da4083b00258 (diff) | |
| parent | ead9f9680792878f51be58d931c6337edeefae4b (diff) | |
Merge branch 'bug/virt-driver-interference' into '2.x'
Fix incorrect reporting of overcommission
See merge request opendc/opendc-simulator!53
13 files changed, 93 insertions, 43 deletions
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt index 92c0ab0c..5b0035e3 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt @@ -1,3 +1,3 @@ package com.atlarge.opendc.compute.core.image -data class FlopsHistoryFragment(val tick: Long, val flops: Long, val duration: Long, val usage: Double) +data class FlopsHistoryFragment(val tick: Long, val flops: Long, val duration: Long, val usage: Double, val cores: Int) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt index 79021d6b..9ad88c17 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt @@ -14,7 +14,7 @@ class VmImage( public override val name: String, public override val tags: TagContainer, public val flopsHistory: List<FlopsHistoryFragment>, - public val cores: Int, + public val maxCores: Int, public val requiredMemory: Long ) : Image { @@ -25,7 +25,7 @@ class VmImage( if (fragment.flops == 0L) { delay(fragment.duration) } else { - val cores = min(this.cores, ctx.server.flavor.cpuCount) + val cores = min(fragment.cores, ctx.server.flavor.cpuCount) val burst = LongArray(cores) { fragment.flops / cores } val usage = DoubleArray(cores) { fragment.usage } @@ -34,5 +34,5 @@ class VmImage( } } - override fun toString(): String = "VmImage(uid=$uid, name=$name, cores=$cores, requiredMemory=$requiredMemory)" + override fun toString(): String = "VmImage(uid=$uid, name=$name, cores=$maxCores, requiredMemory=$requiredMemory)" } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index e3cb6e35..8e15584a 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -282,7 +282,9 @@ public class SimpleBareMetalDriver( } if (!unavailable) { - usageState.value = totalUsage / cpus.size + delay.invokeOnTimeout(1, Runnable { + usageState.value = totalUsage / cpus.size + }) } val action = Runnable { diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt index 5c19b00d..9ceb8bfc 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt @@ -54,13 +54,20 @@ public sealed class HypervisorEvent { * * @property driver The driver that emitted the event. * @property requestedBurst The total requested CPU time (can be above capacity). - * @property grantedBurst The actual total granted capacity. + * @property grantedBurst The actual total granted capacity, which might be lower than the requested burst due to + * the hypervisor being interrupted during a slice. + * @property overcommissionedBurst The CPU time that the hypervisor could not grant to the virtual machine since + * it did not have the capacity. + * @property interferedBurst The sum of CPU time that virtual machines could not utilize due to performance + * interference. * @property numberOfDeployedImages The number of images deployed on this hypervisor. */ public data class SliceFinished( override val driver: VirtDriver, public val requestedBurst: Long, public val grantedBurst: Long, + public val overcommissionedBurst: Long, + public val interferedBurst: Long, public val numberOfDeployedImages: Int, public val hostServer: Server ) : HypervisorEvent() diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index 4939a624..dcfa1174 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -213,9 +213,12 @@ class SimpleVirtDriver( val start = clock.millis() + val vmCount = vms.size var duration: Double = Double.POSITIVE_INFINITY var deadline: Long = Long.MAX_VALUE var availableUsage = maxUsage + var totalRequestedUsage = 0.0 + var totalRequestedBurst = 0L // Divide the available host capacity fairly across the vCPUs using max-min fair sharing for ((i, req) in requests.withIndex()) { @@ -223,6 +226,9 @@ class SimpleVirtDriver( val availableShare = availableUsage / remaining val grantedUsage = min(req.limit, availableShare) + totalRequestedUsage += req.limit + totalRequestedBurst += req.burst + req.allocatedUsage = grantedUsage availableUsage -= grantedUsage @@ -231,21 +237,23 @@ class SimpleVirtDriver( deadline = min(deadline, req.vm.deadline) } - val totalUsage = maxUsage - availableUsage - var totalBurst = 0L - availableUsage = totalUsage - val serverLoad = totalUsage / maxUsage + duration = ceil(duration) + + val totalAllocatedUsage = maxUsage - availableUsage + var totalAllocatedBurst = 0L + availableUsage = totalAllocatedUsage + val serverLoad = totalAllocatedUsage / maxUsage // Divide the requests over the available capacity of the pCPUs fairly for (i in pCPUs) { val remaining = hostContext.cpus.size - i val availableShare = availableUsage / remaining val grantedUsage = min(hostContext.cpus[i].frequency, availableShare) - val pBurst = (duration * grantedUsage).toLong() + val pBurst = ceil(duration * grantedUsage).toLong() usage[i] = grantedUsage burst[i] = pBurst - totalBurst += pBurst + totalAllocatedBurst += pBurst availableUsage -= grantedUsage } @@ -263,7 +271,14 @@ class SimpleVirtDriver( continue } + // The total burst that the VMs wanted to run in the time-frame that we ran. val totalRemainder = burst.sum() + val totalGrantedBurst = totalAllocatedBurst - totalRemainder + + // The burst that was lost due to overcommissioning of CPU resources + var totalOvercommissionedBurst = 0L + // The burst that was lost due to interference. + var totalInterferedBurst = 0L val entryIterator = vms.entries.iterator() while (entryIterator.hasNext()) { @@ -277,13 +292,15 @@ class SimpleVirtDriver( for ((i, req) in vmRequests.withIndex()) { // Compute the fraction of compute time allocated to the VM - val fraction = req.allocatedUsage / totalUsage - - // Derive the burst that was allocated to this vCPU - val allocatedBurst = ceil(totalBurst * fraction).toLong() + val fraction = req.allocatedUsage / totalAllocatedUsage // Compute the burst time that the VM was actually granted - val grantedBurst = (performanceScore * (allocatedBurst - ceil(totalRemainder * fraction))).toLong() + val grantedBurst = ceil(totalGrantedBurst * fraction).toLong() + + // The burst that was actually used by the VM + val usedBurst = ceil(grantedBurst * performanceScore).toLong() + + totalInterferedBurst += grantedBurst - usedBurst // Compute remaining burst time to be executed for the request req.burst = max(0, vm.burst[i] - grantedBurst) @@ -292,6 +309,11 @@ class SimpleVirtDriver( if (req.burst <= 0L || req.isCancelled) { hasFinished = true } + + if (vm.deadline <= end) { + // Request must have its entire burst consumed or otherwise we have overcommission + totalOvercommissionedBurst += req.burst + } } if (hasFinished || vm.deadline <= end) { @@ -307,9 +329,11 @@ class SimpleVirtDriver( eventFlow.emit( HypervisorEvent.SliceFinished( this@SimpleVirtDriver, - totalBurst, - totalBurst - totalRemainder, - vms.size, + totalRequestedBurst, + min(totalRequestedBurst, totalGrantedBurst), // We can run more than requested due to timing + totalOvercommissionedBurst, + totalInterferedBurst, // Might be smaller than zero due to FP rounding errors + vmCount, // Some of the VMs might already have finished, so keep initial VM count server ) ) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt index 85bdc438..2d467e92 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/SimpleVirtProvisioningService.kt @@ -121,8 +121,9 @@ class SimpleVirtProvisioningService( for (imageInstance in imagesToBeScheduled) { val requiredMemory = (imageInstance.image as VmImage).requiredMemory val selectedHv = allocationLogic.select(availableHypervisors, imageInstance) ?: break + try { - log.info("Spawning ${imageInstance.image} on ${selectedHv.server} ${availableHypervisors.size}") + log.info("Spawning ${imageInstance.image} on ${selectedHv.server}") incomingImages -= imageInstance // Speculatively update the hypervisor view information to prevent other images in the queue from diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/ComparableAllocationPolicyLogic.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/ComparableAllocationPolicyLogic.kt index 5e41bcef..79dd95f3 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/ComparableAllocationPolicyLogic.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/ComparableAllocationPolicyLogic.kt @@ -42,7 +42,11 @@ interface ComparableAllocationPolicyLogic : AllocationPolicy.Logic { image: SimpleVirtProvisioningService.ImageView ): HypervisorView? { return hypervisors.asSequence() - .filter { it.availableMemory >= (image.image as VmImage).requiredMemory } + .filter { hv -> + val fitsMemory = hv.availableMemory >= (image.image as VmImage).requiredMemory + val fitsCpu = hv.server.flavor.cpuCount >= image.flavor.cpuCount + fitsMemory && fitsCpu + } .minWith(comparator.thenBy { it.server.uid }) } } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/RandomAllocationPolicy.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/RandomAllocationPolicy.kt index 142846ac..07dcf1c5 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/RandomAllocationPolicy.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/service/allocation/RandomAllocationPolicy.kt @@ -40,7 +40,11 @@ public class RandomAllocationPolicy(val random: Random = Random(0)) : Allocation image: SimpleVirtProvisioningService.ImageView ): HypervisorView? { return hypervisors.asIterable() - .filter { it.availableMemory >= (image.image as VmImage).requiredMemory } + .filter { hv -> + val fitsMemory = hv.availableMemory >= (image.image as VmImage).requiredMemory + val fitsCpu = hv.server.flavor.cpuCount >= image.flavor.cpuCount + fitsMemory && fitsCpu + } .randomOrNull(random) } } diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt index 58d784b0..4f3abc02 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt @@ -62,8 +62,8 @@ internal class HypervisorTest { val driverDom = root.newDomain("driver") - val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2) - val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) } + val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 1) + val cpus = List(1) { ProcessingUnit(cpuNode, it, 2000.0) } val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", emptyMap(), cpus, emptyList()) metalDriver.init() @@ -75,6 +75,7 @@ internal class HypervisorTest { val flavor = Flavor(1, 0) val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] + vmDriver.events.onEach { println(it) }.launchIn(this) val vmA = vmDriver.spawn("a", workloadA, flavor) vmA.events.onEach { println(it) }.launchIn(this) val vmB = vmDriver.spawn("b", workloadB, flavor) diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 212b1bfb..c0d6de03 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -17,21 +17,22 @@ class Sc20Monitor( private val lastServerStates = mutableMapOf<Server, Pair<ServerState, Long>>() init { - outputFile.write("time,duration,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostState,hostUsage,powerDraw,failedVms\n") + outputFile.write("time,duration,requestedBurst,grantedBurst,overcommissionedBurst,interferedBurst,numberOfDeployedImages,server,hostState,hostUsage,powerDraw,failedVms\n") } suspend fun onVmStateChanged(server: Server) {} suspend fun serverStateChanged(driver: VirtDriver, server: Server) { - if ((server.state == ServerState.SHUTOFF || server.state == ServerState.ERROR) && - lastServerStates.containsKey(server) - ) { - val duration = simulationContext.clock.millis() - lastServerStates[server]!!.second + val lastServerState = lastServerStates[server] + if (server.state == ServerState.SHUTOFF && lastServerState != null) { + val duration = simulationContext.clock.millis() - lastServerState.second onSliceFinish( simulationContext.clock.millis(), 0, 0, 0, + 0, + 0, server, duration ) @@ -46,6 +47,8 @@ class Sc20Monitor( time: Long, requestedBurst: Long, grantedBurst: Long, + overcommissionedBurst: Long, + interferedBurst: Long, numberOfDeployedImages: Int, hostServer: Server, duration: Long = 5 * 60 * 1000L @@ -57,7 +60,7 @@ class Sc20Monitor( val usage = driver.usage.first() val powerDraw = driver.powerDraw.first() - outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},${hostServer.state},$usage,$powerDraw") + outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$overcommissionedBurst,$interferedBurst,$numberOfDeployedImages,${hostServer.uid},${hostServer.state},$usage,$powerDraw") outputFile.newLine() } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 3392bd02..ede18b40 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -28,7 +28,6 @@ import com.atlarge.odcsim.Domain import com.atlarge.odcsim.SimulationEngineProvider import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.Flavor -import com.atlarge.opendc.compute.core.Server import com.atlarge.opendc.compute.core.ServerEvent import com.atlarge.opendc.compute.core.ServerState import com.atlarge.opendc.compute.metal.NODE_CLUSTER @@ -200,6 +199,8 @@ fun main(args: Array<String>) { simulationContext.clock.millis(), event.requestedBurst, event.grantedBurst, + event.overcommissionedBurst, + event.interferedBurst, event.numberOfDeployedImages, event.hostServer ) @@ -226,20 +227,21 @@ fun main(args: Array<String>) { null } - val running = mutableSetOf<Server>() val finish = Channel<Unit>(Channel.RENDEZVOUS) + var submitted = 0 + var finished = 0 val reader = Sc20TraceReader(File(traceDirectory), performanceInterferenceModel, getSelectedVmList()) while (reader.hasNext()) { val (time, workload) = reader.next() delay(max(0, time - simulationContext.clock.millis())) + submitted++ launch { chan.send(Unit) val server = scheduler.deploy( workload.image.name, workload.image, - Flavor(workload.image.cores, workload.image.requiredMemory) + Flavor(workload.image.maxCores, workload.image.requiredMemory) ) - running += server // Monitor server events server.events .onEach { @@ -248,10 +250,10 @@ fun main(args: Array<String>) { // Detect whether the VM has finished running if (it.server.state == ServerState.SHUTOFF) { - running -= server + finished++ } - if (running.isEmpty() && !reader.hasNext()) { + if (finished == submitted && !reader.hasNext()) { finish.send(Unit) } } diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt index 78a58671..e0d81b38 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt @@ -103,10 +103,10 @@ class Sc20TraceReader( val flops: Long = (cpuUsage * 5 * 60 * cores).toLong() if (flopsHistory.isEmpty()) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { if (flopsHistory.last().flops != flops) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { val oldFragment = flopsHistory.removeAt(flopsHistory.size - 1) flopsHistory.add( @@ -114,7 +114,8 @@ class Sc20TraceReader( oldFragment.tick, oldFragment.flops + flops, oldFragment.duration + traceInterval, - cpuUsage + cpuUsage, + cores ) ) } diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt index 6b8843aa..fbe77654 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt @@ -101,10 +101,10 @@ class VmTraceReader( val flops: Long = (cpuUsage * 5 * 60 * cores).toLong() if (flopsHistory.isEmpty()) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { if (flopsHistory.last().flops != flops) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { val oldFragment = flopsHistory.removeAt(flopsHistory.size - 1) flopsHistory.add( @@ -112,7 +112,8 @@ class VmTraceReader( oldFragment.tick, oldFragment.flops + flops, oldFragment.duration + traceInterval, - cpuUsage + cpuUsage, + cores ) ) } |
