diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2020-04-08 15:27:24 +0200 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2020-04-08 15:54:49 +0200 |
| commit | 7a11aff2ff46b0fb3bf01f537946d5fcd66a1e90 (patch) | |
| tree | 71be757043da58f0adaec71c4432e6f5f0135656 /opendc | |
| parent | 02864ba50fafffd19bb1b635eea06004d9fd78aa (diff) | |
bug: Fix incorrect reporting of overcommission
Diffstat (limited to 'opendc')
10 files changed, 75 insertions, 29 deletions
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt index 92c0ab0c..5b0035e3 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt @@ -1,3 +1,3 @@ package com.atlarge.opendc.compute.core.image -data class FlopsHistoryFragment(val tick: Long, val flops: Long, val duration: Long, val usage: Double) +data class FlopsHistoryFragment(val tick: Long, val flops: Long, val duration: Long, val usage: Double, val cores: Int) diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt index 79021d6b..9ad88c17 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt @@ -14,7 +14,7 @@ class VmImage( public override val name: String, public override val tags: TagContainer, public val flopsHistory: List<FlopsHistoryFragment>, - public val cores: Int, + public val maxCores: Int, public val requiredMemory: Long ) : Image { @@ -25,7 +25,7 @@ class VmImage( if (fragment.flops == 0L) { delay(fragment.duration) } else { - val cores = min(this.cores, ctx.server.flavor.cpuCount) + val cores = min(fragment.cores, ctx.server.flavor.cpuCount) val burst = LongArray(cores) { fragment.flops / cores } val usage = DoubleArray(cores) { fragment.usage } @@ -34,5 +34,5 @@ class VmImage( } } - override fun toString(): String = "VmImage(uid=$uid, name=$name, cores=$cores, requiredMemory=$requiredMemory)" + override fun toString(): String = "VmImage(uid=$uid, name=$name, cores=$maxCores, requiredMemory=$requiredMemory)" } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index e3cb6e35..8e15584a 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -282,7 +282,9 @@ public class SimpleBareMetalDriver( } if (!unavailable) { - usageState.value = totalUsage / cpus.size + delay.invokeOnTimeout(1, Runnable { + usageState.value = totalUsage / cpus.size + }) } val action = Runnable { diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt index 5c19b00d..24b19ada 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt @@ -54,13 +54,20 @@ public sealed class HypervisorEvent { * * @property driver The driver that emitted the event. * @property requestedBurst The total requested CPU time (can be above capacity). - * @property grantedBurst The actual total granted capacity. + * @property grantedBurst The actual total granted capacity, which might be lower than the requested burst due to + * the hypervisor being interrupted during a slice. + * @property overcommissionedBurst The CPU time that the hypervisor could not grant to the virtual machine since + * it did not have the capacity. + * @property interferredBurst The sum of CPU time that virtual machines could not utilize due to performance + * interference. * @property numberOfDeployedImages The number of images deployed on this hypervisor. */ public data class SliceFinished( override val driver: VirtDriver, public val requestedBurst: Long, public val grantedBurst: Long, + public val overcommissionedBurst: Long, + public val interferredBurst: Long, public val numberOfDeployedImages: Int, public val hostServer: Server ) : HypervisorEvent() diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index 4939a624..08d7e840 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -64,6 +64,7 @@ import java.util.Objects import java.util.TreeSet import java.util.UUID import kotlin.math.ceil +import kotlin.math.floor import kotlin.math.max import kotlin.math.min @@ -213,15 +214,21 @@ class SimpleVirtDriver( val start = clock.millis() + val vmCount = vms.size var duration: Double = Double.POSITIVE_INFINITY var deadline: Long = Long.MAX_VALUE var availableUsage = maxUsage + var totalRequestedUsage = 0.0 + var totalRequestedBurst = 0L // Divide the available host capacity fairly across the vCPUs using max-min fair sharing for ((i, req) in requests.withIndex()) { val remaining = requests.size - i val availableShare = availableUsage / remaining - val grantedUsage = min(req.limit, availableShare) + val grantedUsage = floor(min(req.limit, availableShare)) + + totalRequestedUsage += req.limit + totalRequestedBurst += req.burst req.allocatedUsage = grantedUsage availableUsage -= grantedUsage @@ -231,21 +238,23 @@ class SimpleVirtDriver( deadline = min(deadline, req.vm.deadline) } - val totalUsage = maxUsage - availableUsage - var totalBurst = 0L - availableUsage = totalUsage - val serverLoad = totalUsage / maxUsage + duration = ceil(duration) + + val totalAllocatedUsage = maxUsage - availableUsage + var totalAllocatedBurst = 0L + availableUsage = totalAllocatedUsage + val serverLoad = totalAllocatedUsage / maxUsage // Divide the requests over the available capacity of the pCPUs fairly for (i in pCPUs) { val remaining = hostContext.cpus.size - i val availableShare = availableUsage / remaining val grantedUsage = min(hostContext.cpus[i].frequency, availableShare) - val pBurst = (duration * grantedUsage).toLong() + val pBurst = ceil(duration * grantedUsage).toLong() usage[i] = grantedUsage burst[i] = pBurst - totalBurst += pBurst + totalAllocatedBurst += pBurst availableUsage -= grantedUsage } @@ -263,7 +272,22 @@ class SimpleVirtDriver( continue } + // The total burst that the VMs wanted to run in the time-frame that we ran. + val totalRequestedSubBurst = + if (interrupted && deadline - end > 0) + min(totalRequestedBurst, requests.sumByDouble { ceil((end - start) / 1000.0 * it.limit) }.toLong()) // Replicate behavior of SimpleBareMetalDriver + else + totalRequestedBurst + // The total burst that the host ran in the time-frame we ran. + val totalAllocatedSubBurst = + if (interrupted && deadline - end > 0) + min(totalRequestedBurst, usage.sumByDouble { ceil((end - start) / 1000.0 * it) }.toLong()) // Replicate behavior of SimpleBareMetalDriver + else + totalAllocatedBurst val totalRemainder = burst.sum() + val totalGrantedBurst = min(totalRequestedBurst, totalAllocatedBurst - totalRemainder) + // The burst that was actually utilized by the VMs (this may be affected by interference) + var totalUsedBurst = 0L val entryIterator = vms.entries.iterator() while (entryIterator.hasNext()) { @@ -277,14 +301,16 @@ class SimpleVirtDriver( for ((i, req) in vmRequests.withIndex()) { // Compute the fraction of compute time allocated to the VM - val fraction = req.allocatedUsage / totalUsage + val fraction = req.allocatedUsage / totalAllocatedUsage // Derive the burst that was allocated to this vCPU - val allocatedBurst = ceil(totalBurst * fraction).toLong() + val allocatedBurst = ceil(totalAllocatedBurst * fraction).toLong() // Compute the burst time that the VM was actually granted val grantedBurst = (performanceScore * (allocatedBurst - ceil(totalRemainder * fraction))).toLong() + totalUsedBurst += grantedBurst + // Compute remaining burst time to be executed for the request req.burst = max(0, vm.burst[i] - grantedBurst) vm.burst[i] = req.burst @@ -307,9 +333,11 @@ class SimpleVirtDriver( eventFlow.emit( HypervisorEvent.SliceFinished( this@SimpleVirtDriver, - totalBurst, - totalBurst - totalRemainder, - vms.size, + totalRequestedBurst, + totalGrantedBurst, + totalRequestedSubBurst - totalGrantedBurst, + max(0, totalAllocatedSubBurst - totalUsedBurst), // Might be smaller than zero due to FP rounding errors + vmCount, // Some of the VMs might already have finished, so keep initial VM count server ) ) diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt index 58d784b0..4f3abc02 100644 --- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt +++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt @@ -62,8 +62,8 @@ internal class HypervisorTest { val driverDom = root.newDomain("driver") - val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2) - val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) } + val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 1) + val cpus = List(1) { ProcessingUnit(cpuNode, it, 2000.0) } val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", emptyMap(), cpus, emptyList()) metalDriver.init() @@ -75,6 +75,7 @@ internal class HypervisorTest { val flavor = Flavor(1, 0) val vmDriver = metalDriver.refresh().server!!.services[VirtDriver] + vmDriver.events.onEach { println(it) }.launchIn(this) val vmA = vmDriver.spawn("a", workloadA, flavor) vmA.events.onEach { println(it) }.launchIn(this) val vmB = vmDriver.spawn("b", workloadB, flavor) diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt index 212b1bfb..eb6ff8de 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/Sc20Monitor.kt @@ -17,7 +17,7 @@ class Sc20Monitor( private val lastServerStates = mutableMapOf<Server, Pair<ServerState, Long>>() init { - outputFile.write("time,duration,requestedBurst,grantedBurst,numberOfDeployedImages,server,hostState,hostUsage,powerDraw,failedVms\n") + outputFile.write("time,duration,requestedBurst,grantedBurst,overcommissionedBurst,interferredBurst,numberOfDeployedImages,server,hostState,hostUsage,powerDraw,failedVms\n") } suspend fun onVmStateChanged(server: Server) {} @@ -32,6 +32,8 @@ class Sc20Monitor( 0, 0, 0, + 0, + 0, server, duration ) @@ -46,6 +48,8 @@ class Sc20Monitor( time: Long, requestedBurst: Long, grantedBurst: Long, + overcommissionedBurst: Long, + interferredBurst: Long, numberOfDeployedImages: Int, hostServer: Server, duration: Long = 5 * 60 * 1000L @@ -57,7 +61,7 @@ class Sc20Monitor( val usage = driver.usage.first() val powerDraw = driver.powerDraw.first() - outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$numberOfDeployedImages,${hostServer.uid},${hostServer.state},$usage,$powerDraw") + outputFile.write("$time,$duration,$requestedBurst,$grantedBurst,$overcommissionedBurst,$interferredBurst,$numberOfDeployedImages,${hostServer.uid},${hostServer.state},$usage,$powerDraw") outputFile.newLine() } diff --git a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt index 3392bd02..bd06520a 100644 --- a/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt +++ b/opendc/opendc-experiments-sc20/src/main/kotlin/com/atlarge/opendc/experiments/sc20/TestExperiment.kt @@ -200,6 +200,8 @@ fun main(args: Array<String>) { simulationContext.clock.millis(), event.requestedBurst, event.grantedBurst, + event.overcommissionedBurst, + event.interferredBurst, event.numberOfDeployedImages, event.hostServer ) @@ -237,7 +239,7 @@ fun main(args: Array<String>) { chan.send(Unit) val server = scheduler.deploy( workload.image.name, workload.image, - Flavor(workload.image.cores, workload.image.requiredMemory) + Flavor(workload.image.maxCores, workload.image.requiredMemory) ) running += server // Monitor server events diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt index 78a58671..e0d81b38 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/sc20/Sc20TraceReader.kt @@ -103,10 +103,10 @@ class Sc20TraceReader( val flops: Long = (cpuUsage * 5 * 60 * cores).toLong() if (flopsHistory.isEmpty()) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { if (flopsHistory.last().flops != flops) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { val oldFragment = flopsHistory.removeAt(flopsHistory.size - 1) flopsHistory.add( @@ -114,7 +114,8 @@ class Sc20TraceReader( oldFragment.tick, oldFragment.flops + flops, oldFragment.duration + traceInterval, - cpuUsage + cpuUsage, + cores ) ) } diff --git a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt index 6b8843aa..fbe77654 100644 --- a/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt +++ b/opendc/opendc-format/src/main/kotlin/com/atlarge/opendc/format/trace/vm/VmTraceReader.kt @@ -101,10 +101,10 @@ class VmTraceReader( val flops: Long = (cpuUsage * 5 * 60 * cores).toLong() if (flopsHistory.isEmpty()) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { if (flopsHistory.last().flops != flops) { - flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage)) + flopsHistory.add(FlopsHistoryFragment(timestamp, flops, traceInterval, cpuUsage, cores)) } else { val oldFragment = flopsHistory.removeAt(flopsHistory.size - 1) flopsHistory.add( @@ -112,7 +112,8 @@ class VmTraceReader( oldFragment.tick, oldFragment.flops + flops, oldFragment.duration + traceInterval, - cpuUsage + cpuUsage, + cores ) ) } |
