bug: Fix incorrect reporting of overcommission

author: Fabian Mastenbroek <mail.fabianm@gmail.com> 2020-04-08 15:27:24 +0200
committer: Fabian Mastenbroek <mail.fabianm@gmail.com> 2020-04-08 15:54:49 +0200
commit: 7a11aff2ff46b0fb3bf01f537946d5fcd66a1e90 (patch)
tree: 71be757043da58f0adaec71c4432e6f5f0135656 /opendc/opendc-compute/src
parent: 02864ba50fafffd19bb1b635eea06004d9fd78aa (diff)
6 files changed, 58 insertions, 20 deletions
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt
index 92c0ab0c..5b0035e3 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/FlopsHistoryFragment.kt
@@ -1,3 +1,3 @@
 package com.atlarge.opendc.compute.core.image
 
-data class FlopsHistoryFragment(val tick: Long, val flops: Long, val duration: Long, val usage: Double)
+data class FlopsHistoryFragment(val tick: Long, val flops: Long, val duration: Long, val usage: Double, val cores: Int)
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt
index 79021d6b..9ad88c17 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt
@@ -14,7 +14,7 @@ class VmImage(
     public override val name: String,
     public override val tags: TagContainer,
     public val flopsHistory: List<FlopsHistoryFragment>,
-    public val cores: Int,
+    public val maxCores: Int,
     public val requiredMemory: Long
 ) : Image {
 
@@ -25,7 +25,7 @@ class VmImage(
             if (fragment.flops == 0L) {
                 delay(fragment.duration)
             } else {
-                val cores = min(this.cores, ctx.server.flavor.cpuCount)
+                val cores = min(fragment.cores, ctx.server.flavor.cpuCount)
                 val burst = LongArray(cores) { fragment.flops / cores }
                 val usage = DoubleArray(cores) { fragment.usage }
 
@@ -34,5 +34,5 @@ class VmImage(
         }
     }
 
-    override fun toString(): String = "VmImage(uid=$uid, name=$name, cores=$cores, requiredMemory=$requiredMemory)"
+    override fun toString(): String = "VmImage(uid=$uid, name=$name, cores=$maxCores, requiredMemory=$requiredMemory)"
 }
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt
index e3cb6e35..8e15584a 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt
@@ -282,7 +282,9 @@ public class SimpleBareMetalDriver(
                     }
 
                     if (!unavailable) {
-                        usageState.value = totalUsage / cpus.size
+                        delay.invokeOnTimeout(1, Runnable {
+                            usageState.value = totalUsage / cpus.size
+                        })
                     }
 
                     val action = Runnable {
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt
index 5c19b00d..24b19ada 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt
@@ -54,13 +54,20 @@ public sealed class HypervisorEvent {
      *
      * @property driver The driver that emitted the event.
      * @property requestedBurst The total requested CPU time (can be above capacity).
-     * @property grantedBurst The actual total granted capacity.
+     * @property grantedBurst The actual total granted capacity, which might be lower than the requested burst due to
+     * the hypervisor being interrupted during a slice.
+     * @property overcommissionedBurst The CPU time that the hypervisor could not grant to the virtual machines because
+     * it did not have the capacity.
+     * @property interferredBurst The sum of CPU time that virtual machines could not utilize due to performance
+     * interference.
      * @property numberOfDeployedImages The number of images deployed on this hypervisor.
      */
     public data class SliceFinished(
         override val driver: VirtDriver,
         public val requestedBurst: Long,
         public val grantedBurst: Long,
+        public val overcommissionedBurst: Long,
+        public val interferredBurst: Long,
         public val numberOfDeployedImages: Int,
         public val hostServer: Server
     ) : HypervisorEvent()
diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt
index 4939a624..08d7e840 100644
--- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt
+++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt
@@ -64,6 +64,7 @@ import java.util.Objects
 import java.util.TreeSet
 import java.util.UUID
 import kotlin.math.ceil
+import kotlin.math.floor
 import kotlin.math.max
 import kotlin.math.min
 
@@ -213,15 +214,21 @@ class SimpleVirtDriver(
 
             val start = clock.millis()
 
+            val vmCount = vms.size
             var duration: Double = Double.POSITIVE_INFINITY
             var deadline: Long = Long.MAX_VALUE
             var availableUsage = maxUsage
+            var totalRequestedUsage = 0.0
+            var totalRequestedBurst = 0L
 
             // Divide the available host capacity fairly across the vCPUs using max-min fair sharing
             for ((i, req) in requests.withIndex()) {
                 val remaining = requests.size - i
                 val availableShare = availableUsage / remaining
-                val grantedUsage = min(req.limit, availableShare)
+                val grantedUsage = floor(min(req.limit, availableShare))
+
+                totalRequestedUsage += req.limit
+                totalRequestedBurst += req.burst
 
                 req.allocatedUsage = grantedUsage
                 availableUsage -= grantedUsage
@@ -231,21 +238,23 @@ class SimpleVirtDriver(
                 deadline = min(deadline, req.vm.deadline)
             }
 
-            val totalUsage = maxUsage - availableUsage
-            var totalBurst = 0L
-            availableUsage = totalUsage
-            val serverLoad = totalUsage / maxUsage
+            duration = ceil(duration)
+
+            val totalAllocatedUsage = maxUsage - availableUsage
+            var totalAllocatedBurst = 0L
+            availableUsage = totalAllocatedUsage
+            val serverLoad = totalAllocatedUsage / maxUsage
 
             // Divide the requests over the available capacity of the pCPUs fairly
             for (i in pCPUs) {
                 val remaining = hostContext.cpus.size - i
                 val availableShare = availableUsage / remaining
                 val grantedUsage = min(hostContext.cpus[i].frequency, availableShare)
-                val pBurst = (duration * grantedUsage).toLong()
+                val pBurst = ceil(duration * grantedUsage).toLong()
 
                 usage[i] = grantedUsage
                 burst[i] = pBurst
-                totalBurst += pBurst
+                totalAllocatedBurst += pBurst
                 availableUsage -= grantedUsage
             }
 
@@ -263,7 +272,22 @@ class SimpleVirtDriver(
                 continue
             }
 
+            // The total burst that the VMs requested for the time frame in which the slice actually ran.
+            val totalRequestedSubBurst =
+                if (interrupted && deadline - end > 0)
+                    min(totalRequestedBurst, requests.sumByDouble { ceil((end - start) / 1000.0 * it.limit) }.toLong()) // Replicate behavior of SimpleBareMetalDriver
+                else
+                    totalRequestedBurst
+            // The total burst that the host executed during the time frame in which the slice actually ran.
+            val totalAllocatedSubBurst =
+                if (interrupted && deadline - end > 0)
+                    min(totalRequestedBurst, usage.sumByDouble { ceil((end - start) / 1000.0 * it) }.toLong()) // Replicate behavior of SimpleBareMetalDriver
+                else
+                    totalAllocatedBurst
             val totalRemainder = burst.sum()
+            val totalGrantedBurst = min(totalRequestedBurst, totalAllocatedBurst - totalRemainder)
+            // The burst that was actually utilized by the VMs (this may be affected by interference)
+            var totalUsedBurst = 0L
 
             val entryIterator = vms.entries.iterator()
             while (entryIterator.hasNext()) {
@@ -277,14 +301,16 @@ class SimpleVirtDriver(
 
                 for ((i, req) in vmRequests.withIndex()) {
                     // Compute the fraction of compute time allocated to the VM
-                    val fraction = req.allocatedUsage / totalUsage
+                    val fraction = req.allocatedUsage / totalAllocatedUsage
 
                     // Derive the burst that was allocated to this vCPU
-                    val allocatedBurst = ceil(totalBurst * fraction).toLong()
+                    val allocatedBurst = ceil(totalAllocatedBurst * fraction).toLong()
 
                     // Compute the burst time that the VM was actually granted
                     val grantedBurst = (performanceScore * (allocatedBurst - ceil(totalRemainder * fraction))).toLong()
 
+                    totalUsedBurst += grantedBurst
+
                     // Compute remaining burst time to be executed for the request
                     req.burst = max(0, vm.burst[i] - grantedBurst)
                     vm.burst[i] = req.burst
@@ -307,9 +333,11 @@ class SimpleVirtDriver(
             eventFlow.emit(
                 HypervisorEvent.SliceFinished(
                     this@SimpleVirtDriver,
-                    totalBurst,
-                    totalBurst - totalRemainder,
-                    vms.size,
+                    totalRequestedBurst,
+                    totalGrantedBurst,
+                    totalRequestedSubBurst - totalGrantedBurst,
+                    max(0, totalAllocatedSubBurst - totalUsedBurst), // Might be smaller than zero due to FP rounding errors
+                    vmCount, // Some of the VMs might already have finished, so keep initial VM count
                     server
                 )
             )
diff --git a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt
index 58d784b0..4f3abc02 100644
--- a/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt
+++ b/opendc/opendc-compute/src/test/kotlin/com/atlarge/opendc/compute/virt/HypervisorTest.kt
@@ -62,8 +62,8 @@ internal class HypervisorTest {
 
             val driverDom = root.newDomain("driver")
 
-            val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 2)
-            val cpus = List(2) { ProcessingUnit(cpuNode, it, 2000.0) }
+            val cpuNode = ProcessingNode("Intel", "Xeon", "amd64", 1)
+            val cpus = List(1) { ProcessingUnit(cpuNode, it, 2000.0) }
             val metalDriver = SimpleBareMetalDriver(driverDom, UUID.randomUUID(), "test", emptyMap(), cpus, emptyList())
 
             metalDriver.init()
@@ -75,6 +75,7 @@ internal class HypervisorTest {
 
             val flavor = Flavor(1, 0)
             val vmDriver = metalDriver.refresh().server!!.services[VirtDriver]
+            vmDriver.events.onEach { println(it) }.launchIn(this)
             val vmA = vmDriver.spawn("a", workloadA, flavor)
             vmA.events.onEach { println(it) }.launchIn(this)
             val vmB = vmDriver.spawn("b", workloadB, flavor)
author	Fabian Mastenbroek <mail.fabianm@gmail.com>	2020-04-08 15:27:24 +0200
committer	Fabian Mastenbroek <mail.fabianm@gmail.com>	2020-04-08 15:54:49 +0200
commit	7a11aff2ff46b0fb3bf01f537946d5fcd66a1e90 (patch)
tree	71be757043da58f0adaec71c4432e6f5f0135656 /opendc/opendc-compute/src
parent	02864ba50fafffd19bb1b635eea06004d9fd78aa (diff)