From 8d4d552e706ad5c5adebc774920337b4f201ac1f Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 12 Apr 2020 13:59:00 +0200 Subject: bug: Fix invalid FLOPs value for trace fragments This change fixes the invalid FLOPs values for the trace fragments which were multiplied twice by the number of cores of the VM. --- .../com/atlarge/opendc/compute/core/image/VmImage.kt | 2 +- .../atlarge/opendc/compute/virt/HypervisorEvent.kt | 4 ++++ .../opendc/compute/virt/driver/SimpleVirtDriver.kt | 20 ++++++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'opendc/opendc-compute/src') diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt index 9ad88c17..b2a01804 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt @@ -27,7 +27,7 @@ class VmImage( } else { val cores = min(fragment.cores, ctx.server.flavor.cpuCount) val burst = LongArray(cores) { fragment.flops / cores } - val usage = DoubleArray(cores) { fragment.usage } + val usage = DoubleArray(cores) { fragment.usage / cores } ctx.run(burst, usage, simulationContext.clock.millis() + fragment.duration) } diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt index 9ceb8bfc..7c088bc8 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/HypervisorEvent.kt @@ -60,6 +60,8 @@ public sealed class HypervisorEvent { * it did not have the capacity. * @property interferedBurst The sum of CPU time that virtual machines could not utilize due to performance * interference. + * @property cpuUsage CPU use in megahertz. + * @property cpuDemand CPU demand in megahertz. * @property numberOfDeployedImages The number of images deployed on this hypervisor. */ public data class SliceFinished( @@ -68,6 +70,8 @@ public sealed class HypervisorEvent { public val grantedBurst: Long, public val overcommissionedBurst: Long, public val interferedBurst: Long, + public val cpuUsage: Double, + public val cpuDemand: Double, public val numberOfDeployedImages: Int, public val hostServer: Server ) : HypervisorEvent() diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index 5f15084d..cec9ce53 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -246,14 +246,14 @@ class SimpleVirtDriver( // Divide the requests over the available capacity of the pCPUs fairly for (i in pCPUs) { - val remaining = hostContext.cpus.size - i - val availableShare = availableUsage / remaining - val grantedUsage = min(hostContext.cpus[i].frequency, availableShare) - val pBurst = ceil(duration * grantedUsage).toLong() + val maxCpuUsage = hostContext.cpus[i].frequency + val fraction = maxCpuUsage / maxUsage + val grantedUsage = min(maxCpuUsage, totalAllocatedUsage * fraction) + val grantedBurst = ceil(duration * grantedUsage).toLong() usage[i] = grantedUsage - burst[i] = pBurst - totalAllocatedBurst += pBurst + burst[i] = grantedBurst + totalAllocatedBurst += grantedBurst availableUsage -= grantedUsage } @@ -308,9 +308,7 @@ class SimpleVirtDriver( if (req.burst <= 0L || req.isCancelled) { hasFinished = true - } - - if (vm.deadline <= end && hostContext.server.state != ServerState.ERROR) { + } else if (vm.deadline <= end && hostContext.server.state != ServerState.ERROR) { // Request must have its entire burst consumed or otherwise we have overcommission // Note that we count the overcommissioned burst if the hypervisor has failed. totalOvercommissionedBurst += req.burst @@ -335,7 +333,9 @@ class SimpleVirtDriver( min(totalRequestedBurst, totalAllocatedBurst), min(totalRequestedBurst, totalGrantedBurst), // We can run more than requested due to timing totalOvercommissionedBurst, - totalInterferedBurst, // Might be smaller than zero due to FP rounding errors + totalInterferedBurst, // Might be smaller than zero due to FP rounding errors, + totalAllocatedUsage, + totalRequestedUsage, vmCount, // Some VMs might already have finished, so keep initial VM count server ) -- cgit v1.2.3 From acfc5edaec2e3ee1f92551bcf3878e7dc8496b7e Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 12 Apr 2020 14:10:03 +0200 Subject: perf: Address bottlenecks in VirtDriver --- .../opendc/compute/virt/driver/SimpleVirtDriver.kt | 51 ++++++++++++---------- 1 file changed, 28 insertions(+), 23 deletions(-) (limited to 'opendc/opendc-compute/src') diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index cec9ce53..9b741ce1 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -42,6 +42,7 @@ import com.atlarge.opendc.core.services.ServiceKey import com.atlarge.opendc.core.services.ServiceRegistry import com.atlarge.opendc.core.workload.IMAGE_PERF_INTERFERENCE_MODEL import com.atlarge.opendc.core.workload.PerformanceInterferenceModel +import kotlinx.coroutines.CancellableContinuation import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.ExperimentalCoroutinesApi @@ -58,11 +59,12 @@ import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.launch import kotlinx.coroutines.selects.SelectClause0 import kotlinx.coroutines.selects.select +import kotlinx.coroutines.suspendCancellableCoroutine import kotlinx.coroutines.withContext -import java.lang.Exception import java.util.Objects import java.util.TreeSet import java.util.UUID +import kotlin.coroutines.resume import kotlin.math.ceil import kotlin.math.max import kotlin.math.min @@ -180,7 +182,7 @@ class SimpleVirtDriver( val pCPUs = hostContext.cpus.indices.sortedBy { hostContext.cpus[it].frequency } val vms = mutableMapOf>() - val requests = TreeSet() + val requests = TreeSet(cpuRequestComparator) val usage = DoubleArray(hostContext.cpus.size) val burst = LongArray(hostContext.cpus.size) @@ -321,7 +323,7 @@ class SimpleVirtDriver( requests.removeAll(vmRequests) // Return vCPU `run` call: the requested burst was completed or deadline was exceeded - vm.chan.send(Unit) + vm.cont?.resume(Unit) } } @@ -343,6 +345,25 @@ class SimpleVirtDriver( } } + /** + * The [Comparator] for [CpuRequest]. + */ + private val cpuRequestComparator: Comparator = Comparator { lhs, rhs -> + var cmp = lhs.limit.compareTo(rhs.limit) + + if (cmp != 0) { + return@Comparator cmp + } + + cmp = lhs.vm.server.uid.compareTo(rhs.vm.server.uid) + + if (cmp != 0) { + return@Comparator cmp + } + + lhs.vcpu.id.compareTo(rhs.vcpu.id) + } + /** * A request to schedule a virtual CPU on the host cpu. */ @@ -351,7 +372,7 @@ class SimpleVirtDriver( val vcpu: ProcessingUnit, var burst: Long, val limit: Double - ) : Comparable { + ) { /** * The usage that was actually granted. */ @@ -364,22 +385,6 @@ class SimpleVirtDriver( override fun equals(other: Any?): Boolean = other is CpuRequest && vm == other.vm && vcpu == other.vcpu override fun hashCode(): Int = Objects.hash(vm, vcpu) - - override fun compareTo(other: CpuRequest): Int { - var cmp = limit.compareTo(other.limit) - - if (cmp != 0) { - return cmp - } - - cmp = vm.server.uid.compareTo(other.vm.server.uid) - - if (cmp != 0) { - return cmp - } - - return vcpu.id.compareTo(other.vcpu.id) - } } internal inner class VmServerContext( @@ -390,7 +395,7 @@ class SimpleVirtDriver( private var finalized: Boolean = false lateinit var burst: LongArray var deadline: Long = 0L - var chan = Channel(Channel.RENDEZVOUS) + var cont: CancellableContinuation? = null private var initialized: Boolean = false internal val job: Job = launch { @@ -462,13 +467,13 @@ class SimpleVirtDriver( // Wait until the burst has been run or the coroutine is cancelled try { schedulingQueue.send(SchedulerCommand.Schedule(this, requests)) - chan.receive() + suspendCancellableCoroutine { cont = it } } catch (e: CancellationException) { // Deschedule the VM withContext(NonCancellable) { requests.forEach { it.isCancelled = true } schedulingQueue.send(SchedulerCommand.Interrupt) - chan.receive() + suspendCancellableCoroutine { cont = it } } e.assertFailure() -- cgit v1.2.3 From 878c22867424c3d361bf2b3d30b0af9e222829fa Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 12 Apr 2020 14:30:57 +0200 Subject: feat: Make bare metal server uid deterministic --- .../atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'opendc/opendc-compute/src') diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt index 8e15584a..844938db 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/metal/driver/SimpleBareMetalDriver.kt @@ -62,6 +62,7 @@ import kotlin.math.min import kotlinx.coroutines.withContext import java.lang.Exception import kotlin.coroutines.ContinuationInterceptor +import kotlin.random.Random /** * A basic implementation of the [BareMetalDriver] that simulates an [Image] running on a bare-metal machine. @@ -116,6 +117,11 @@ public class SimpleBareMetalDriver( override val powerDraw: Flow = powerModel(this) + /** + * The internal random instance. + */ + private val random = Random(0) + override suspend fun init(): Node = withContext(domain.coroutineContext) { nodeState.value } @@ -128,7 +134,7 @@ public class SimpleBareMetalDriver( val events = EventFlow() val server = Server( - UUID.randomUUID(), + UUID(node.uid.leastSignificantBits xor node.uid.mostSignificantBits, random.nextLong()), node.name, emptyMap(), flavor, -- cgit v1.2.3 From a584b53847ca6089d892a08fcd65b8694f262603 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 12 Apr 2020 14:47:27 +0200 Subject: perf: Cache clock and job in VmImage --- .../main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'opendc/opendc-compute/src') diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt index b2a01804..b0688f99 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/core/image/VmImage.kt @@ -3,6 +3,7 @@ package com.atlarge.opendc.compute.core.image import com.atlarge.odcsim.simulationContext import com.atlarge.opendc.compute.core.execution.ServerContext import com.atlarge.opendc.core.resource.TagContainer +import kotlinx.coroutines.Job import kotlinx.coroutines.delay import kotlinx.coroutines.ensureActive import java.util.UUID @@ -19,8 +20,11 @@ class VmImage( ) : Image { override suspend fun invoke(ctx: ServerContext) { + val clock = simulationContext.clock + val job = coroutineContext[Job]!! + for (fragment in flopsHistory) { - coroutineContext.ensureActive() + job.ensureActive() if (fragment.flops == 0L) { delay(fragment.duration) @@ -29,7 +33,7 @@ class VmImage( val burst = LongArray(cores) { fragment.flops / cores } val usage = DoubleArray(cores) { fragment.usage / cores } - ctx.run(burst, usage, simulationContext.clock.millis() + fragment.duration) + ctx.run(burst, usage, clock.millis() + fragment.duration) } } } -- cgit v1.2.3 From cc6912418f554c4dadd8e95d80b4c229d947907b Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 12 Apr 2020 15:27:58 +0200 Subject: bug: Fix scheduling slices to minimum 5 min --- .../com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'opendc/opendc-compute/src') diff --git a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt index 9b741ce1..d81b8825 100644 --- a/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt +++ b/opendc/opendc-compute/src/main/kotlin/com/atlarge/opendc/compute/virt/driver/SimpleVirtDriver.kt @@ -182,7 +182,7 @@ class SimpleVirtDriver( val pCPUs = hostContext.cpus.indices.sortedBy { hostContext.cpus[it].frequency } val vms = mutableMapOf>() - val requests = TreeSet(cpuRequestComparator) + val requests = TreeSet(cpuRequestComparator) val usage = DoubleArray(hostContext.cpus.size) val burst = LongArray(hostContext.cpus.size) @@ -239,7 +239,8 @@ class SimpleVirtDriver( deadline = min(deadline, req.vm.deadline) } - duration = ceil(duration) + // XXX We set the minimum duration to 5 minutes here to prevent the rounding issues that are occurring with the FLOPs. + duration = max(300.0, ceil(duration)) val totalAllocatedUsage = maxUsage - availableUsage var totalAllocatedBurst = 0L -- cgit v1.2.3