diff options
115 files changed, 4628 insertions, 532 deletions
diff --git a/opendc-common/src/main/java/org/opendc/common/ResourceType.java b/opendc-common/src/main/java/org/opendc/common/ResourceType.java new file mode 100644 index 00000000..6e1f8dec --- /dev/null +++ b/opendc-common/src/main/java/org/opendc/common/ResourceType.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2025 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package org.opendc.common; + +public enum ResourceType { + CPU, + GPU, + POWER, + // Combined category for non-compute physical and virtual resources + AUXILIARY +} diff --git a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt index e88379f6..a54a0130 100644 --- a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt +++ b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt @@ -30,10 +30,15 @@ public interface Flavor : Resource { /** * The number of (virtual) processing cores to use. */ - public val coreCount: Int + public val cpuCoreCount: Int /** * The amount of RAM available to the task (in MB). */ public val memorySize: Long + + /** + * The amount of gpu cores available to the task. + */ + public val gpuCoreCount: Int } diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java new file mode 100644 index 00000000..97aaa820 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
 + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.host; + +/** + * A model for a GPU in a host. + * + * @param gpuCoreCapacity The capacity of the GPU cores in Hz. + * @param gpuCoreCount The number of GPU cores. + * @param GpuMemoryCapacity The capacity of the GPU memory in GB. + * @param GpuMemorySpeed The speed of the GPU memory in GB/s. + */ +public record GpuHostModel(double gpuCoreCapacity, int gpuCoreCount, long GpuMemoryCapacity, double GpuMemorySpeed) {} diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java index 1ea73ea6..6464a56c 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java @@ -22,11 +22,24 @@ package org.opendc.compute.simulator.host; +import java.util.List; + /** * Record describing the static machine properties of the host. * - * @param cpuCapacity The total CPU capacity of the host in MHz. - * @param coreCount The number of logical processing cores available for this host. + * @param cpuCapacity The total CPU capacity of the host in MHz. + * @param coreCount The number of logical processing cores available for this host. * @param memoryCapacity The amount of memory available for this host in MB. 
*/ -public record HostModel(double cpuCapacity, int coreCount, long memoryCapacity) {} +public record HostModel(double cpuCapacity, int coreCount, long memoryCapacity, List<GpuHostModel> gpuHostModels) { + /** + * Create a new host model. + * + * @param cpuCapacity The total CPU capacity of the host in MHz. + * @param coreCount The number of logical processing cores available for this host. + * @param memoryCapacity The amount of memory available for this host in MB. + */ + public HostModel(double cpuCapacity, int coreCount, long memoryCapacity) { + this(cpuCapacity, coreCount, memoryCapacity, null); + } +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java index 2b4306af..835c7186 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java @@ -198,7 +198,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver { HostView hv = hostToView.get(host); final ServiceFlavor flavor = task.getFlavor(); if (hv != null) { - hv.provisionedCores -= flavor.getCoreCount(); + hv.provisionedCpuCores -= flavor.getCpuCoreCount(); hv.instanceCount--; hv.availableMemory += flavor.getMemorySize(); } else { @@ -496,7 +496,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver { if (result.getResultType() == SchedulingResultType.FAILURE) { LOGGER.trace("Task {} selected for scheduling but no capacity available for it at the moment", task); - if (flavor.getMemorySize() > maxMemory || flavor.getCoreCount() > maxCores) { + if (flavor.getMemorySize() > maxMemory || flavor.getCpuCoreCount() > maxCores) { // Remove the incoming image taskQueue.remove(req); tasksPending--; @@ -531,7 
+531,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver { attemptsSuccess++; hv.instanceCount++; - hv.provisionedCores += flavor.getCoreCount(); + hv.provisionedCpuCores += flavor.getCpuCoreCount(); hv.availableMemory -= flavor.getMemorySize(); activeTasks.put(task, host); @@ -612,12 +612,12 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver { @NotNull public ServiceFlavor newFlavor( - @NotNull String name, int cpuCount, long memorySize, @NotNull Map<String, ?> meta) { + @NotNull String name, int cpuCount, long memorySize, int gpuCoreCount, @NotNull Map<String, ?> meta) { checkOpen(); final ComputeService service = this.service; UUID uid = new UUID(service.clock.millis(), service.random.nextLong()); - ServiceFlavor flavor = new ServiceFlavor(service, uid, name, cpuCount, memorySize, meta); + ServiceFlavor flavor = new ServiceFlavor(service, uid, name, cpuCount, memorySize, gpuCoreCount, meta); // service.flavorById.put(uid, flavor); // service.flavors.add(flavor); diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java index 7c548add..c07f58c7 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java @@ -31,7 +31,8 @@ public class HostView { private final SimHost host; int instanceCount; long availableMemory; - int provisionedCores; + int provisionedCpuCores; + int provisionedGpuCores; /** * Scheduler bookkeeping @@ -83,8 +84,12 @@ public class HostView { /** * Return the provisioned cores on the host. 
*/ - public int getProvisionedCores() { - return provisionedCores; + public int getProvisionedCpuCores() { + return provisionedCpuCores; + } + + public int getProvisionedGpuCores() { + return provisionedGpuCores; } @Override diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java index eddde87e..8a4359b4 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java @@ -36,22 +36,31 @@ public final class ServiceFlavor implements Flavor { private final ComputeService service; private final UUID uid; private final String name; - private final int coreCount; + private final int cpuCoreCount; private final long memorySize; + private final int gpuCoreCount; private final Map<String, ?> meta; - ServiceFlavor(ComputeService service, UUID uid, String name, int coreCount, long memorySize, Map<String, ?> meta) { + ServiceFlavor( + ComputeService service, + UUID uid, + String name, + int cpuCoreCount, + long memorySize, + int gpuCoreCount, + Map<String, ?> meta) { this.service = service; this.uid = uid; this.name = name; - this.coreCount = coreCount; + this.cpuCoreCount = cpuCoreCount; this.memorySize = memorySize; + this.gpuCoreCount = gpuCoreCount; this.meta = meta; } @Override - public int getCoreCount() { - return coreCount; + public int getCpuCoreCount() { + return cpuCoreCount; } @Override @@ -59,6 +68,11 @@ public final class ServiceFlavor implements Flavor { return memorySize; } + @Override + public int getGpuCoreCount() { + return gpuCoreCount; + } + @NotNull @Override public UUID getUid() { diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java 
b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java new file mode 100644 index 00000000..1aba13e3 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.telemetry; + +/** + * Statistics about the GPUs of a guest. + * + * @param activeTime The cumulative time (in seconds) that the GPUs of the guest were actively running. + * @param idleTime The cumulative time (in seconds) the GPUs of the guest were idle. + * @param stealTime The cumulative GPU time (in seconds) that the guest was ready to run, but not granted time by the host. + * @param lostTime The cumulative GPU time (in seconds) that was lost due to interference with other machines. 
+ * @param capacity The available GPU capacity of the guest (in MHz). + * @param usage Amount of GPU resources (in MHz) actually used by the guest. + * @param utilization The utilization of the GPU resources (in %) relative to the total GPU capacity. + */ +public record GuestGpuStats( + long activeTime, + long idleTime, + long stealTime, + long lostTime, + double capacity, + double usage, + double demand, + double utilization) {} diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java new file mode 100644 index 00000000..e42d7704 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.telemetry; + +/** + * Statistics about the GPUs of a host. + * + * @param activeTime The cumulative time (in seconds) that the GPUs of the host were actively running. + * @param idleTime The cumulative time (in seconds) the GPUs of the host were idle. + * @param stealTime The cumulative GPU time (in seconds) that virtual machines were ready to run, but were not able to. + * @param lostTime The cumulative GPU time (in seconds) that was lost due to interference between virtual machines. + * @param capacity The available GPU capacity of the host (in MHz). + * @param demand Amount of GPU resources (in MHz) the guests would use if there were no GPU contention or GPU + * limits. + * @param usage Amount of GPU resources (in MHz) actually used by the host. + * @param utilization The utilization of the GPU resources (in %) relative to the total GPU capacity. 
+ */ +public record HostGpuStats( + long activeTime, + long idleTime, + long stealTime, + long lostTime, + double capacity, + double demand, + double usage, + double utilization) {} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt index d23794ab..effe3d5b 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt @@ -22,19 +22,22 @@ package org.opendc.compute.simulator.host +import org.opendc.common.ResourceType import org.opendc.compute.api.Flavor import org.opendc.compute.api.TaskState import org.opendc.compute.simulator.internal.Guest import org.opendc.compute.simulator.internal.GuestListener import org.opendc.compute.simulator.service.ServiceTask import org.opendc.compute.simulator.telemetry.GuestCpuStats +import org.opendc.compute.simulator.telemetry.GuestGpuStats import org.opendc.compute.simulator.telemetry.GuestSystemStats import org.opendc.compute.simulator.telemetry.HostCpuStats +import org.opendc.compute.simulator.telemetry.HostGpuStats import org.opendc.compute.simulator.telemetry.HostSystemStats -import org.opendc.simulator.compute.cpu.CpuPowerModel import org.opendc.simulator.compute.machine.SimMachine import org.opendc.simulator.compute.models.MachineModel import org.opendc.simulator.compute.models.MemoryUnit +import org.opendc.simulator.compute.power.PowerModel import org.opendc.simulator.engine.engine.FlowEngine import org.opendc.simulator.engine.graph.FlowDistributor import java.time.Duration @@ -57,7 +60,8 @@ public class SimHost( private val clock: InstantSource, private val engine: FlowEngine, private val machineModel: MachineModel, - private val cpuPowerModel: CpuPowerModel, + private val cpuPowerModel: PowerModel, + 
private val gpuPowerModel: PowerModel?, private val embodiedCarbon: Double, private val expectedLifetime: Double, private val powerDistributor: FlowDistributor, @@ -81,11 +85,22 @@ public class SimHost( field = value } + private val gpuHostModels: List<GpuHostModel>? = + machineModel.gpuModels?.map { gpumodel -> + return@map GpuHostModel( + gpumodel.totalCoreCapacity, + gpumodel.coreCount, + gpumodel.memorySize, + gpumodel.memoryBandwidth, + ) + } + private val model: HostModel = HostModel( machineModel.cpuModel.totalCapacity, machineModel.cpuModel.coreCount, machineModel.memory.size, + gpuHostModels, ) private var simMachine: SimMachine? = null @@ -136,6 +151,7 @@ public class SimHost( this.machineModel, this.powerDistributor, this.cpuPowerModel, + this.gpuPowerModel, ) { cause -> hostState = if (cause != null) HostState.ERROR else HostState.DOWN } @@ -207,7 +223,7 @@ public class SimHost( public fun canFit(task: ServiceTask): Boolean { val sufficientMemory = model.memoryCapacity >= task.flavor.memorySize - val enoughCpus = model.coreCount >= task.flavor.coreCount + val enoughCpus = model.coreCount >= task.flavor.cpuCoreCount val canFit = simMachine!!.canFit(task.flavor.toMachineModel()) return sufficientMemory && enoughCpus && canFit @@ -324,14 +340,14 @@ public class SimHost( val counters = simMachine!!.performanceCounters return HostCpuStats( - counters.cpuActiveTime, - counters.cpuIdleTime, - counters.cpuStealTime, - counters.cpuLostTime, - counters.cpuCapacity, - counters.cpuDemand, - counters.cpuSupply, - counters.cpuSupply / cpuLimit, + counters.activeTime, + counters.idleTime, + counters.stealTime, + counters.lostTime, + counters.capacity, + counters.demand, + counters.supply, + counters.supply / cpuLimit, ) } @@ -340,6 +356,33 @@ public class SimHost( return guest.getCpuStats() } + public fun getGpuStats(): List<HostGpuStats> { + val gpuStats = mutableListOf<HostGpuStats>() + for (gpu in simMachine!!.gpus) { + gpu.updateCounters(this.clock.millis()) + val 
counters = simMachine!!.getGpuPerformanceCounters(gpu.id) + + gpuStats.add( + HostGpuStats( + counters.activeTime, + counters.idleTime, + counters.stealTime, + counters.lostTime, + counters.capacity, + counters.demand, + counters.supply, + counters.supply / gpu.getCapacity(ResourceType.GPU), + ), + ) + } + return gpuStats + } + + public fun getGpuStats(task: ServiceTask): List<GuestGpuStats> { + val guest = requireNotNull(taskToGuestMap[task]) { "Unknown task ${task.name} at host $name" } + return guest.getGpuStats() + } + override fun hashCode(): Int = name.hashCode() override fun equals(other: Any?): Boolean { @@ -352,7 +395,13 @@ public class SimHost( * Convert flavor to machine model. */ private fun Flavor.toMachineModel(): MachineModel { - return MachineModel(simMachine!!.machineModel.cpuModel, MemoryUnit("Generic", "Generic", 3200.0, memorySize)) + return MachineModel( + simMachine!!.machineModel.cpuModel, + MemoryUnit("Generic", "Generic", 3200.0, memorySize), + simMachine!!.machineModel.gpuModels, + simMachine!!.machineModel.cpuDistributionStrategy, + simMachine!!.machineModel.gpuDistributionStrategy, + ) } /** diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt index fe8cbf2f..a980f6cb 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt @@ -27,6 +27,7 @@ import org.opendc.compute.api.TaskState import org.opendc.compute.simulator.host.SimHost import org.opendc.compute.simulator.service.ServiceTask import org.opendc.compute.simulator.telemetry.GuestCpuStats +import org.opendc.compute.simulator.telemetry.GuestGpuStats import org.opendc.compute.simulator.telemetry.GuestSystemStats import 
org.opendc.simulator.compute.machine.SimMachine import org.opendc.simulator.compute.workload.ChainWorkload @@ -64,6 +65,7 @@ public class Guest( private var lastReport = clock.millis() private var bootTime: Instant? = null private val cpuLimit = simMachine.cpu.cpuModel.totalCapacity + private val gpuLimit = simMachine.gpus?.firstOrNull()?.gpuModel?.totalCoreCapacity ?: 0.0 /** * Start the guest. @@ -242,20 +244,43 @@ public class Guest( */ public fun getCpuStats(): GuestCpuStats { virtualMachine!!.updateCounters(this.clock.millis()) - val counters = virtualMachine!!.performanceCounters + val counters = virtualMachine!!.cpuPerformanceCounters return GuestCpuStats( - counters.cpuActiveTime / 1000L, - counters.cpuIdleTime / 1000L, - counters.cpuStealTime / 1000L, - counters.cpuLostTime / 1000L, - counters.cpuCapacity, - counters.cpuSupply, - counters.cpuDemand, - counters.cpuSupply / cpuLimit, + counters.activeTime / 1000L, + counters.idleTime / 1000L, + counters.stealTime / 1000L, + counters.lostTime / 1000L, + counters.capacity, + counters.supply, + counters.demand, + counters.supply / cpuLimit, ) } + public fun getGpuStats(): List<GuestGpuStats> { + virtualMachine!!.updateCounters(this.clock.millis()) + val counters = virtualMachine!!.gpuPerformanceCounters + + val gpuStats = mutableListOf<GuestGpuStats>() + for (gpuCounter in counters) { + gpuStats.add( + GuestGpuStats( + gpuCounter.activeTime / 1000L, + gpuCounter.idleTime / 1000L, + gpuCounter.stealTime / 1000L, + gpuCounter.lostTime / 1000L, + gpuCounter.capacity, + gpuCounter.supply, + gpuCounter.demand, + // Assuming similar scaling as CPU + gpuCounter.supply / gpuLimit, + ), + ) + } + return gpuStats + } + /** * Helper function to track the uptime and downtime of the guest. 
*/ diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt index 675ce3a9..791ab692 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt @@ -127,6 +127,7 @@ public class HostsProvisioningStep internal constructor( engine, hostSpec.model, hostSpec.cpuPowerModel, + hostSpec.gpuPowerModel, hostSpec.embodiedCarbon, hostSpec.expectedLifetime, hostDistributor, diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt index e70cec58..0376a492 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt @@ -27,11 +27,13 @@ package org.opendc.compute.simulator.scheduler import org.opendc.compute.simulator.scheduler.filters.ComputeFilter import org.opendc.compute.simulator.scheduler.filters.RamFilter import org.opendc.compute.simulator.scheduler.filters.VCpuFilter +import org.opendc.compute.simulator.scheduler.filters.VGpuFilter import org.opendc.compute.simulator.scheduler.timeshift.TimeshiftScheduler import org.opendc.compute.simulator.scheduler.weights.CoreRamWeigher import org.opendc.compute.simulator.scheduler.weights.InstanceCountWeigher import org.opendc.compute.simulator.scheduler.weights.RamWeigher import org.opendc.compute.simulator.scheduler.weights.VCpuWeigher +import 
org.opendc.compute.simulator.scheduler.weights.VGpuWeigher import java.time.InstantSource import java.util.SplittableRandom import java.util.random.RandomGenerator @@ -48,6 +50,8 @@ public enum class ComputeSchedulerEnum { Random, TaskNumMemorizing, Timeshift, + ProvisionedCpuGpuCores, + ProvisionedCpuGpuCoresInv, } public fun createPrefabComputeScheduler( @@ -68,6 +72,7 @@ public fun createPrefabComputeScheduler( ): ComputeScheduler { val cpuAllocationRatio = 1.0 val ramAllocationRatio = 1.5 + val gpuAllocationRatio = 1.0 return when (name) { ComputeSchedulerEnum.Mem -> FilterScheduler( @@ -128,5 +133,31 @@ public fun createPrefabComputeScheduler( clock = clock, random = SplittableRandom(seeder.nextLong()), ) + ComputeSchedulerEnum.ProvisionedCpuGpuCores -> + FilterScheduler( + filters = + listOf( + ComputeFilter(), + VCpuFilter(cpuAllocationRatio), + VGpuFilter(gpuAllocationRatio), + RamFilter(ramAllocationRatio), + ), + weighers = listOf(VCpuWeigher(cpuAllocationRatio, multiplier = 1.0), VGpuWeigher(gpuAllocationRatio, multiplier = 1.0)), + ) + ComputeSchedulerEnum.ProvisionedCpuGpuCoresInv -> + FilterScheduler( + filters = + listOf( + ComputeFilter(), + VCpuFilter(cpuAllocationRatio), + VGpuFilter(gpuAllocationRatio), + RamFilter(ramAllocationRatio), + ), + weighers = + listOf( + VCpuWeigher(cpuAllocationRatio, multiplier = -1.0), + VGpuWeigher(gpuAllocationRatio, multiplier = -1.0), + ), + ) } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt index 4e63baf4..7fa7a051 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt @@ -40,7 +40,7 
@@ public class VCpuCapacityFilter : HostFilter { return ( requiredCapacity == null || (availableCapacity / host.host.getModel().coreCount) - >= (requiredCapacity / task.flavor.coreCount) + >= (requiredCapacity / task.flavor.cpuCoreCount) ) } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt index c179a7bf..89739658 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt @@ -35,7 +35,7 @@ public class VCpuFilter(private val allocationRatio: Double) : HostFilter { host: HostView, task: ServiceTask, ): Boolean { - val requested = task.flavor.coreCount + val requested = task.flavor.cpuCoreCount val totalCores = host.host.getModel().coreCount val limit = totalCores * allocationRatio @@ -44,7 +44,7 @@ public class VCpuFilter(private val allocationRatio: Double) : HostFilter { return false } - val availableCores = limit - host.provisionedCores + val availableCores = limit - host.provisionedCpuCores return availableCores >= requested } } diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt new file mode 100644 index 00000000..6dc27327 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation 
files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.scheduler.filters + +import org.opendc.compute.simulator.service.HostView +import org.opendc.compute.simulator.service.ServiceTask +import kotlin.collections.maxOfOrNull + +/** + * A [HostFilter] that filters hosts based on the vCPU speed requirements of a [ServiceTask] and the available + * capacity on the host. + */ +public class VGpuCapacityFilter : HostFilter { + override fun test( + host: HostView, + task: ServiceTask, + ): Boolean { + val requiredCapacity = task.flavor.meta["gpu-capacity"] as? 
Double + val availableCapacity = (host.host.getModel().gpuHostModels().maxOfOrNull { it.gpuCoreCapacity() } ?: 0).toDouble() + val availableCores = (host.host.getModel().gpuHostModels().maxOfOrNull { it -> it.gpuCoreCount } ?: -1).toDouble() + val availableRatio = availableCapacity / availableCores + + return ( + requiredCapacity == null || + ((availableCapacity / availableCores) >= (requiredCapacity / task.flavor.gpuCoreCount)) + ) + } +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt new file mode 100644 index 00000000..9f564776 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.scheduler.filters + +import org.opendc.compute.simulator.service.HostView +import org.opendc.compute.simulator.service.ServiceTask + +/** + * A [HostFilter] that filters hosts based on the vCPU requirements of a [ServiceTask] and the available vCPUs on the host. + * + * @param allocationRatio Virtual CPU to physical CPU allocation ratio. + */ +public class VGpuFilter(private val allocationRatio: Double) : HostFilter { + override fun test( + host: HostView, + task: ServiceTask, + ): Boolean { + val requested = task.flavor.gpuCoreCount + val totalCores = host.host.getModel().gpuHostModels().maxOfOrNull { it.gpuCoreCount() } ?: 0 + val limit = totalCores * allocationRatio + + // Do not allow an instance to overcommit against itself, only against other instances + if (requested > totalCores) { + return false + } + + val availableCores = limit - host.provisionedGpuCores + return availableCores >= requested + } +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt index 4f52e11a..d9b094fb 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt @@ -35,7 +35,7 @@ public class VCpuCapacityWeigher(override val multiplier: Double = 1.0) : HostWe ): Double { val model = host.host.getModel() val requiredCapacity = 
task.flavor.meta["cpu-capacity"] as? Double ?: 0.0 - return model.cpuCapacity - requiredCapacity / task.flavor.coreCount + return model.cpuCapacity - requiredCapacity / task.flavor.cpuCoreCount } override fun toString(): String = "VCpuWeigher" diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt index 3f9a7f03..d882c237 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt @@ -39,7 +39,7 @@ public class VCpuWeigher(private val allocationRatio: Double, override val multi host: HostView, task: ServiceTask, ): Double { - return allocationRatio - host.provisionedCores + return allocationRatio - host.provisionedCpuCores } override fun toString(): String = "VCpuWeigher" diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt new file mode 100644 index 00000000..35f2c7b9 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.compute.simulator.scheduler.weights + +import org.opendc.compute.simulator.service.HostView +import org.opendc.compute.simulator.service.ServiceTask + +/** + * A [HostWeigher] that weighs the hosts based on the difference required vCPU capacity and the available CPU capacity. + */ +public class VGpuCapacityWeigher(override val multiplier: Double = 1.0) : HostWeigher { + override fun getWeight( + host: HostView, + task: ServiceTask, + ): Double { + val model = host.host.getModel() + val requiredCapacity = task.flavor.meta["gpu-capacity"] as? 
Double ?: 0.0 + val availableCapacity = model.gpuHostModels.maxOfOrNull { it.gpuCoreCapacity } ?: 0.0 + return availableCapacity - requiredCapacity / task.flavor.gpuCoreCount + } + + override fun toString(): String = "VGpuWeigher" +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt new file mode 100644 index 00000000..7397bf10 --- /dev/null +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package org.opendc.compute.simulator.scheduler.weights + +import org.opendc.compute.simulator.service.HostView +import org.opendc.compute.simulator.service.ServiceTask + +/** + * A [HostWeigher] that weighs the hosts based on the remaining number of vCPUs available. + * + * @param allocationRatio Virtual CPU to physical CPU allocation ratio. + */ +public class VGpuWeigher(private val allocationRatio: Double, override val multiplier: Double = 1.0) : HostWeigher { + init { + require(allocationRatio > 0.0) { "Allocation ratio must be greater than zero" } + } + + override fun getWeight( + host: HostView, + task: ServiceTask, + ): Double { + return allocationRatio - host.provisionedGpuCores + } + + override fun toString(): String = "VGpuWeigher" +} diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt index 00f7854d..affaab58 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt @@ -144,6 +144,48 @@ public object DfltHostExportColumns { field = Types.required(INT64).named("cpu_time_lost"), ) { it.cpuLostTime } + // TODO: support multiple GPUs + + public val GPU_CAPACITY: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(FLOAT).named("gpu_capacity"), + ) { it.gpuCapacities.getOrNull(0) } + + public val GPU_USAGE: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(FLOAT).named("gpu_usage"), + ) { it.gpuUsages.getOrNull(0) } + + public val GPU_DEMAND: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(FLOAT).named("gpu_demand"), + ) { it.gpuDemands.getOrNull(0) } + + 
public val GPU_UTILIZATION: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(FLOAT).named("gpu_utilization"), + ) { it.gpuUtilizations.getOrNull(0) } + + public val GPU_TIME_ACTIVE: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_active"), + ) { it.gpuActiveTimes.getOrNull(0) } + + public val GPU_TIME_IDLE: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_idle"), + ) { it.gpuIdleTimes.getOrNull(0) } + + public val GPU_TIME_STEAL: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_steal"), + ) { it.gpuStealTimes.getOrNull(0) } + + public val GPU_TIME_LOST: ExportColumn<HostTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_lost"), + ) { it.gpuLostTimes.getOrNull(0) } + public val POWER_DRAW: ExportColumn<HostTableReader> = ExportColumn( field = Types.required(FLOAT).named("power_draw"), diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt index f533eb1f..ad7a1d52 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt @@ -132,6 +132,43 @@ public object DfltTaskExportColumns { field = Types.required(INT64).named("cpu_time_lost"), ) { it.cpuLostTime } + // TODO: support multiple GPUs + + public val GPU_CAPACITY: ExportColumn<TaskTableReader> = + ExportColumn( + field = Types.optional(FLOAT).named("gpu_capacity"), + ) { it.gpuLimits?.getOrNull(0) } + + public val GPU_USAGE: ExportColumn<TaskTableReader> = + ExportColumn( + field = 
Types.optional(FLOAT).named("gpu_usage"), + ) { it.gpuUsages?.getOrNull(0) } + + public val GPU_DEMAND: ExportColumn<TaskTableReader> = + ExportColumn( + field = Types.optional(FLOAT).named("gpu_demand"), + ) { it.gpuDemands?.getOrNull(0) } + + public val GPU_TIME_ACTIVE: ExportColumn<TaskTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_active"), + ) { it.gpuActiveTimes?.getOrNull(0) } + + public val GPU_TIME_IDLE: ExportColumn<TaskTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_idle"), + ) { it.gpuIdleTimes?.getOrNull(0) } + + public val GPU_TIME_STEAL: ExportColumn<TaskTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_steal"), + ) { it.gpuStealTimes?.getOrNull(0) } + + public val GPU_TIME_LOST: ExportColumn<TaskTableReader> = + ExportColumn( + field = Types.optional(INT64).named("gpu_time_lost"), + ) { it.gpuLostTimes?.getOrNull(0) } + public val UP_TIME: ExportColumn<TaskTableReader> = ExportColumn( field = Types.required(INT64).named("uptime"), diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt index a626c41b..4fb930e1 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt @@ -44,6 +44,7 @@ public class ParquetComputeMonitor( private val batteryExporter: Exporter<BatteryTableReader>?, private val serviceExporter: Exporter<ServiceTableReader>?, ) : ComputeMonitor, AutoCloseable { + // FIXME: Include GPU override fun record(reader: HostTableReader) { hostExporter?.write(reader) } diff --git 
a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt index ff0115df..fbffd508 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt @@ -113,6 +113,51 @@ public interface HostTableReader : Exportable { public val cpuLostTime: Long /** + * The capacity of the CPUs in the host (in MHz). + */ + public val gpuCapacities: ArrayList<Double> + + /** + * The capacity of the GPUs in the host (in MHz). They inserted by GPU ID. + */ + public val gpuLimits: ArrayList<Double> + + /** + * The usage per GPU in the host (in MHz). They inserted by GPU ID + */ + public val gpuUsages: ArrayList<Double> + + /** + * The demand per GPU of the guests (in MHz). They inserted by GPU ID + */ + public val gpuDemands: ArrayList<Double> + + /** + * The GPU utilization of the host of each GPU. They inserted by GPU ID. + */ + public val gpuUtilizations: ArrayList<Double> + + /** + * The duration (in ms) that the respective GPU was active in the host. They inserted by GPU ID + */ + public val gpuActiveTimes: ArrayList<Long> + + /** + * The duration (in ms) that a GPU was idle in the host. They inserted by GPU ID + */ + public val gpuIdleTimes: ArrayList<Long> + + /** + * The duration (in ms) that a vGPU wanted to run, but no capacity was available. They inserted by GPU ID. + */ + public val gpuStealTimes: ArrayList<Long> + + /** + * The duration (in ms) of GPU time that was lost due to interference. They inserted by GPU ID + */ + public val gpuLostTimes: ArrayList<Long> + + /** * The current power draw of the host in W. 
*/ public val powerDraw: Double diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt index 6e1dac48..cb25358a 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt @@ -49,6 +49,7 @@ public class HostTableReaderImpl( _tasksActive = table.tasksActive _guestsError = table.guestsError _guestsInvalid = table.guestsInvalid + // CPU stats _cpuCapacity = table.cpuCapacity _cpuDemand = table.cpuDemand _cpuUsage = table.cpuUsage @@ -57,6 +58,18 @@ public class HostTableReaderImpl( _cpuIdleTime = table.cpuIdleTime _cpuStealTime = table.cpuStealTime _cpuLostTime = table.cpuLostTime + // GPU stats + _gpuCapacities = table.gpuCapacities + _gpuLimits = table.gpuLimits + _gpuDemands = table.gpuDemands + _gpuUsages = table.gpuUsages + _gpuUtilizations = table.gpuUtilizations + _gpuActiveTimes = table.gpuActiveTimes + _gpuIdleTimes = table.gpuIdleTimes + _gpuStealTimes = table.gpuStealTimes + _gpuLostTimes = table.gpuLostTimes + + // energy & carbon stats _powerDraw = table.powerDraw _energyUsage = table.energyUsage _embodiedCarbon = table.embodiedCarbon @@ -135,6 +148,65 @@ public class HostTableReaderImpl( private var _cpuLostTime = 0L private var previousCpuLostTime = 0L + override val gpuCapacities: ArrayList<Double> + get() = _gpuCapacities + private var _gpuCapacities: ArrayList<Double> = ArrayList() + + override val gpuLimits: ArrayList<Double> + get() = _gpuLimits + private var _gpuLimits: ArrayList<Double> = ArrayList() + + override val gpuUsages: ArrayList<Double> + get() = _gpuUsages + private var _gpuUsages: ArrayList<Double> = 
ArrayList() + + override val gpuDemands: ArrayList<Double> + get() = _gpuDemands + private var _gpuDemands: ArrayList<Double> = ArrayList() + + override val gpuUtilizations: ArrayList<Double> + get() = _gpuUtilizations + private var _gpuUtilizations: ArrayList<Double> = ArrayList() + + // half of the CPU stats + override val gpuActiveTimes: ArrayList<Long> +// get() = _gpuActiveTimes.zip(previousGpuActiveTimes) { current, previous -> current - previous} as ArrayList<Long> + get() = + (0 until _gpuActiveTimes.size).map { + i -> + (_gpuActiveTimes.getOrNull(i) ?: 0L) - (previousGpuActiveTimes.getOrNull(i) ?: 0L) + } as ArrayList<Long> + private var _gpuActiveTimes: ArrayList<Long> = ArrayList() + private var previousGpuActiveTimes: ArrayList<Long> = ArrayList() + + override val gpuIdleTimes: ArrayList<Long> +// get() = _gpuIdleTimes.zip(previousGpuIdleTimes) { current, previous -> current - previous} as ArrayList<Long> + get() = + (0 until _gpuIdleTimes.size).map { + i -> + (_gpuIdleTimes.getOrNull(i) ?: 0L) - (previousGpuIdleTimes.getOrNull(i) ?: 0L) + } as ArrayList<Long> + private var _gpuIdleTimes: ArrayList<Long> = ArrayList() + private var previousGpuIdleTimes: ArrayList<Long> = ArrayList() + + override val gpuStealTimes: ArrayList<Long> + get() = + (0 until _gpuStealTimes.size).map { + i -> + (_gpuStealTimes.getOrNull(i) ?: 0L) - (previousGpuStealTimes.getOrNull(i) ?: 0L) + } as ArrayList<Long> + private var _gpuStealTimes: ArrayList<Long> = ArrayList() + private var previousGpuStealTimes: ArrayList<Long> = ArrayList() + + override val gpuLostTimes: ArrayList<Long> + get() = + (0 until _gpuLostTimes.size).map { + i -> + (_gpuLostTimes.getOrNull(i) ?: 0L) - (previousGpuLostTimes.getOrNull(i) ?: 0L) + } as ArrayList<Long> + private var _gpuLostTimes: ArrayList<Long> = ArrayList() + private var previousGpuLostTimes: ArrayList<Long> = ArrayList() + override val powerDraw: Double get() = _powerDraw private var _powerDraw = 0.0 @@ -168,6 +240,7 @@ public class 
HostTableReaderImpl( override fun record(now: Instant) { val hostCpuStats = host.getCpuStats() val hostSysStats = host.getSystemStats() + val hostGpuStats = host.getGpuStats() _timestamp = now _timestampAbsolute = now + startTime @@ -184,6 +257,16 @@ public class HostTableReaderImpl( _cpuIdleTime = hostCpuStats.idleTime _cpuStealTime = hostCpuStats.stealTime _cpuLostTime = hostCpuStats.lostTime + // GPU stats + _gpuLimits = hostGpuStats.map { it.capacity } as ArrayList<Double> + _gpuDemands = hostGpuStats.map { it.demand } as ArrayList<Double> + _gpuUsages = hostGpuStats.map { it.usage } as ArrayList<Double> + _gpuUtilizations = hostGpuStats.map { it.utilization } as ArrayList<Double> + _gpuActiveTimes = hostGpuStats.map { it.activeTime } as ArrayList<Long> + _gpuIdleTimes = hostGpuStats.map { it.idleTime } as ArrayList<Long> + _gpuStealTimes = hostGpuStats.map { it.stealTime } as ArrayList<Long> + _gpuLostTimes = hostGpuStats.map { it.lostTime } as ArrayList<Long> + // energy & carbon stats _powerDraw = hostSysStats.powerDraw _energyUsage = hostSysStats.energyUsage _embodiedCarbon = hostSysStats.embodiedCarbon @@ -202,6 +285,10 @@ public class HostTableReaderImpl( previousCpuIdleTime = _cpuIdleTime previousCpuStealTime = _cpuStealTime previousCpuLostTime = _cpuLostTime + previousGpuActiveTimes = _gpuActiveTimes + previousGpuIdleTimes = _gpuIdleTimes + previousGpuStealTimes = _gpuStealTimes + previousGpuLostTimes = _gpuLostTimes previousEnergyUsage = _energyUsage previousUptime = _uptime previousDowntime = _downtime diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt index b0745dd6..f71587c7 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt +++ 
b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt @@ -32,6 +32,7 @@ import java.time.Instant * An interface that is used to read a row of a task trace entry. */ public interface TaskTableReader : Exportable { + // TODO: find better way for more resources public fun copy(): TaskTableReader public fun setValues(table: TaskTableReader) @@ -130,6 +131,41 @@ public interface TaskTableReader : Exportable { public val cpuLostTime: Long /** + * The capacity of the GPUs of Host on which the task is running (in MHz). + */ + public val gpuLimits: DoubleArray? + + /** + * The amount of GPus allocated to the task (in MHz). + */ + public val gpuUsages: DoubleArray? + + /** + * The GPU demanded by this task (in MHz). + */ + public val gpuDemands: DoubleArray? + + /** + * The duration (in seconds) that a GPU was active in the task. + */ + public val gpuActiveTimes: LongArray? + + /** + * The duration (in seconds) that a GPU was idle in the task. + */ + public val gpuIdleTimes: LongArray? + + /** + * The duration (in seconds) that a vGPU wanted to run, but no capacity was available. + */ + public val gpuStealTimes: LongArray? + + /** + * The duration (in seconds) of GPU time that was lost due to interference. + */ + public val gpuLostTimes: LongArray? + + /** * The state of the task */ public val taskState: TaskState? 
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt index d63202a9..6128c9a2 100644 --- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt +++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt @@ -63,6 +63,15 @@ public class TaskTableReaderImpl( _cpuIdleTime = table.cpuIdleTime _cpuStealTime = table.cpuStealTime _cpuLostTime = table.cpuLostTime + // GPU stats + _gpuLimits = table.gpuLimits + _gpuDemands = table.gpuDemands + _gpuUsages = table.gpuUsages + _gpuActiveTimes = table.gpuActiveTimes + _gpuIdleTimes = table.gpuIdleTimes + _gpuStealTimes = table.gpuStealTimes + _gpuLostTimes = table.gpuLostTimes + _uptime = table.uptime _downtime = table.downtime _numFailures = table.numFailures @@ -84,7 +93,7 @@ public class TaskTableReaderImpl( task.name, "vm", "x86", - task.flavor.coreCount, + task.flavor.cpuCoreCount, task.flavor.memorySize, ) @@ -168,6 +177,74 @@ public class TaskTableReaderImpl( private var _cpuLostTime = 0L private var previousCpuLostTime = 0L + override val gpuLimits: DoubleArray? + get() = _gpuLimits ?: DoubleArray(0) + private var _gpuLimits: DoubleArray? = null + + override val gpuUsages: DoubleArray? + get() = _gpuUsages ?: DoubleArray(0) + private var _gpuUsages: DoubleArray? = null + + override val gpuDemands: DoubleArray? + get() = _gpuDemands ?: DoubleArray(0) + private var _gpuDemands: DoubleArray? = null + + override val gpuActiveTimes: LongArray? 
+ get() { + val current = _gpuActiveTimes ?: return LongArray(0) + val previous = previousGpuActiveTimes + + return if (previous == null || current.size != previous.size) { // not sure if I like the second clause + current + } else { + LongArray(current.size) { i -> current[i] - previous[i] } + } + } + private var _gpuActiveTimes: LongArray? = null + private var previousGpuActiveTimes: LongArray? = null + + override val gpuIdleTimes: LongArray? + get() { + val current = _gpuIdleTimes ?: return LongArray(0) + val previous = previousGpuIdleTimes + + return if (previous == null || current.size != previous.size) { // not sure if I like the second clause + current + } else { + LongArray(current.size) { i -> current[i] - previous[i] } + } + } + private var _gpuIdleTimes: LongArray? = null + private var previousGpuIdleTimes: LongArray? = null + + override val gpuStealTimes: LongArray? + get() { + val current = _gpuStealTimes ?: return LongArray(0) + val previous = previousGpuStealTimes + + return if (previous == null || current.size != previous.size) { + current + } else { + LongArray(current.size) { i -> current[i] - previous[i] } + } + } + private var _gpuStealTimes: LongArray? = null + private var previousGpuStealTimes: LongArray? = null + + override val gpuLostTimes: LongArray? + get() { + val current = _gpuLostTimes ?: return LongArray(0) + val previous = previousGpuLostTimes + + return if (previous == null || current.size != previous.size) { + current + } else { + LongArray(current.size) { i -> current[i] - previous[i] } + } + } + private var _gpuLostTimes: LongArray? = null + private var previousGpuLostTimes: LongArray? = null + override val taskState: TaskState? get() = _taskState private var _taskState: TaskState? 
= null @@ -192,6 +269,7 @@ public class TaskTableReaderImpl( val cpuStats = simHost?.getCpuStats(task) val sysStats = simHost?.getSystemStats(task) + val gpuStats = simHost?.getGpuStats(task) _hostName = task.hostName @@ -214,6 +292,26 @@ public class TaskTableReaderImpl( _scheduleTime = task.scheduledAt _finishTime = task.finishedAt + if (gpuStats != null && gpuStats.isNotEmpty()) { + val size = gpuStats.size + _gpuLimits = DoubleArray(size) { i -> gpuStats[i].capacity } + _gpuDemands = DoubleArray(size) { i -> gpuStats[i].demand } + _gpuUsages = DoubleArray(size) { i -> gpuStats[i].usage } + _gpuActiveTimes = LongArray(size) { i -> gpuStats[i].activeTime } + _gpuIdleTimes = LongArray(size) { i -> gpuStats[i].idleTime } + _gpuStealTimes = LongArray(size) { i -> gpuStats[i].stealTime } + _gpuLostTimes = LongArray(size) { i -> gpuStats[i].lostTime } + } else { + _gpuIdleTimes = null + _gpuStealTimes = null + _gpuLostTimes = null + _gpuIdleTimes = null + _gpuLimits = null + _gpuUsages = null + _gpuDemands = null + _gpuActiveTimes = null + } + _taskState = task.state } @@ -227,6 +325,10 @@ public class TaskTableReaderImpl( previousCpuIdleTime = _cpuIdleTime previousCpuStealTime = _cpuStealTime previousCpuLostTime = _cpuLostTime + previousGpuActiveTimes = _gpuActiveTimes + previousGpuIdleTimes = _gpuIdleTimes + previousGpuStealTimes = _gpuStealTimes + previousGpuLostTimes = _gpuLostTimes simHost = null _cpuLimit = 0.0 diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt index 04a20f49..5109f828 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt @@ -78,7 +78,7 @@ internal 
class FilterSchedulerTest { ) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -103,7 +103,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -134,7 +134,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -159,7 +159,7 @@ internal class FilterSchedulerTest { scheduler.addHost(host) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -180,7 +180,7 @@ internal class FilterSchedulerTest { scheduler.addHost(host) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -209,7 +209,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -232,7 +232,7 @@ internal class FilterSchedulerTest { scheduler.addHost(host) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount 
} returns 2 every { req.task.flavor.memorySize } returns 2300 every { req.isCancelled } returns false @@ -250,18 +250,18 @@ internal class FilterSchedulerTest { val hostA = mockk<HostView>() every { hostA.host.getState() } returns HostState.UP every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) - every { hostA.provisionedCores } returns 3 + every { hostA.provisionedCpuCores } returns 3 val hostB = mockk<HostView>() every { hostB.host.getState() } returns HostState.UP every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) - every { hostB.provisionedCores } returns 0 + every { hostB.provisionedCpuCores } returns 0 scheduler.addHost(hostA) scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -279,12 +279,12 @@ internal class FilterSchedulerTest { val host = mockk<HostView>() every { host.host.getState() } returns HostState.UP every { host.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) - every { host.provisionedCores } returns 0 + every { host.provisionedCpuCores } returns 0 scheduler.addHost(host) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 8 + every { req.task.flavor.cpuCoreCount } returns 8 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -312,7 +312,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.task.flavor.meta } returns mapOf("cpu-capacity" to 2 * 3200.0) every { req.isCancelled } returns false @@ -342,7 +342,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { 
req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -358,7 +358,7 @@ internal class FilterSchedulerTest { ) val reqA = mockk<SchedulingRequest>() - every { reqA.task.flavor.coreCount } returns 2 + every { reqA.task.flavor.cpuCoreCount } returns 2 every { reqA.task.flavor.memorySize } returns 1024 every { reqA.isCancelled } returns false val taskA = mockk<ServiceTask>() @@ -369,19 +369,19 @@ internal class FilterSchedulerTest { every { hostA.host.getState() } returns HostState.UP every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) every { hostA.host.getInstances() } returns emptySet() - every { hostA.provisionedCores } returns 3 + every { hostA.provisionedCpuCores } returns 3 val hostB = mockk<HostView>() every { hostB.host.getState() } returns HostState.UP every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) every { hostB.host.getInstances() } returns setOf(reqA.task) - every { hostB.provisionedCores } returns 0 + every { hostB.provisionedCpuCores } returns 0 scheduler.addHost(hostA) scheduler.addHost(hostB) val reqB = mockk<SchedulingRequest>() - every { reqB.task.flavor.coreCount } returns 2 + every { reqB.task.flavor.cpuCoreCount } returns 2 every { reqB.task.flavor.memorySize } returns 1024 every { reqB.task.meta } returns emptyMap() every { reqB.isCancelled } returns false @@ -402,7 +402,7 @@ internal class FilterSchedulerTest { ) val reqA = mockk<SchedulingRequest>() - every { reqA.task.flavor.coreCount } returns 2 + every { reqA.task.flavor.cpuCoreCount } returns 2 every { reqA.task.flavor.memorySize } returns 1024 every { reqA.isCancelled } returns false val taskA = mockk<ServiceTask>() @@ -413,19 +413,19 @@ internal class FilterSchedulerTest { every { hostA.host.getState() } returns HostState.UP every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) every { 
hostA.host.getInstances() } returns setOf(reqA.task) - every { hostA.provisionedCores } returns 3 + every { hostA.provisionedCpuCores } returns 3 val hostB = mockk<HostView>() every { hostB.host.getState() } returns HostState.UP every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) every { hostB.host.getInstances() } returns emptySet() - every { hostB.provisionedCores } returns 0 + every { hostB.provisionedCpuCores } returns 0 scheduler.addHost(hostA) scheduler.addHost(hostB) val reqB = mockk<SchedulingRequest>() - every { reqB.task.flavor.coreCount } returns 2 + every { reqB.task.flavor.cpuCoreCount } returns 2 every { reqB.task.flavor.memorySize } returns 1024 every { reqB.task.meta } returns emptyMap() every { reqB.isCancelled } returns false @@ -459,7 +459,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -488,7 +488,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -506,18 +506,18 @@ internal class FilterSchedulerTest { val hostA = mockk<HostView>() every { hostA.host.getState() } returns HostState.UP every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) - every { hostA.provisionedCores } returns 2 + every { hostA.provisionedCpuCores } returns 2 val hostB = mockk<HostView>() every { hostB.host.getState() } returns HostState.UP every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048) - every { hostB.provisionedCores } returns 0 + every { hostB.provisionedCpuCores } returns 0 scheduler.addHost(hostA) scheduler.addHost(hostB) val req 
= mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -546,7 +546,7 @@ internal class FilterSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt index 92d5008b..6b9b0048 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt @@ -43,7 +43,7 @@ internal class MemorizingSchedulerTest { ) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -67,7 +67,7 @@ internal class MemorizingSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false @@ -101,7 +101,7 @@ internal class MemorizingSchedulerTest { scheduler.addHost(hostB) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false val skipped = 
slot<Int>() @@ -129,7 +129,7 @@ internal class MemorizingSchedulerTest { scheduler.addHost(host) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 2300 every { req.isCancelled } returns false val skipped = slot<Int>() diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt index 46c6425e..02f83eaf 100644 --- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt +++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt @@ -48,7 +48,7 @@ class TimeshiftSchedulerTest { ) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false every { req.task.nature } returns TaskNature(true) @@ -76,7 +76,7 @@ class TimeshiftSchedulerTest { ) val req = mockk<SchedulingRequest>() - every { req.task.flavor.coreCount } returns 2 + every { req.task.flavor.cpuCoreCount } returns 2 every { req.task.flavor.memorySize } returns 1024 every { req.isCancelled } returns false every { req.task.nature } returns TaskNature(true) diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt index b6c945d2..b52608a9 100644 --- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt +++ 
b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt @@ -31,10 +31,13 @@ import org.opendc.compute.topology.specs.HostJSONSpec import org.opendc.compute.topology.specs.HostSpec import org.opendc.compute.topology.specs.PowerSourceSpec import org.opendc.compute.topology.specs.TopologySpec -import org.opendc.simulator.compute.cpu.getPowerModel import org.opendc.simulator.compute.models.CpuModel +import org.opendc.simulator.compute.models.GpuModel import org.opendc.simulator.compute.models.MachineModel import org.opendc.simulator.compute.models.MemoryUnit +import org.opendc.simulator.compute.power.getPowerModel +import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory +import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory.DistributionPolicyType import java.io.File import java.io.InputStream @@ -166,29 +169,63 @@ private fun HostJSONSpec.toHostSpec(clusterName: String): HostSpec { } val unknownMemoryUnit = MemoryUnit(memory.vendor, memory.modelName, memory.memorySpeed.toMHz(), memory.memorySize.toMiB().toLong()) + val gpuUnits = + List(gpu?.count ?: 0) { + GpuModel( + globalCoreId++, + gpu!!.coreCount, + gpu.coreSpeed.toMHz(), + gpu.memoryBandwidth.toKibps(), + gpu.memorySize.toMiB().toLong(), + gpu.vendor, + gpu.modelName, + gpu.architecture, + ) + } + val machineModel = MachineModel( units, unknownMemoryUnit, + gpuUnits, + // TODO: Pass through + DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness), + DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness), ) - val powerModel = + val cpuPowerModel = getPowerModel( - powerModel.modelType, - powerModel.power.toWatts(), - powerModel.maxPower.toWatts(), - powerModel.idlePower.toWatts(), - powerModel.calibrationFactor, - powerModel.asymUtil, - powerModel.dvfs, + cpuPowerModel.modelType, + cpuPowerModel.power.toWatts(), + 
cpuPowerModel.maxPower.toWatts(), + cpuPowerModel.idlePower.toWatts(), + cpuPowerModel.calibrationFactor, + cpuPowerModel.asymUtil, + cpuPowerModel.dvfs, ) + val gpuPowerModel = + if (gpuUnits.isEmpty()) { + null + } else { + getPowerModel( + gpuPowerModel.modelType, + gpuPowerModel.power.toWatts(), + gpuPowerModel.maxPower.toWatts(), + gpuPowerModel.idlePower.toWatts(), + gpuPowerModel.calibrationFactor, + gpuPowerModel.asymUtil, + gpuPowerModel.dvfs, + ) + } + val hostSpec = HostSpec( createUniqueName(this.name, hostNames), clusterName, machineModel, - powerModel, + cpuPowerModel, + gpuPowerModel, ) return hostSpec } diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt index e4ec89e1..30a75896 100644 --- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt +++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt @@ -22,8 +22,8 @@ package org.opendc.compute.topology.specs -import org.opendc.simulator.compute.cpu.CpuPowerModel import org.opendc.simulator.compute.models.MachineModel +import org.opendc.simulator.compute.power.PowerModel /** * Description of a physical host that will be simulated by OpenDC and host the virtual machines. 
@@ -36,7 +36,8 @@ public data class HostSpec( val name: String, val clusterName: String, val model: MachineModel, - val cpuPowerModel: CpuPowerModel, + val cpuPowerModel: PowerModel, + val gpuPowerModel: PowerModel?, val embodiedCarbon: Double = 1000.0, val expectedLifetime: Double = 5.0, ) diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt index 8cbf818b..62c3906a 100644 --- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt +++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt @@ -24,6 +24,7 @@ package org.opendc.compute.topology.specs import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +import org.opendc.common.units.DataRate import org.opendc.common.units.DataSize import org.opendc.common.units.Frequency import org.opendc.common.units.Power @@ -76,7 +77,9 @@ public data class HostJSONSpec( val cpu: CPUJSONSpec, val count: Int = 1, val memory: MemoryJSONSpec, - val powerModel: PowerModelSpec = PowerModelSpec.DFLT, + val gpu: GPUJSONSpec? 
= null, + val cpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT, + val gpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT, ) /** @@ -118,6 +121,18 @@ public data class MemoryJSONSpec( ) @Serializable +public data class GPUJSONSpec( + val count: Int = 1, + val coreCount: Int, + val coreSpeed: Frequency, + val memorySize: DataSize = DataSize.ofMiB(-1), + val memoryBandwidth: DataRate = DataRate.ofKibps(-1), + val vendor: String = "unknown", + val modelName: String = "unknown", + val architecture: String = "unknown", +) + +@Serializable public data class PowerModelSpec( val modelType: String, val power: Power = Power.ofWatts(400), diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt index 80996c0e..7599d4e1 100644 --- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt +++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt @@ -33,11 +33,15 @@ import org.opendc.trace.conv.resourceCpuCapacity import org.opendc.trace.conv.resourceCpuCount import org.opendc.trace.conv.resourceDeadline import org.opendc.trace.conv.resourceDuration +import org.opendc.trace.conv.resourceGpuCapacity +import org.opendc.trace.conv.resourceGpuCount +import org.opendc.trace.conv.resourceGpuMemCapacity import org.opendc.trace.conv.resourceID import org.opendc.trace.conv.resourceMemCapacity import org.opendc.trace.conv.resourceNature import org.opendc.trace.conv.resourceStateCpuUsage import org.opendc.trace.conv.resourceStateDuration +import org.opendc.trace.conv.resourceStateGpuUsage import org.opendc.trace.conv.resourceSubmissionTime import java.io.File import java.lang.ref.SoftReference @@ -79,6 +83,8 @@ public class ComputeWorkloadLoader( val durationCol = reader.resolve(resourceStateDuration) val 
coresCol = reader.resolve(resourceCpuCount) val usageCol = reader.resolve(resourceStateCpuUsage) + val gpuCoresCol = reader.resolve(resourceGpuCount) + val resourceGpuCapacityCol = reader.resolve(resourceStateGpuUsage) val fragments = mutableMapOf<String, Builder>() @@ -88,12 +94,23 @@ public class ComputeWorkloadLoader( val durationMs = reader.getDuration(durationCol)!! val cores = reader.getInt(coresCol) val cpuUsage = reader.getDouble(usageCol) + val gpuUsage = + if (reader.getDouble( + resourceGpuCapacityCol, + ).isNaN() + ) { + 0.0 + } else { + reader.getDouble(resourceGpuCapacityCol) // Default to 0 if not present + } + val gpuCores = reader.getInt(gpuCoresCol) // Default to 0 if not present + val gpuMemory = 0L // Default to 0 if not present val builder = fragments.computeIfAbsent( id, ) { Builder(checkpointInterval, checkpointDuration, checkpointIntervalScaling, scalingPolicy, id) } - builder.add(durationMs, cpuUsage, cores) + builder.add(durationMs, cpuUsage, cores, gpuUsage, gpuCores, gpuMemory) } fragments @@ -117,6 +134,9 @@ public class ComputeWorkloadLoader( val cpuCountCol = reader.resolve(resourceCpuCount) val cpuCapacityCol = reader.resolve(resourceCpuCapacity) val memCol = reader.resolve(resourceMemCapacity) + val gpuCapacityCol = reader.resolve(resourceGpuCapacity) // Assuming GPU capacity is also present + val gpuCoreCountCol = reader.resolve(resourceGpuCount) // Assuming GPU cores are also present + val gpuMemoryCol = reader.resolve(resourceGpuMemCapacity) // Assuming GPU memory is also present val natureCol = reader.resolve(resourceNature) val deadlineCol = reader.resolve(resourceDeadline) @@ -135,6 +155,17 @@ public class ComputeWorkloadLoader( val cpuCount = reader.getInt(cpuCountCol) val cpuCapacity = reader.getDouble(cpuCapacityCol) val memCapacity = reader.getDouble(memCol) / 1000.0 // Convert from KB to MB + val gpuUsage = + if (reader.getDouble( + gpuCapacityCol, + ).isNaN() + ) { + 0.0 + } else { + reader.getDouble(gpuCapacityCol) // 
Default to 0 if not present// Default to 0 if not present + } + val gpuCoreCount = reader.getInt(gpuCoreCountCol) // Default to 0 if not present + val gpuMemory = 0L // currently not implemented val uid = UUID.nameUUIDFromBytes("$id-${counter++}".toByteArray()) var nature = reader.getString(natureCol) var deadline = reader.getLong(deadlineCol) @@ -153,6 +184,9 @@ public class ComputeWorkloadLoader( cpuCount, cpuCapacity, memCapacity.roundToLong(), + gpuCoreCount, + gpuUsage, + gpuMemory, totalLoad, submissionTime, duration, @@ -224,17 +258,23 @@ public class ComputeWorkloadLoader( * Add a fragment to the trace. * * @param duration The duration of the fragment (in epoch millis). - * @param usage CPU usage of this fragment. - * @param cores Number of cores used. + * @param cpuUsage CPU usage of this fragment. + * @param cpuCores Number of cores used. + * @param gpuUsage GPU usage of this fragment. + * @param gpuCores Number of GPU cores used. + * @param gpuMemoryUsage GPU memory usage of this fragment. 
*/ fun add( duration: Duration, - usage: Double, - cores: Int, + cpuUsage: Double, + cpuCores: Int, + gpuUsage: Double = 0.0, + gpuCores: Int = 0, + gpuMemoryUsage: Long = 0, ) { - totalLoad += (usage * duration.toMillis()) / 1000 // avg MHz * duration = MFLOPs + totalLoad += ((cpuUsage * duration.toMillis()) + (gpuUsage * duration.toMillis())) / 1000 // avg MHz * duration = MFLOPs - builder.add(duration.toMillis(), usage, cores) + builder.add(duration.toMillis(), cpuUsage, cpuCores, gpuUsage, gpuCores, gpuMemoryUsage) } /** diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt index 787f271e..228b84ed 100644 --- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt +++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt @@ -43,6 +43,9 @@ public data class Task( val cpuCount: Int, val cpuCapacity: Double, val memCapacity: Long, + val gpuCount: Int = 0, + val gpuCapacity: Double = 0.0, + val gpuMemCapacity: Long = 0L, val totalLoad: Double, var submissionTime: Long, val duration: Long, diff --git a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt index d56e4e4b..72042f3c 100644 --- a/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt +++ b/opendc-experiments/opendc-experiments-base/src/main/kotlin/org/opendc/experiments/base/runner/ScenarioReplayer.kt @@ -129,6 +129,15 @@ public suspend fun ComputeService.replay( TaskNature(false) } + val flavorMeta = mutableMapOf<String, Any>() + + if (entry.cpuCapacity > 0.0) { + flavorMeta["cpu-capacity"] = entry.cpuCapacity + } + if (entry.gpuCapacity > 0.0) { + 
flavorMeta["gpu-capacity"] = entry.gpuCapacity + } + launch { val task = client.newTask( @@ -140,7 +149,8 @@ public suspend fun ComputeService.replay( entry.name, entry.cpuCount, entry.memCapacity, - if (entry.cpuCapacity > 0.0) mapOf("cpu-capacity" to entry.cpuCapacity) else emptyMap(), + entry.gpuCount, + flavorMeta, ), workload, meta, diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/ExperimentTest.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/ExperimentTest.kt index d4729350..582fdbee 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/ExperimentTest.kt +++ b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/ExperimentTest.kt @@ -66,8 +66,8 @@ class ExperimentTest { assertAll( { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((10 * 30000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals((10 * 30000).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals((10 * 30000).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, { assertEquals(600 * 150.0, monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, { assertEquals(600 * 150.0, monitor.energyUsages.sum()) { "Incorrect total energy usage" } }, @@ -117,8 +117,8 @@ class ExperimentTest { assertAll( { assertEquals(15 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((10 * 30000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals(((10 * 30000) + (5 * 
60000)).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals(((10 * 30000) + (5 * 60000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals((600 * 150.0) + (300 * 200.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect energy usage" } }, { assertEquals((600 * 150.0) + (300 * 200.0), monitor.energyUsages.sum()) { "Incorrect energy usage" } }, @@ -160,8 +160,8 @@ class ExperimentTest { assertAll( { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((10 * 30000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals(((10 * 30000)).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals((600 * 150.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect energy usage" } }, { assertEquals((600 * 150.0), monitor.energyUsages.sum()) { "Incorrect energy usage" } }, @@ -204,8 +204,8 @@ class ExperimentTest { assertAll( { assertEquals(25 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((10 * 30000) + (10 * 60000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals(((10 * 30000) + (5 * 60000)).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((10 * 30000) + (10 * 60000)).toLong(), 
monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals(((10 * 30000) + (5 * 60000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals( @@ -215,4 +215,284 @@ class ExperimentTest { }, ) } + + /** + * Simulator test 5: One Task purely running on GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. It solely uses the GPU. + */ + @Test + fun testSimulator5() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + + val monitor = runTest(topology, workload) + + assertAll( + { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, + { assertEquals(((10 * 60 * 1000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "CPU Idle time incorrect" } }, + { assertEquals(0L, monitor.hostCpuActiveTimes["H01"]?.sum()) { "CPU Active time incorrect" } }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuIdleTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Idle time incorrect" } + }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuActiveTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Active time incorrect" } + }, + // double, as CPU and GPU both use power + // higher power usage, as default GPU power model is used range [200, 400] + { assertEquals(2 * 12000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, + { assertEquals((600 * 100.0) + (600 * 300.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, + { assertEquals((600 * 100.0) + (600 * 300.0), monitor.energyUsages.sum()) { "Incorrect total energy 
usage" } }, + ) + } + + /** + * Simulator test 6: One Task running on CPU & GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. CPU & GPU are used and have the same runtime. + */ + @Test + fun testSimulator6() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + + val monitor = runTest(topology, workload) + + assertAll( + { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "CPU Idle time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "CPU Active time incorrect" } }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuIdleTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Idle time incorrect" } + }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuActiveTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Active time incorrect" } + }, + // double, as CPU and GPU both use power + { assertEquals(27000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, + { assertEquals((600 * 150.0) + (600 * 300.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, + { assertEquals((600 * 150.0) + (600 * 300.0), monitor.energyUsages.sum()) { "Incorrect total energy usage" } }, + ) + } + + /** + * Simulator test 7: One Task running on CPU & GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. CPU & GPU are used. CPU will finish way ahead of the GPU. 
+ */ + @Test + fun testSimulator7() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 2000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + + val monitor = runTest(topology, workload) + assertAll( + { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "CPU Idle time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "CPU Active time incorrect" } }, + { + assertEquals( + 0L, + monitor.hostGpuIdleTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Idle time incorrect" } + }, + { + assertEquals( + ((10 * 60000)).toLong(), + monitor.hostGpuActiveTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Active time incorrect" } + }, + // double, as CPU and GPU both use power + { assertEquals(33000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, + { assertEquals((600 * 150.0) + (600 * 400.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, + { assertEquals((600 * 150.0) + (600 * 400.0), monitor.energyUsages.sum()) { "Incorrect total energy usage" } }, + ) + } + + /** + * Simulator test 8: One Task running on CPU & GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. CPU & GPU are used. GPU will finish way ahead of the CPU. 
+ */ + @Test + fun testSimulator8() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 2000.0, 1, 1000.0, 1), + ), + ), + ) + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + val monitor = runTest(topology, workload) + + assertAll( + { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, + { assertEquals(0L, monitor.hostCpuIdleTimes["H01"]?.sum()) { "CPU Idle time incorrect" } }, + { assertEquals(((10 * 60000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "CPU Active time incorrect" } }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuIdleTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Idle time incorrect" } + }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuActiveTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Active time incorrect" } + }, + // double, as CPU and GPU both use power + { assertEquals(30000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, + { assertEquals((600 * 200.0) + (600 * 300.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, + { assertEquals((600 * 200.0) + (600 * 300.0), monitor.energyUsages.sum()) { "Incorrect total energy usage" } }, + ) + } + + /** + * Simulator test 9: Two tasks running on CPU & GPU + * + * In this test, two tasks are scheduled at the same time that takes 10 minutes to run. CPU & GPU are used. Both resources will finish at the same time. 
+ */ + @Test + fun testSimulator9() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1), + ), + ), + createTestTask( + name = "1", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + val monitor = runTest(topology, workload) + + assertAll( + { assertEquals(2 * (10 * 60 * 1000), monitor.maxTimestamp) { "Total runtime incorrect" } }, + { assertEquals(((10 * 60000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "CPU Idle time incorrect" } }, + { assertEquals(((10 * 60000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "CPU Active time incorrect" } }, + { + assertEquals( + ((10 * 60000)).toLong(), + monitor.hostGpuIdleTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Idle time incorrect" } + }, + { + assertEquals( + ((10 * 60000)).toLong(), + monitor.hostGpuActiveTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Active time incorrect" } + }, + // double, as CPU and GPU both use power + { assertEquals(27000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, + { assertEquals(2 * ((600 * 150.0) + (600 * 300.0)), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, + { assertEquals(2 * ((600 * 150.0) + (600 * 300.0)), monitor.energyUsages.sum()) { "Incorrect total energy usage" } }, + ) + } + + /** + * Simulator test 10: Two tasks running on CPU & GPU + * + * In this test, two tasks are scheduled at the same time that takes 10 minutes to run. One task purely uses CPU, one purely GPU. 
+ */ + @Test + fun testSimulator10() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 0.0, 0), + ), + ), + createTestTask( + name = "1", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + val monitor = runTest(topology, workload) + + assertAll( + { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "CPU Idle time incorrect" } }, + { assertEquals(((10 * 30000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "CPU Active time incorrect" } }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuIdleTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Idle time incorrect" } + }, + { + assertEquals( + ((10 * 30000)).toLong(), + monitor.hostGpuActiveTimes["H01"]?.fold(0, { acc, iterator -> acc + iterator[0] }), + ) { "GPU Active time incorrect" } + }, + // double, as CPU and GPU both use power + { assertEquals(27000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect host energy usage at timestamp 0" } }, + { assertEquals((600 * 150.0) + (600 * 300.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect host energy usage" } }, + { assertEquals((600 * 150.0) + (600 * 300.0), monitor.energyUsages.sum()) { "Incorrect total energy usage" } }, + ) + } } diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FailuresAndCheckpointingTest.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FailuresAndCheckpointingTest.kt index df3a3c88..4278ca41 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FailuresAndCheckpointingTest.kt +++ 
b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FailuresAndCheckpointingTest.kt @@ -70,8 +70,8 @@ class FailuresAndCheckpointingTest { assertAll( { assertEquals(20 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((15 * 30000) + (5 * 60000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals((15 * 30000).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((15 * 30000) + (5 * 60000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals((15 * 30000).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals(6000.0, monitor.hostEnergyUsages["H01"]?.get(5)) { "Incorrect energy usage" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(10)) { "Incorrect energy usage" } }, @@ -110,8 +110,8 @@ class FailuresAndCheckpointingTest { assertAll( { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals((10 * 30000).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals((10 * 30000).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals((10 * 30000).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals((10 * 30000).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals((600 * 150.0), monitor.hostEnergyUsages["H01"]?.sum()) { "Incorrect energy usage" } }, ) @@ -153,8 +153,8 @@ class FailuresAndCheckpointingTest { assertAll( { assertEquals(37 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((22 
* 30000) + (15 * 60000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals((22 * 30000).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((22 * 30000) + (15 * 60000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals((22 * 30000).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals(6000.0, monitor.hostEnergyUsages["H01"]?.get(5)) { "Incorrect energy usage" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(10)) { "Incorrect energy usage" } }, @@ -198,8 +198,8 @@ class FailuresAndCheckpointingTest { assertAll( { assertEquals(95 * 60000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((50 * 60000) + (20 * 60000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals((25 * 60000).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((50 * 60000) + (20 * 60000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals((25 * 60000).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals(6000.0, monitor.hostEnergyUsages["H01"]?.get(5)) { "Incorrect energy usage" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(10)) { "Incorrect energy usage" } }, diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FlowDistributorTest.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FlowDistributorTest.kt index 3d733360..7b7b23d2 100644 --- 
a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FlowDistributorTest.kt +++ b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/FlowDistributorTest.kt @@ -557,4 +557,328 @@ class FlowDistributorTest { { assertEquals(1000 * 10 * 60 * 1000, monitor.maxTimestamp) { "The expected runtime is exceeded" } }, ) } + + /** + * FlowDistributor test 14: A single fitting GPU task + * In this test, a single task is scheduled that should fit the FlowDistributor + * We check if both the host and the Task show the correct cpu and gpu usage and demand during the two fragments. + */ + @Test + fun testFlowDistributor14() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1), + TraceFragment(10 * 60 * 1000, 0.0, 0, 2000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + + val monitor = runTest(topology, workload) + + assertAll( + // CPU + // task + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(1)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(10)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(1)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(10)) { "The cpu used by task 0 is incorrect" } }, + // host + { assertEquals(0.0, monitor.hostCpuDemands["H01"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuDemands["H01"]?.get(10)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuSupplied["H01"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuSupplied["H01"]?.get(10)) { "The cpu used by the host is incorrect" } }, + // GPU + // task + { assertEquals(1000.0, 
monitor.taskGpuDemands["0"]?.get(1)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuDemands["0"]?.get(10)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuSupplied["0"]?.get(1)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuSupplied["0"]?.get(10)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(2000.0, monitor.hostGpuDemands["H01"]?.get(10)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0)) { "The gpu used by the host is incorrect" } }, + { assertEquals(2000.0, monitor.hostGpuSupplied["H01"]?.get(10)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } + + /** + * FlowDistributor test 15: One Task running on CPU & GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. CPU & GPU are used and have the same runtime. 
+ */ + @Test + fun testFlowDistributor15() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + + val monitor = runTest(topology, workload) + + assertAll( + // CPU + // task + { assertEquals(1000.0, monitor.taskCpuDemands["0"]?.get(0)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(9)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["0"]?.get(0)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(9)) { "The cpu used by task 0 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostCpuDemands["H01"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuDemands["H01"]?.get(10)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostCpuSupplied["H01"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuSupplied["H01"]?.get(10)) { "The cpu used by the host is incorrect" } }, + // GPU + // task + { assertEquals(1000.0, monitor.taskGpuDemands["0"]?.get(0)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assert(monitor.taskGpuDemands["0"]?.get(9)?.isEmpty() ?: false) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuSupplied["0"]?.get(0)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assert(monitor.taskGpuSupplied["0"]?.get(9)?.isEmpty() ?: false) { "The gpu used by task 0 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0)) { "The gpu used by the host is 
incorrect" } }, + { assertEquals(0.0, monitor.hostGpuDemands["H01"]?.get(10)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuSupplied["H01"]?.get(10)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } + + /** + * FlowDistributor test 16: One Task running on CPU & GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. CPU & GPU are used. CPU will finish way ahead of the GPU. + */ + @Test + fun testFlowDistributor16() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 2000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + + val monitor = runTest(topology, workload) + + assertAll( + // CPU + // task + { assertEquals(1000.0, monitor.taskCpuDemands["0"]?.get(0)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(9)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["0"]?.get(0)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(9)) { "The cpu used by task 0 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostCpuDemands["H01"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuDemands["H01"]?.get(10)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostCpuSupplied["H01"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuSupplied["H01"]?.get(10)) { "The cpu used by the host is incorrect" } }, + // GPU + // task + { assertEquals(2000.0, monitor.taskGpuDemands["0"]?.get(0)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assert(monitor.taskGpuDemands["0"]?.get(9)?.isEmpty() ?: false) { "The gpu demanded by task 0 is incorrect" } }, + { 
assertEquals(2000.0, monitor.taskGpuSupplied["0"]?.get(0)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assert(monitor.taskGpuSupplied["0"]?.get(9)?.isEmpty() ?: false) { "The gpu used by task 0 is incorrect" } }, + // host + { assertEquals(2000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuDemands["H01"]?.get(10)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(2000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0)) { "The gpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuSupplied["H01"]?.get(10)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } + + /** + * FlowDistributor test 17: One Task running on CPU & GPU + * + * In this test, a single task is scheduled that takes 10 minutes to run. CPU & GPU are used. GPU will finish way ahead of the CPU. + */ + @Test + fun testFlowDistributor17() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 2000.0, 1, 1000.0, 1), + ), + ), + ) + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + val monitor = runTest(topology, workload) + + assertAll( + // CPU + // task + { assertEquals(2000.0, monitor.taskCpuDemands["0"]?.get(0)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(9)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(2000.0, monitor.taskCpuSupplied["0"]?.get(0)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(9)) { "The cpu used by task 0 is incorrect" } }, + // host + { assertEquals(2000.0, monitor.hostCpuDemands["H01"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuDemands["H01"]?.get(10)) { "The cpu demanded by the host is incorrect" } }, + { 
assertEquals(2000.0, monitor.hostCpuSupplied["H01"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuSupplied["H01"]?.get(10)) { "The cpu used by the host is incorrect" } }, + // GPU + // task + { assertEquals(1000.0, monitor.taskGpuDemands["0"]?.get(1)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assert(monitor.taskGpuDemands["0"]?.get(9)?.isEmpty() ?: false) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuSupplied["0"]?.get(1)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assert(monitor.taskGpuSupplied["0"]?.get(9)?.isEmpty() ?: false) { "The gpu used by task 0 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuDemands["H01"]?.get(10)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0)) { "The gpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuSupplied["H01"]?.get(10)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } + + /** + * FlowDistributor test 18: Two tasks running on CPU & GPU + * + * In this test, two tasks are scheduled at the same time that takes 10 minutes to run. + * Only one can be scheduled due to resource constraints. + * CPU & GPU are used. Both resources will finish at the same time. 
+ */ + @Test + fun testFlowDistributor18() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1), + ), + ), + createTestTask( + name = "1", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + val monitor = runTest(topology, workload) + assertAll( + // CPU + // task 0 + { assertEquals(1000.0, monitor.taskCpuDemands["0"]?.get(0)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(9)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["0"]?.get(0)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(9)) { "The cpu used by task 0 is incorrect" } }, + // task 1 + { assertEquals(0.0, monitor.taskCpuDemands["1"]?.get(1)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuDemands["1"]?.get(10)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["1"]?.get(19)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["1"]?.get(1)) { "The cpu used by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["1"]?.get(10)) { "The cpu used by task 1 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["1"]?.get(19)) { "The cpu used by task 1 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostCpuDemands["H01"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostCpuDemands["H01"]?.get(10)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostCpuSupplied["H01"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(1000.0, 
monitor.hostCpuSupplied["H01"]?.get(10)) { "The cpu used by the host is incorrect" } }, + // GPU + // task 0 + { assertEquals(1000.0, monitor.taskGpuDemands["0"]?.get(0)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assert(monitor.taskGpuDemands["0"]?.get(9)?.isEmpty() ?: false) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuSupplied["0"]?.get(0)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assert(monitor.taskGpuSupplied["0"]?.get(9)?.isEmpty() ?: false) { "The gpu used by task 0 is incorrect" } }, + // task 1 + { assert(monitor.taskGpuDemands["1"]?.get(0)?.isEmpty() ?: false) { "The gpu demanded by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuDemands["1"]?.get(10)?.get(0)) { "The gpu demanded by task 1 is incorrect" } }, + { assert(monitor.taskGpuDemands["1"]?.get(19)?.isEmpty() ?: false) { "The gpu demanded by task 1 is incorrect" } }, + { assert(monitor.taskGpuSupplied["1"]?.get(0)?.isEmpty() ?: false) { "The gpu used by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuSupplied["1"]?.get(10)?.get(0)) { "The gpu used by task 1 is incorrect" } }, + { assert(monitor.taskGpuSupplied["1"]?.get(19)?.isEmpty() ?: false) { "The gpu used by task 1 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(10)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0)) { "The gpu used by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(10)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } + + /** + * FlowDistributor test 19: Two tasks running on CPU & GPU + * + * In this test, two tasks are scheduled at the same time that takes 10 minutes to run. One task purely uses CPU, one purely GPU. 
+ */ + @Test + fun testFlowDistributor19() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 0.0, 0), + ), + ), + createTestTask( + name = "1", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1), + ), + ), + ) + + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + val monitor = runTest(topology, workload) + + assertAll( + // CPU + // task 0 + { assertEquals(1000.0, monitor.taskCpuDemands["0"]?.get(0)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["0"]?.get(9)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["0"]?.get(0)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["0"]?.get(9)) { "The cpu used by task 0 is incorrect" } }, + // task 1 + { assertEquals(0.0, monitor.taskCpuDemands["1"]?.get(0)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuDemands["1"]?.get(9)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["1"]?.get(0)) { "The cpu used by task 1 is incorrect" } }, + { assertEquals(0.0, monitor.taskCpuSupplied["1"]?.get(9)) { "The cpu used by task 1 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostCpuDemands["H01"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuDemands["H01"]?.get(10)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostCpuSupplied["H01"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostCpuSupplied["H01"]?.get(10)) { "The cpu used by the host is incorrect" } }, + // GPU + // task 0 + { assertEquals(0.0, monitor.taskGpuDemands["0"]?.get(0)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { 
assert(monitor.taskGpuDemands["0"]?.get(9)?.isEmpty() ?: false) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(0.0, monitor.taskGpuSupplied["0"]?.get(0)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assert(monitor.taskGpuSupplied["0"]?.get(9)?.isEmpty() ?: false) { "The gpu used by task 0 is incorrect" } }, + // task 1 + { assertEquals(1000.0, monitor.taskGpuDemands["1"]?.get(0)?.get(0)) { "The gpu demanded by task 1 is incorrect" } }, + { assert(monitor.taskGpuDemands["1"]?.get(9)?.isEmpty() ?: false) { "The gpu demanded by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskGpuSupplied["1"]?.get(0)?.get(0)) { "The gpu used by task 1 is incorrect" } }, + { assert(monitor.taskGpuSupplied["1"]?.get(9)?.isEmpty() ?: false) { "The gpu used by task 1 is incorrect" } }, + // host + { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuDemands["H01"]?.get(10)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0)) { "The gpu used by the host is incorrect" } }, + { assertEquals(0.0, monitor.hostGpuSupplied["H01"]?.get(10)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } } diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/GpuTest.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/GpuTest.kt new file mode 100644 index 00000000..6e5a6b5e --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/GpuTest.kt @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the 
rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.base + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertAll +import org.opendc.compute.topology.specs.ClusterSpec +import org.opendc.compute.workload.Task +import org.opendc.simulator.compute.workload.trace.TraceFragment +import java.util.ArrayList + +/** + * Testing suite containing tests that specifically test the FlowDistributor + */ +class GpuTest { + /** + * Test the creation of a GPU host with a single GPU, in minimal configuration + */ + @Test + fun testGpuHostCreationSingleMinimal() { + val topology = createTopology("Gpus/single_gpu_no_vendor_no_memory.json") + assertGpuConfiguration( + topology, + coreCount = 1, + coreSpeed = 2000.0, + memorySize = -1L, + memoryBandwidth = -1.0, + vendor = "unknown", + modelName = "unknown", + architecture = "unknown", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with a single GPU with memory but no vendor + */ + @Test + fun testGpuHostCreationSingleWithMemoryNoVendor() { + val topology = createTopology("Gpus/single_gpu_no_vendor.json") + 
assertGpuConfiguration( + topology, + coreCount = 1, + coreSpeed = 2000.0, + memorySize = 4096L, + memoryBandwidth = 500.0, + vendor = "unknown", + modelName = "unknown", + architecture = "unknown", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with a single GPU with no memory but with vendor + */ + @Test + fun testGpuHostCreationSingleNoMemoryWithVendor() { + val topology = createTopology("Gpus/single_gpu_no_memory.json") + assertGpuConfiguration( + topology, + coreCount = 1, + coreSpeed = 2000.0, + memorySize = -1L, + memoryBandwidth = -1.0, + vendor = "NVIDIA", + modelName = "Tesla V100", + architecture = "Volta", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with a single GPU, in full configuration + */ + @Test + fun testGpuHostCreationSingleWithMemoryWithVendor() { + val topology = createTopology("Gpus/single_gpu_full.json") + assertGpuConfiguration( + topology, + // cuda cores + coreCount = 5120, +// coreCount = 640, // tensor cores + // fictional value + coreSpeed = 5000.0, + memorySize = 30517578125, + memoryBandwidth = 7031250000.0, + vendor = "NVIDIA", + modelName = "Tesla V100", + architecture = "Volta", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with multiple GPU, in minimal configuration + */ + @Test + fun testGpuHostCreationMultiMinimal() { + val topology = createTopology("Gpus/multi_gpu_no_vendor_no_memory.json") + val count = 3 + assertGpuConfiguration( + topology, + coreCount = 1 * count, + coreSpeed = 2000.0, + memorySize = -1L * count, + memoryBandwidth = -1.0, + vendor = "unknown", + modelName = "unknown", + architecture = "unknown", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with multiple GPU with memory but no vendor + */ + @Test + fun testGpuHostCreationMultiWithMemoryNoVendor() { + val topology = createTopology("Gpus/multi_gpu_no_vendor.json") + val count = 100 + + assertGpuConfiguration( + topology, + coreCount = 1 * count, + coreSpeed = 2000.0, + 
memorySize = 4096L * count, + memoryBandwidth = 500.0, + vendor = "unknown", + modelName = "unknown", + architecture = "unknown", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with multiple GPU with no memory but with vendor + */ + @Test + fun testGpuHostCreationMultiNoMemoryWithVendor() { + val topology = createTopology("Gpus/multi_gpu_no_memory.json") + val count = 2 + assertGpuConfiguration( + topology, + coreCount = 1 * count, + coreSpeed = 2000.0, + memorySize = -1L * count, + memoryBandwidth = -1.0, + vendor = "NVIDIA", + modelName = "Tesla V100", + architecture = "Volta", + gpuCount = 1, + ) + } + + /** + * Test the creation of a GPU host with multiple GPU, in full configuration + */ + @Test + fun testGpuHostCreationMultiWithMemoryWithVendor() { + val topology = createTopology("Gpus/multi_gpu_full.json") + val count = 5 + assertGpuConfiguration( + topology, + // cuda cores + coreCount = 5120 * count, + // fictional value + coreSpeed = 5000.0, + memorySize = 30517578125 * count, + memoryBandwidth = 7031250000.0, + vendor = "NVIDIA", + modelName = "Tesla V100", + architecture = "Volta", + gpuCount = 1, + ) + } + + /** + * This test checks if the FlowDistributor can handle a workload that requires multiple GPUs. + * This test assumes that multiple GPUs are concatenated into on single larger GPU. 
+ */ + @Test + fun testMultiGpuConcation() { + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 2000.0, 1), + ), + ), + createTestTask( + name = "1", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 2000.0, 1), + ), + ), + ) + val topology = createTopology("Gpus/multi_gpu_host.json") + + val monitor = runTest(topology, workload) + + assertAll( + { assertEquals(10 * 60 * 1000, monitor.maxTimestamp) { "The expected runtime is exceeded" } }, + // CPU + // task 0 + { assertEquals(1000.0, monitor.taskCpuDemands["0"]?.get(1)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuDemands["0"]?.get(8)) { "The cpu demanded by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["0"]?.get(1)) { "The cpu used by task 0 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["0"]?.get(8)) { "The cpu used by task 0 is incorrect" } }, + // task 1 + { assertEquals(1000.0, monitor.taskCpuDemands["1"]?.get(1)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuDemands["1"]?.get(8)) { "The cpu demanded by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["1"]?.get(1)) { "The cpu used by task 1 is incorrect" } }, + { assertEquals(1000.0, monitor.taskCpuSupplied["1"]?.get(8)) { "The cpu used by task 1 is incorrect" } }, + // host + { assertEquals(2000.0, monitor.hostCpuDemands["DualGpuHost"]?.get(1)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(2000.0, monitor.hostCpuDemands["DualGpuHost"]?.get(9)) { "The cpu demanded by the host is incorrect" } }, + { assertEquals(2000.0, monitor.hostCpuSupplied["DualGpuHost"]?.get(1)) { "The cpu used by the host is incorrect" } }, + { assertEquals(2000.0, monitor.hostCpuSupplied["DualGpuHost"]?.get(9)) { "The cpu used by the host is incorrect" } }, + // GPU + // task 0 + { 
assertEquals(2000.0, monitor.taskGpuDemands["0"]?.get(1)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuDemands["0"]?.get(8)?.get(0)) { "The gpu demanded by task 0 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuSupplied["0"]?.get(1)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuSupplied["0"]?.get(8)?.get(0)) { "The gpu used by task 0 is incorrect" } }, + // task 1 + { assertEquals(2000.0, monitor.taskGpuDemands["1"]?.get(1)?.get(0)) { "The gpu demanded by task 1 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuDemands["1"]?.get(8)?.get(0)) { "The gpu demanded by task 1 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuSupplied["1"]?.get(1)?.get(0)) { "The gpu used by task 1 is incorrect" } }, + { assertEquals(2000.0, monitor.taskGpuSupplied["1"]?.get(8)?.get(0)) { "The gpu used by task 1 is incorrect" } }, + // host + { assertEquals(4000.0, monitor.hostGpuDemands["DualGpuHost"]?.get(1)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(4000.0, monitor.hostGpuDemands["DualGpuHost"]?.get(9)?.get(0)) { "The gpu demanded by the host is incorrect" } }, + { assertEquals(4000.0, monitor.hostGpuSupplied["DualGpuHost"]?.get(1)?.get(0)) { "The gpu used by the host is incorrect" } }, + { assertEquals(4000.0, monitor.hostGpuSupplied["DualGpuHost"]?.get(9)?.get(0)) { "The gpu used by the host is incorrect" } }, + ) + } + + private fun assertGpuConfiguration( + topology: List<ClusterSpec>, + coreCount: Int, + coreSpeed: Double, + memorySize: Long, + memoryBandwidth: Double, + vendor: String, + modelName: String, + architecture: String, + gpuCount: Int, + ) { + for (cluster in topology) { + for (host in cluster.hostSpecs) { + assert(host.model.gpuModels.size == gpuCount) { "GPU count should be $gpuCount, but is ${host.model.gpuModels.size}" } + + for (gpuModel in host.model.gpuModels) { + assert(gpuModel.coreCount == 
coreCount) { "GPU Core count should be $coreCount, but is ${gpuModel.coreCount}" } + assert(gpuModel.coreSpeed == coreSpeed) { "GPU core speed should be $coreSpeed, but is ${gpuModel.coreSpeed}" } + assert(gpuModel.memorySize == memorySize) { "GPU memory size should be $memorySize, but is ${gpuModel.memorySize}" } + assert(gpuModel.memoryBandwidth == memoryBandwidth) { + "GPU memory bandwidth should be $memoryBandwidth, but is ${gpuModel.memoryBandwidth}" + } + assert(gpuModel.vendor.contentEquals(vendor)) { "GPU vendor should be $vendor, but is ${gpuModel.vendor}" } + assert( + gpuModel.modelName.contentEquals(modelName), + ) { "GPU model name should be $modelName, but is ${gpuModel.modelName}" } + assert( + gpuModel.architecture.contentEquals(architecture), + ) { "GPU architecture should be $architecture, but is ${gpuModel.architecture}" } + } + } + } + } +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/SchedulerTest.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/SchedulerTest.kt index f9a20c68..8f71b7e7 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/SchedulerTest.kt +++ b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/SchedulerTest.kt @@ -25,10 +25,14 @@ package org.opendc.experiments.base import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertAll +import org.opendc.compute.simulator.scheduler.FilterScheduler import org.opendc.compute.simulator.scheduler.MemorizingScheduler import org.opendc.compute.simulator.scheduler.filters.ComputeFilter import org.opendc.compute.simulator.scheduler.filters.RamFilter import org.opendc.compute.simulator.scheduler.filters.VCpuFilter +import org.opendc.compute.simulator.scheduler.filters.VGpuFilter +import org.opendc.compute.simulator.scheduler.weights.VCpuWeigher +import 
org.opendc.compute.simulator.scheduler.weights.VGpuWeigher import org.opendc.compute.workload.Task import org.opendc.simulator.compute.workload.trace.TraceFragment import java.util.ArrayList @@ -65,8 +69,8 @@ class SchedulerTest { assertAll( { assertEquals(25 * 60 * 1000, monitor.maxTimestamp) { "Total runtime incorrect" } }, - { assertEquals(((10 * 30000) + (10 * 60000)).toLong(), monitor.hostIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, - { assertEquals(((10 * 30000) + (5 * 60000)).toLong(), monitor.hostActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, + { assertEquals(((10 * 30000) + (10 * 60000)).toLong(), monitor.hostCpuIdleTimes["H01"]?.sum()) { "Idle time incorrect" } }, + { assertEquals(((10 * 30000) + (5 * 60000)).toLong(), monitor.hostCpuActiveTimes["H01"]?.sum()) { "Active time incorrect" } }, { assertEquals(9000.0, monitor.hostEnergyUsages["H01"]?.get(0)) { "Incorrect energy usage" } }, { assertEquals( @@ -76,4 +80,109 @@ class SchedulerTest { }, ) } + + /** + * This test verifies that the gpu only schedulers are working correctly. + * The same workload is run 4 times, once with the normal gpu filter scheduler and once with the inverted gpu filter scheduler. + * Each scheduler is then run with a hardware configuration where the tasks fit onto one host, and one where multiple hosts are needed. 
+ */ + @Test + fun testGpuAwareSchedulers() { + // Define workload with tasks requiring both CPU and GPU resources + val workload: ArrayList<Task> = + arrayListOf( + createTestTask( + name = "0", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 2000.0, 1), + ), + ), + createTestTask( + name = "1", + fragments = + arrayListOf( + TraceFragment(10 * 60 * 1000, 1000.0, 1, 2000.0, 1), + ), + submissionTime = "1970-01-01T00:20", + ), + ) + + // Topology with 1 host having 2 GPUs (both tasks can fit on one host) + val fittingTopology = createTopology("Gpus/dual_gpu_host.json") + + // Topology with 2 hosts each having 1 GPU (tasks must be distributed) + val nonFittingTopology = createTopology("Gpus/single_gpu_hosts.json") + + val cpuAllocationRatio = 1.0 + val ramAllocationRatio = 1.5 + val gpuAllocationRatio = 1.0 + + // Normal scheduler prioritizes hosts with more available resources + val normalScheduler = + FilterScheduler( + filters = + listOf( + ComputeFilter(), + VCpuFilter(cpuAllocationRatio), + VGpuFilter(gpuAllocationRatio), + RamFilter(ramAllocationRatio), + ), + weighers = listOf(VCpuWeigher(cpuAllocationRatio, multiplier = 1.0), VGpuWeigher(gpuAllocationRatio, multiplier = 1.0)), + ) + + // Inverted scheduler prioritizes hosts with fewer available resources + val invertedScheduler = + FilterScheduler( + filters = + listOf( + ComputeFilter(), + VCpuFilter(cpuAllocationRatio), + VGpuFilter(gpuAllocationRatio), + RamFilter(ramAllocationRatio), + ), + weighers = listOf(VCpuWeigher(cpuAllocationRatio, multiplier = -1.0), VGpuWeigher(gpuAllocationRatio, multiplier = -1.0)), + ) + + // Run the tests with both schedulers and both topologies + val normalFittingMonitor = runTest(fittingTopology, workload, computeScheduler = normalScheduler) + val normalNonFittingMonitor = runTest(nonFittingTopology, workload, computeScheduler = normalScheduler) + val invertedFittingMonitor = runTest(fittingTopology, workload, computeScheduler = invertedScheduler) 
+ val invertedNonFittingMonitor = runTest(nonFittingTopology, workload, computeScheduler = invertedScheduler) + + assertAll( + // Normal scheduler with fitting topology should use just one host + { + assertEquals( + 1, + normalFittingMonitor.hostCpuSupplied.size, + ) { "Normal scheduler should place both tasks on a single host when possible" } + }, + // Normal scheduler with non-fitting topology must use two hosts + { + assertEquals( + 2, + normalNonFittingMonitor.hostCpuSupplied.size, + ) { "Normal scheduler should distribute tasks across hosts when needed" } + }, + // Inverted scheduler with fitting topology might still use one host or distribute depending on implementation + { + assert( + invertedFittingMonitor.hostCpuSupplied.isNotEmpty(), + ) { "Inverted scheduler should place tasks based on resource availability" } + }, + // Inverted scheduler with non-fitting topology must use two hosts + { + assertEquals( + 2, + invertedNonFittingMonitor.hostCpuSupplied.size, + ) { "Inverted scheduler should distribute tasks across hosts when needed" } + }, + // Verify GPU allocations - check that both tasks had their GPUs allocated + { assertEquals(2, normalFittingMonitor.taskGpuSupplied.size) { "Both tasks should have GPU allocations" } }, + { assertEquals(2, normalNonFittingMonitor.taskGpuSupplied.size) { "Both tasks should have GPU allocations" } }, + { assertEquals(2, invertedFittingMonitor.taskGpuSupplied.size) { "Both tasks should have GPU allocations" } }, + { assertEquals(2, invertedNonFittingMonitor.taskGpuSupplied.size) { "Both tasks should have GPU allocations" } }, + ) + } } diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/TestingUtils.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/TestingUtils.kt index eadb74e7..59b8d070 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/TestingUtils.kt +++ 
b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/TestingUtils.kt @@ -22,6 +22,7 @@ package org.opendc.experiments.base +import org.opendc.common.ResourceType import org.opendc.compute.simulator.provisioner.Provisioner import org.opendc.compute.simulator.provisioner.registerComputeMonitor import org.opendc.compute.simulator.provisioner.setupComputeService @@ -53,6 +54,7 @@ import java.time.LocalDateTime import java.time.ZoneOffset import java.util.UUID import kotlin.collections.ArrayList +import kotlin.compareTo /** * Obtain the topology factory for the test. @@ -73,12 +75,23 @@ fun createTestTask( checkpointIntervalScaling: Double = 1.0, scalingPolicy: ScalingPolicy = NoDelayScaling(), ): Task { + var usedResources = arrayOf<ResourceType>() + if (fragments.any { it.cpuUsage > 0.0 }) { + usedResources += ResourceType.CPU + } + if (fragments.any { it.gpuUsage > 0.0 }) { + usedResources += ResourceType.GPU + } + return Task( UUID.nameUUIDFromBytes(name.toByteArray()), name, - fragments.maxOf { it.coreCount }, + fragments.maxOf { it.cpuCoreCount() }, fragments.maxOf { it.cpuUsage }, memCapacity, + gpuCount = fragments.maxOfOrNull { it.gpuCoreCount() } ?: 0, + gpuCapacity = fragments.maxOfOrNull { it.gpuUsage } ?: 0.0, + gpuMemCapacity = fragments.maxOfOrNull { it.gpuMemoryUsage } ?: 0L, 1800000.0, LocalDateTime.parse(submissionTime).toInstant(ZoneOffset.UTC).toEpochMilli(), duration, @@ -91,6 +104,7 @@ fun createTestTask( checkpointIntervalScaling, scalingPolicy, name, + usedResources, ), ) } @@ -134,6 +148,8 @@ fun runTest( class TestComputeMonitor : ComputeMonitor { var taskCpuDemands = mutableMapOf<String, ArrayList<Double>>() var taskCpuSupplied = mutableMapOf<String, ArrayList<Double>>() + var taskGpuDemands = mutableMapOf<String, ArrayList<DoubleArray?>>() + var taskGpuSupplied = mutableMapOf<String, ArrayList<DoubleArray?>>() override fun record(reader: TaskTableReader) { val taskName: String = reader.taskInfo.name @@ 
-145,6 +161,13 @@ class TestComputeMonitor : ComputeMonitor { taskCpuDemands[taskName] = arrayListOf(reader.cpuDemand) taskCpuSupplied[taskName] = arrayListOf(reader.cpuUsage) } + if (taskName in taskGpuDemands) { + taskGpuDemands[taskName]?.add(reader.gpuDemands) + taskGpuSupplied[taskName]?.add(reader.gpuUsages) + } else { + taskGpuDemands[taskName] = arrayListOf(reader.gpuDemands) + taskGpuSupplied[taskName] = arrayListOf(reader.gpuUsages) + } } var attemptsSuccess = 0 @@ -174,13 +197,20 @@ class TestComputeMonitor : ComputeMonitor { maxTimestamp = reader.timestamp.toEpochMilli() } - var hostIdleTimes = mutableMapOf<String, ArrayList<Long>>() - var hostActiveTimes = mutableMapOf<String, ArrayList<Long>>() - var hostStealTimes = mutableMapOf<String, ArrayList<Long>>() - var hostLostTimes = mutableMapOf<String, ArrayList<Long>>() - var hostCpuDemands = mutableMapOf<String, ArrayList<Double>>() var hostCpuSupplied = mutableMapOf<String, ArrayList<Double>>() + var hostCpuIdleTimes = mutableMapOf<String, ArrayList<Long>>() + var hostCpuActiveTimes = mutableMapOf<String, ArrayList<Long>>() + var hostCpuStealTimes = mutableMapOf<String, ArrayList<Long>>() + var hostCpuLostTimes = mutableMapOf<String, ArrayList<Long>>() + + var hostGpuDemands = mutableMapOf<String, ArrayList<ArrayList<Double>>>() + var hostGpuSupplied = mutableMapOf<String, ArrayList<ArrayList<Double>>>() + var hostGpuIdleTimes = mutableMapOf<String, ArrayList<ArrayList<Long>>>() + var hostGpuActiveTimes = mutableMapOf<String, ArrayList<ArrayList<Long>>>() + var hostGpuStealTimes = mutableMapOf<String, ArrayList<ArrayList<Long>>>() + var hostGpuLostTimes = mutableMapOf<String, ArrayList<ArrayList<Long>>>() + var hostPowerDraws = mutableMapOf<String, ArrayList<Double>>() var hostEnergyUsages = mutableMapOf<String, ArrayList<Double>>() @@ -188,24 +218,39 @@ class TestComputeMonitor : ComputeMonitor { val hostName: String = reader.hostInfo.name if (!(hostName in hostCpuDemands)) { - hostIdleTimes[hostName] 
= ArrayList() - hostActiveTimes[hostName] = ArrayList() - hostStealTimes[hostName] = ArrayList() - hostLostTimes[hostName] = ArrayList() + hostCpuIdleTimes[hostName] = ArrayList() + hostCpuActiveTimes[hostName] = ArrayList() + hostCpuStealTimes[hostName] = ArrayList() + hostCpuLostTimes[hostName] = ArrayList() hostCpuDemands[hostName] = ArrayList() hostCpuSupplied[hostName] = ArrayList() hostPowerDraws[hostName] = ArrayList() hostEnergyUsages[hostName] = ArrayList() } - - hostIdleTimes[hostName]?.add(reader.cpuIdleTime) - hostActiveTimes[hostName]?.add(reader.cpuActiveTime) - hostStealTimes[hostName]?.add(reader.cpuStealTime) - hostLostTimes[hostName]?.add(reader.cpuLostTime) + if (hostName !in hostGpuDemands) { + hostGpuDemands[hostName] = ArrayList() + hostGpuSupplied[hostName] = ArrayList() + hostGpuIdleTimes[hostName] = ArrayList() + hostGpuActiveTimes[hostName] = ArrayList() + hostGpuStealTimes[hostName] = ArrayList() + hostGpuLostTimes[hostName] = ArrayList() + } hostCpuDemands[hostName]?.add(reader.cpuDemand) hostCpuSupplied[hostName]?.add(reader.cpuUsage) + hostCpuIdleTimes[hostName]?.add(reader.cpuIdleTime) + hostCpuActiveTimes[hostName]?.add(reader.cpuActiveTime) + hostCpuStealTimes[hostName]?.add(reader.cpuStealTime) + hostCpuLostTimes[hostName]?.add(reader.cpuLostTime) + + hostGpuDemands[hostName]?.add(reader.gpuDemands) + hostGpuSupplied[hostName]?.add(reader.gpuUsages) + hostGpuIdleTimes[hostName]?.add(reader.gpuIdleTimes) + hostGpuActiveTimes[hostName]?.add(reader.gpuActiveTimes) + hostGpuStealTimes[hostName]?.add(reader.gpuStealTimes) + hostGpuLostTimes[hostName]?.add(reader.gpuLostTimes) + hostPowerDraws[hostName]?.add(reader.powerDraw) hostEnergyUsages[hostName]?.add(reader.energyUsage) } diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/dual_gpu_host.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/dual_gpu_host.json new file mode 100644 index 00000000..c5271ff8 --- 
/dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/dual_gpu_host.json @@ -0,0 +1,35 @@ +{ + "clusters": [ + { + "name": "C01", + "hosts": [ + { + "name": "DualGpuHost", + "cpu": { + "coreCount": 4, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "coreCount": 2, + "coreSpeed": 2000 + }, + "gpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_full.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_full.json new file mode 100644 index 00000000..334100fc --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_full.json @@ -0,0 +1,39 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "count": 5, + "coreCount": 5120, + "coreSpeed": 5000, + "memorySize": 30517578125, + "memoryBandwidth": "900 GBps", + "vendor": "NVIDIA", + "modelName": "Tesla V100", + "architecture": "Volta" + } + } + ] + } + ] +} + diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_host.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_host.json new file mode 100644 index 00000000..719f0ab2 --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_host.json @@ -0,0 +1,36 @@ +{ + "clusters": [ + { + "name": "C01", + "hosts": [ + { + "name": "DualGpuHost", 
+ "cpu": { + "coreCount": 4, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "count": 2, + "coreCount": 1, + "coreSpeed": 2000 + }, + "gpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_memory.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_memory.json new file mode 100644 index 00000000..3757e641 --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_memory.json @@ -0,0 +1,36 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "count": 2, + "coreCount": 1, + "coreSpeed": 2000, + "vendor": "NVIDIA", + "modelName": "Tesla V100", + "architecture": "Volta" + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_vendor.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_vendor.json new file mode 100644 index 00000000..07aaac7c --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_vendor.json @@ -0,0 +1,36 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "count": 
100, + "coreCount": 1, + "coreSpeed": 2000, + "memorySize": 4096, + "memoryBandwidth": 500 + } + } + ] + } + ] +} + diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_vendor_no_memory.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_vendor_no_memory.json new file mode 100644 index 00000000..3d036eef --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/multi_gpu_no_vendor_no_memory.json @@ -0,0 +1,34 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": + { + "count": 3, + "coreCount": 1, + "coreSpeed": 2000 + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_full.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_full.json new file mode 100644 index 00000000..8e4c3546 --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_full.json @@ -0,0 +1,44 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": + { + "coreCount": 5120, + "coreSpeed": 5000, + "memorySize": 30517578125, + "memoryBandwidth": "900 GBps", + "vendor": "NVIDIA", + "modelName": "Tesla V100", + "architecture": "Volta" + }, + "gpuPowerModel": { + "modelType": "linear", + "power": 800.0, + "idlePower": 300.0, + "maxPower": 600.0 + } + } + ] + } + ] +} diff --git 
a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_hosts.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_hosts.json new file mode 100644 index 00000000..44b83ef7 --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_hosts.json @@ -0,0 +1,61 @@ +{ + "clusters": [ + { + "name": "C01", + "hosts": [ + { + "name": "SingleGpuHost1", + "cpu": { + "coreCount": 2, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "coreCount": 1, + "coreSpeed": 2000 + }, + "gpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + } + }, + { + "name": "SingleGpuHost2", + "cpu": { + "coreCount": 2, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": { + "coreCount": 1, + "coreSpeed": 2000 + }, + "gpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_memory.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_memory.json new file mode 100644 index 00000000..85be1e6e --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_memory.json @@ -0,0 +1,36 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": + { 
+ "coreCount": 1, + "coreSpeed": 2000, + "vendor": "NVIDIA", + "modelName": "Tesla V100", + "architecture": "Volta" + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_vendor.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_vendor.json new file mode 100644 index 00000000..b54fab75 --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_vendor.json @@ -0,0 +1,35 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": + { + "coreCount": 1, + "coreSpeed": 2000, + "memorySize": 4096, + "memoryBandwidth": 500 + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_vendor_no_memory.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_vendor_no_memory.json new file mode 100644 index 00000000..ed01cf46 --- /dev/null +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_no_vendor_no_memory.json @@ -0,0 +1,33 @@ +{ + "clusters": + [ + { + "name": "C01", + "hosts" : + [ + { + "name": "H01", + "cpu": + { + "coreCount": 1, + "coreSpeed": 2000 + }, + "memory": { + "memorySize": 140457600000 + }, + "cpuPowerModel": { + "modelType": "linear", + "power": 400.0, + "idlePower": 100.0, + "maxPower": 200.0 + }, + "gpu": + { + "coreCount": 1, + "coreSpeed": 2000 + } + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment1.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment1.json index 
8835faeb..ad12a3e5 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment1.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment1.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment2.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment2.json index 8882af09..cbddf7f8 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment2.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment2.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment3.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment3.json index d78626f1..06a2163c 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment3.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment3.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment4.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment4.json index cb0ef4e5..c6e67b6b 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment4.json +++ 
b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/batteries/experiment4.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000.json index ac9a3082..36a1efd7 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_BE.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_BE.json index 3a04b275..1eb20867 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_BE.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_BE.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_DE.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_DE.json index 651e8b54..d11ecc2f 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_DE.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_DE.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": 
"linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_FR.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_FR.json index fed097e9..ebec67e5 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_FR.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_FR.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_NL.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_NL.json index 05805c88..8f5ba1c6 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_NL.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_1_2000_NL.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_2_2000.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_2_2000.json index 24ab0bcd..e34e0256 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_2_2000.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_2_2000.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big.json 
b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big.json index 676d4f3d..47c633c9 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big_BE.json b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big_BE.json index d2c19861..fe4e4813 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big_BE.json +++ b/opendc-experiments/opendc-experiments-base/src/test/resources/topologies/single_50_big_BE.json @@ -15,7 +15,7 @@ "memory": { "memorySize": 140457600000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "linear", "power": 400.0, "idlePower": 100.0, diff --git a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic1.json b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic1.json index 884d27a4..7f5d5f0e 100644 --- a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic1.json +++ b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic1.json @@ -13,7 +13,7 @@ "memory": { "memorySize": 128000000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "asymptotic", "power": 400.0, "idlePower": 32.0, diff --git a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic2.json b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic2.json index 612d5e5a..ad6bdc35 
100644 --- a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic2.json +++ b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_asymptotic2.json @@ -13,7 +13,7 @@ "memory": { "memorySize": 128000000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "asymptotic", "power": 400.0, "idlePower": 32.0, diff --git a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_mse.json b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_mse.json index 0cfef148..25922d7d 100644 --- a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_mse.json +++ b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_mse.json @@ -13,7 +13,7 @@ "memory": { "memorySize": 128000000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "mse", "power": 400.0, "idlePower": 32.0, diff --git a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_sqrt.json b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_sqrt.json index c02c8b80..55b4e0ef 100644 --- a/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_sqrt.json +++ b/opendc-experiments/opendc-experiments-m3sa/src/test/resources/topologies/experiment1/topology_sqrt.json @@ -13,7 +13,7 @@ "memory": { "memorySize": 128000000 }, - "powerModel": { + "cpuPowerModel": { "modelType": "sqrt", "power": 400.0, "idlePower": 32.0, diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/ComputeResource.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/ComputeResource.java new file mode 100644 index 00000000..1167cf06 --- /dev/null +++ 
b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/ComputeResource.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2025 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package org.opendc.simulator.compute; + +import org.opendc.simulator.compute.machine.PerformanceCounters; + +public interface ComputeResource { + + public int getId(); + + public PerformanceCounters getPerformanceCounters(); + + public double getCapacity(); + + public double getDemand(); + + public double getSupply(); +} diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/SimCpu.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/SimCpu.java index 1a56650e..5669eb16 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/SimCpu.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/SimCpu.java @@ -24,21 +24,29 @@ package org.opendc.simulator.compute.cpu; import java.util.List; import java.util.Map; +import org.opendc.common.ResourceType; +import org.opendc.simulator.compute.ComputeResource; import org.opendc.simulator.compute.machine.PerformanceCounters; import org.opendc.simulator.compute.models.CpuModel; +import org.opendc.simulator.compute.power.PowerModel; import org.opendc.simulator.engine.engine.FlowEngine; import org.opendc.simulator.engine.graph.FlowConsumer; import org.opendc.simulator.engine.graph.FlowEdge; import org.opendc.simulator.engine.graph.FlowNode; import org.opendc.simulator.engine.graph.FlowSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A {@link SimCpu} of a machine. 
*/ -public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer { +public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer, ComputeResource { + + private static final Logger LOGGER = LoggerFactory.getLogger(SimCpu.class); + private int id; private final CpuModel cpuModel; - private final CpuPowerModel cpuPowerModel; + private final PowerModel cpuPowerModel; private double currentCpuDemand = 0.0f; // cpu capacity demanded by the mux private double currentCpuUtilization = 0.0f; @@ -60,6 +68,10 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer // Basic Getters and Setters //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + public int getId() { + return id; + } + public double getFrequency() { return cpuModel.getTotalCapacity(); } @@ -87,7 +99,7 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer return this.currentCpuDemand; } - public double getSpeed() { + public double getSupply() { return this.currentCpuSupplied; } @@ -104,8 +116,9 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer // Constructors //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - public SimCpu(FlowEngine engine, CpuModel cpuModel, CpuPowerModel powerModel, int id) { + public SimCpu(FlowEngine engine, CpuModel cpuModel, PowerModel powerModel, int id) { super(engine); + this.id = id; this.cpuModel = cpuModel; this.maxCapacity = this.cpuModel.getTotalCapacity(); @@ -135,7 +148,7 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer this.currentCpuSupplied = Math.min(this.currentCpuDemand, this.maxCapacity); - this.pushOutgoingSupply(this.distributorEdge, this.currentCpuSupplied); + this.pushOutgoingSupply(this.distributorEdge, this.currentCpuSupplied, ResourceType.CPU); return Long.MAX_VALUE; } 
@@ -161,14 +174,14 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer final double factor = this.cpuFrequencyInv * delta; - this.performanceCounters.addCpuActiveTime(Math.round(rate * factor)); - this.performanceCounters.addCpuIdleTime(Math.round((capacity - rate) * factor)); - this.performanceCounters.addCpuStealTime(Math.round((demand - rate) * factor)); + this.performanceCounters.addActiveTime(Math.round(rate * factor)); + this.performanceCounters.addIdleTime(Math.round((capacity - rate) * factor)); + this.performanceCounters.addStealTime(Math.round((demand - rate) * factor)); } - this.performanceCounters.setCpuDemand(this.currentCpuDemand); - this.performanceCounters.setCpuSupply(this.currentCpuSupplied); - this.performanceCounters.setCpuCapacity(this.maxCapacity); + this.performanceCounters.setDemand(this.currentCpuDemand); + this.performanceCounters.setSupply(this.currentCpuSupplied); + this.performanceCounters.setCapacity(this.maxCapacity); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -182,7 +195,7 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer public void pushOutgoingDemand(FlowEdge supplierEdge, double newPowerDemand) { updateCounters(); this.currentPowerDemand = newPowerDemand; - this.psuEdge.pushDemand(newPowerDemand); + this.psuEdge.pushDemand(newPowerDemand, false, ResourceType.CPU); } /** @@ -193,7 +206,15 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer updateCounters(); this.currentCpuSupplied = newCpuSupply; - this.distributorEdge.pushSupply(newCpuSupply, true); + this.distributorEdge.pushSupply(newCpuSupply, true, ResourceType.CPU); + } + + @Override + public void pushOutgoingSupply(FlowEdge consumerEdge, double newCpuSupply, ResourceType resourceType) { + updateCounters(); + this.currentCpuSupplied = newCpuSupply; + + this.distributorEdge.pushSupply(newCpuSupply, 
true, resourceType); } /** @@ -265,4 +286,14 @@ public final class SimCpu extends FlowNode implements FlowSupplier, FlowConsumer FlowEdge.NodeType.CONSUMING, List.of(this.psuEdge), FlowEdge.NodeType.SUPPLYING, List.of(this.distributorEdge)); } + + @Override + public ResourceType getSupplierResourceType() { + return ResourceType.CPU; + } + + @Override + public ResourceType getConsumerResourceType() { + return ResourceType.CPU; + } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/gpu/SimGpu.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/gpu/SimGpu.java new file mode 100644 index 00000000..c5778dc0 --- /dev/null +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/gpu/SimGpu.java @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2024 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package org.opendc.simulator.compute.gpu; + +import java.util.List; +import java.util.Map; +import org.opendc.common.ResourceType; +import org.opendc.simulator.compute.ComputeResource; +import org.opendc.simulator.compute.machine.PerformanceCounters; +import org.opendc.simulator.compute.models.GpuModel; +import org.opendc.simulator.compute.power.PowerModel; +import org.opendc.simulator.engine.engine.FlowEngine; +import org.opendc.simulator.engine.graph.FlowConsumer; +import org.opendc.simulator.engine.graph.FlowEdge; +import org.opendc.simulator.engine.graph.FlowNode; +import org.opendc.simulator.engine.graph.FlowSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link SimGpu} of a machine. + */ +public final class SimGpu extends FlowNode implements FlowSupplier, FlowConsumer, ComputeResource { + private static final Logger LOGGER = LoggerFactory.getLogger(SimGpu.class); + private final int id; + private final GpuModel gpuModel; + + private final PowerModel gpuPowerModel; + + private double currentGpuDemand = 0.0f; // cpu capacity demanded by the mux + private double currentGpuUtilization = 0.0f; + private double currentGpuSupplied = 0.0f; // cpu capacity supplied to the mux + + private double currentPowerDemand; // power demanded of the psu + private double currentPowerSupplied = 0.0f; // cpu capacity supplied by the psu + + private double maxCapacity; + + private final PerformanceCounters performanceCounters = new PerformanceCounters(); + private long lastCounterUpdate; + private final double gpuFrequencyInv; + + private FlowEdge distributorEdge; + private FlowEdge psuEdge; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Basic Getters and Setters + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + public double getFrequency() { + return gpuModel.getTotalCoreCapacity(); + } + + 
public int getId() { + return id; + } + + @Override + public double getCapacity() { + return maxCapacity; + } // TODO: take memory into account + + public PerformanceCounters getPerformanceCounters() { + return performanceCounters; + } + + public double getPowerDraw() { + return this.currentPowerSupplied; + } + + public double getDemand() { + return this.currentGpuDemand; + } + + // TODO: take memory into account + public double getSupply() { + return this.currentGpuSupplied; + } // TODO: take memory into account + + public GpuModel getGpuModel() { + return gpuModel; + } + + @Override + public String toString() { + return "SimBareMetalMachine.Gpu[model=" + gpuModel + "]"; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Constructors + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + public SimGpu(FlowEngine engine, GpuModel gpuModel, PowerModel powerModel, int id) { + super(engine); + this.id = id; + this.gpuModel = gpuModel; + this.maxCapacity = this.gpuModel.getTotalCoreCapacity(); + + this.gpuPowerModel = powerModel; + + this.lastCounterUpdate = clock.millis(); + + this.gpuFrequencyInv = 1 / this.maxCapacity; + + this.currentPowerDemand = this.gpuPowerModel.computePower(this.currentGpuUtilization); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // FlowNode related functionality + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + @Override + public long onUpdate(long now) { + updateCounters(now); + + // Check if supply == demand + if (this.currentPowerDemand != this.currentPowerSupplied) { + this.pushOutgoingDemand(this.psuEdge, this.currentPowerDemand); + + return Long.MAX_VALUE; + } + + this.currentGpuSupplied = Math.min(this.currentGpuDemand, this.maxCapacity); 
+ this.pushOutgoingSupply(this.distributorEdge, this.currentGpuSupplied); + + return Long.MAX_VALUE; + } + + public void updateCounters() { + this.updateCounters(this.clock.millis()); + } + + /** + * Update the performance counters of the GPU. + * + * @param now The timestamp at which to update the counter. + */ + public void updateCounters(long now) { + long lastUpdate = this.lastCounterUpdate; + this.lastCounterUpdate = now; + long delta = now - lastUpdate; + + if (delta > 0) { + double demand = this.currentGpuDemand; + double rate = this.currentGpuSupplied; + double capacity = this.maxCapacity; + + final double factor = this.gpuFrequencyInv * delta; + + this.performanceCounters.addActiveTime(Math.round(rate * factor)); + this.performanceCounters.addIdleTime(Math.round((capacity - rate) * factor)); + this.performanceCounters.addStealTime(Math.round((demand - rate) * factor)); + } + + this.performanceCounters.setDemand(this.currentGpuDemand); + this.performanceCounters.setSupply(this.currentGpuSupplied); + this.performanceCounters.setCapacity(this.maxCapacity); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // FlowGraph Related functionality + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Push new demand to the psu + */ + @Override + public void pushOutgoingDemand(FlowEdge supplierEdge, double newPowerDemand) { + updateCounters(); + this.currentPowerDemand = newPowerDemand; + this.psuEdge.pushDemand(newPowerDemand, false, ResourceType.GPU); + } + + /** + * Push updated supply to the mux + */ + @Override + public void pushOutgoingSupply(FlowEdge consumerEdge, double newGpuSupply) { + updateCounters(); + this.currentGpuSupplied = newGpuSupply; + + this.distributorEdge.pushSupply(newGpuSupply, true, ResourceType.GPU); + } + + /** + * Push updated supply to the mux + */ + @Override + public void 
pushOutgoingSupply(FlowEdge consumerEdge, double newGpuSupply, ResourceType resourceType) { + updateCounters(); + this.currentGpuSupplied = newGpuSupply; + + this.distributorEdge.pushSupply(newGpuSupply, true, resourceType); + } + + /** + * Handle new demand coming in from the mux + */ + @Override + public void handleIncomingDemand(FlowEdge consumerEdge, double newGpuDemand) { + updateCounters(); + this.currentGpuDemand = newGpuDemand; + + this.currentGpuUtilization = Math.min(this.currentGpuDemand / this.maxCapacity, 1.0); + + // Calculate Power Demand and send to PSU + this.currentPowerDemand = this.gpuPowerModel.computePower(this.currentGpuUtilization); + + this.invalidate(); + } + + /** + * Handle updated supply from the psu + */ + @Override + public void handleIncomingSupply(FlowEdge supplierEdge, double newPowerSupply) { + updateCounters(); + this.currentPowerSupplied = newPowerSupply; + + this.invalidate(); + } + + /** + * Add a connection to the mux + */ + @Override + public void addConsumerEdge(FlowEdge consumerEdge) { + this.distributorEdge = consumerEdge; + } + + /** + * Add a connection to the psu + */ + @Override + public void addSupplierEdge(FlowEdge supplierEdge) { + this.psuEdge = supplierEdge; + + this.invalidate(); + } + + /** + * Remove the connection to the mux + */ + @Override + public void removeConsumerEdge(FlowEdge consumerEdge) { + this.distributorEdge = null; + this.invalidate(); + } + + /** + * Remove the connection to the psu + */ + @Override + public void removeSupplierEdge(FlowEdge supplierEdge) { + this.psuEdge = null; + this.invalidate(); + } + + @Override + public Map<FlowEdge.NodeType, List<FlowEdge>> getConnectedEdges() { + return Map.of( + FlowEdge.NodeType.CONSUMING, List.of(this.psuEdge), + FlowEdge.NodeType.SUPPLYING, List.of(this.distributorEdge)); + } + + @Override + public ResourceType getSupplierResourceType() { + return ResourceType.GPU; + } + + @Override + public ResourceType getConsumerResourceType() { + return 
ResourceType.GPU; + } +} diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/PerformanceCounters.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/PerformanceCounters.java index f5b8d27d..93033bc0 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/PerformanceCounters.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/PerformanceCounters.java @@ -23,80 +23,97 @@ package org.opendc.simulator.compute.machine; public class PerformanceCounters { - private long cpuActiveTime = 0; - private long cpuIdleTime = 0; - private long cpuStealTime = 0; - private long cpuLostTime = 0; - private double cpuCapacity = 0.0f; - private double cpuDemand = 0.0f; - private double cpuSupply = 0.0f; + private long activeTime = 0; + private long idleTime = 0; + private long stealTime = 0; + private long lostTime = 0; - public long getCpuActiveTime() { - return cpuActiveTime; + private double capacity = 0.0f; + private double demand = 0.0f; + private double supply = 0.0f; + + public long getActiveTime() { + return this.activeTime; + } + + public long getIdleTime() { + return this.idleTime; + } + + public long getStealTime() { + return this.stealTime; + } + + public long getLostTime() { + return this.lostTime; + } + + public double getCapacity() { + return this.capacity; } - public void setCpuActiveTime(long cpuActiveTime) { - this.cpuActiveTime = cpuActiveTime; + public double getDemand() { + return this.demand; } - public void addCpuActiveTime(long cpuActiveTime) { - this.cpuActiveTime += cpuActiveTime; + public double getSupply() { + return this.supply; } - public long getCpuIdleTime() { - return cpuIdleTime; + public void setActiveTime(long activeTime) { + this.activeTime = activeTime; } - public void setCpuIdleTime(long cpuIdleTime) { - this.cpuIdleTime = cpuIdleTime; + public 
void setIdleTime(long idleTime) { + this.idleTime = idleTime; } - public void addCpuIdleTime(long cpuIdleTime) { - this.cpuIdleTime += cpuIdleTime; + public void setStealTime(long stealTime) { + this.stealTime = stealTime; } - public long getCpuStealTime() { - return cpuStealTime; + public void setLostTime(long lostTime) { + this.lostTime = lostTime; } - public void setCpuStealTime(long cpuStealTime) { - this.cpuStealTime = cpuStealTime; + public void setCapacity(double capacity) { + this.capacity = capacity; } - public void addCpuStealTime(long cpuStealTime) { - this.cpuStealTime += cpuStealTime; + public void setDemand(double demand) { + this.demand = demand; } - public long getCpuLostTime() { - return cpuLostTime; + public void setSupply(double supply) { + this.supply = supply; } - public void setCpuLostTime(long cpuLostTime) { - this.cpuLostTime = cpuLostTime; + public void addActiveTime(long activeTime) { + this.activeTime += activeTime; } - public double getCpuCapacity() { - return cpuCapacity; + public void addIdleTime(long idleTime) { + this.idleTime += idleTime; } - public void setCpuCapacity(double cpuCapacity) { - this.cpuCapacity = cpuCapacity; + public void addStealTime(long stealTime) { + this.stealTime += stealTime; } - public double getCpuDemand() { - return cpuDemand; + public void addLostTime(long lostTime) { + this.lostTime += lostTime; } - public void setCpuDemand(double cpuDemand) { - this.cpuDemand = cpuDemand; + public void addCapacity(double capacity) { + this.capacity += capacity; } - public double getCpuSupply() { - return cpuSupply; + public void addDemand(double demand) { + this.demand += demand; } - public void setCpuSupply(double cpuSupply) { - this.cpuSupply = cpuSupply; + public void addSupply(double supply) { + this.supply += supply; } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/SimMachine.java 
b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/SimMachine.java index 8baa7f34..8792552e 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/SimMachine.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/machine/SimMachine.java @@ -23,18 +23,29 @@ package org.opendc.simulator.compute.machine; import java.time.InstantSource; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.List; import java.util.function.Consumer; -import org.opendc.simulator.compute.cpu.CpuPowerModel; +import org.jetbrains.annotations.Nullable; +import org.opendc.common.ResourceType; +import org.opendc.simulator.compute.ComputeResource; import org.opendc.simulator.compute.cpu.SimCpu; +import org.opendc.simulator.compute.gpu.SimGpu; import org.opendc.simulator.compute.memory.Memory; +import org.opendc.simulator.compute.models.GpuModel; import org.opendc.simulator.compute.models.MachineModel; +import org.opendc.simulator.compute.power.PowerModel; import org.opendc.simulator.compute.power.SimPsu; import org.opendc.simulator.compute.workload.ChainWorkload; import org.opendc.simulator.compute.workload.SimWorkload; import org.opendc.simulator.compute.workload.VirtualMachine; import org.opendc.simulator.engine.engine.FlowEngine; +import org.opendc.simulator.engine.graph.FlowConsumer; import org.opendc.simulator.engine.graph.FlowDistributor; import org.opendc.simulator.engine.graph.FlowEdge; +import org.opendc.simulator.engine.graph.FlowNode; +import org.opendc.simulator.engine.graph.FlowSupplier; /** * A machine that is able to execute {@link SimWorkload} objects. 
@@ -45,19 +56,63 @@ public class SimMachine { private final InstantSource clock; - private SimCpu cpu; - private FlowDistributor cpuDistributor; private SimPsu psu; private Memory memory; + private final Hashtable<ResourceType, FlowDistributor> distributors = new Hashtable<>(); + + private final Hashtable<ResourceType, ArrayList<ComputeResource>> computeResources = new Hashtable<>(); + private final List<ResourceType> availableResources; + private final Consumer<Exception> completion; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Basic Getters and Setters //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + public ComputeResource getResource(ResourceType resourceType, int id) { + if (!this.computeResources.containsKey(resourceType)) { + throw new RuntimeException("No such resource type: " + resourceType); + } + for (ComputeResource resource : this.computeResources.get(resourceType)) { + if (resource.getId() == id) { + return resource; + } + } + throw new RuntimeException("No such resource with id: " + id + " of type: " + resourceType); + } + + public ArrayList<ComputeResource> getResources(ResourceType resourceType) { + if (!this.computeResources.containsKey(resourceType)) { + throw new RuntimeException("No such resource type: " + resourceType); + } + return this.computeResources.get(resourceType); + } + public PerformanceCounters getPerformanceCounters() { - return this.cpu.getPerformanceCounters(); + + return this.computeResources.get(ResourceType.CPU).getFirst().getPerformanceCounters(); + } + + public List<PerformanceCounters> getGpuPerformanceCounters() { + List<PerformanceCounters> counters = new ArrayList<>(); + List<ComputeResource> gpus = this.computeResources.get(ResourceType.GPU) == null + ? 
new ArrayList<>() + : this.computeResources.get(ResourceType.GPU); + + for (ComputeResource gpu : gpus) { + counters.add(gpu.getPerformanceCounters()); + } + return counters; + } + + public PerformanceCounters getGpuPerformanceCounters(int GpuId) { + for (ComputeResource gpu : this.computeResources.get(ResourceType.GPU)) { + if (gpu.getId() == GpuId) { + return gpu.getPerformanceCounters(); + } + } + throw new RuntimeException("No such gpu id: " + GpuId); } public MachineModel getMachineModel() { @@ -73,7 +128,7 @@ public class SimMachine { } public SimCpu getCpu() { - return cpu; + return (SimCpu) this.computeResources.get(ResourceType.CPU).getFirst(); } public Memory getMemory() { @@ -84,6 +139,28 @@ public class SimMachine { return psu; } + public ArrayList<SimGpu> getGpus() { + ArrayList<SimGpu> gpus = new ArrayList<>(); + if (!this.computeResources.containsKey(ResourceType.GPU)) { + return gpus; + } + for (ComputeResource gpu : this.computeResources.get(ResourceType.GPU)) { + if (gpu instanceof SimGpu) { + gpus.add((SimGpu) gpu); + } + } + return gpus; + } + + public SimGpu getGpu(int gpuId) { + for (ComputeResource gpu : this.computeResources.get(ResourceType.GPU)) { + if (gpu.getId() == gpuId) { + return (SimGpu) gpu; + } + } + throw new RuntimeException("No such gpu id: " + gpuId); + } + /** * Return the CPU capacity of the hypervisor in MHz. 
*/ @@ -105,6 +182,10 @@ public class SimMachine { return 0.0; } + public List<ResourceType> getAvailableResources() { + return availableResources; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Constructors //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -113,27 +194,47 @@ public class SimMachine { FlowEngine engine, MachineModel machineModel, FlowDistributor powerDistributor, - CpuPowerModel cpuPowerModel, + PowerModel cpuPowerModel, + @Nullable PowerModel gpuPowerModel, Consumer<Exception> completion) { this.engine = engine; this.machineModel = machineModel; this.clock = engine.getClock(); + this.availableResources = this.machineModel.getUsedResources(); + // Create the psu and cpu and connect them this.psu = new SimPsu(engine); - new FlowEdge(this.psu, powerDistributor); - this.cpu = new SimCpu(engine, this.machineModel.getCpuModel(), cpuPowerModel, 0); - - new FlowEdge(this.cpu, this.psu); + this.computeResources.put( + ResourceType.CPU, + new ArrayList<>(List.of(new SimCpu(engine, this.machineModel.getCpuModel(), cpuPowerModel, 0)))); - this.memory = new Memory(engine, this.machineModel.getMemory()); + new FlowEdge((FlowConsumer) this.computeResources.get(ResourceType.CPU).getFirst(), this.psu); // Create a FlowDistributor and add the cpu as supplier - this.cpuDistributor = new FlowDistributor(engine); + this.distributors.put(ResourceType.CPU, new FlowDistributor(engine)); + new FlowEdge(this.distributors.get(ResourceType.CPU), (FlowSupplier) + this.computeResources.get(ResourceType.CPU).getFirst()); - new FlowEdge(this.cpuDistributor, this.cpu); + // TODO: include memory as flow node + this.memory = new Memory(engine, this.machineModel.getMemory()); + + if (this.availableResources.contains(ResourceType.GPU)) { + this.distributors.put(ResourceType.GPU, new FlowDistributor(engine)); + short i = 0; + 
ArrayList<ComputeResource> gpus = new ArrayList<>(); + + for (GpuModel gpuModel : machineModel.getGpuModels()) { + SimGpu gpu = new SimGpu(engine, gpuModel, gpuPowerModel, i); + gpus.add(gpu); + // suspends here without the distributor + new FlowEdge(this.distributors.get(ResourceType.GPU), gpu); + new FlowEdge(gpu, this.psu); + } + this.computeResources.put(ResourceType.GPU, gpus); + } this.completion = completion; } @@ -149,14 +250,20 @@ public class SimMachine { this.psu.closeNode(); this.psu = null; - this.cpu.closeNode(); - this.cpu = null; - - this.cpuDistributor.closeNode(); - this.cpuDistributor = null; - + // Close resource Flow Nodes + for (List<ComputeResource> resources : this.computeResources.values()) { + for (ComputeResource resource : resources) { + ((FlowNode) resource).closeNode(); + } + resources.clear(); + } this.memory = null; + for (ResourceType resourceType : this.distributors.keySet()) { + this.distributors.get(resourceType).closeNode(); + } + this.distributors.clear(); + this.completion.accept(cause); } @@ -180,6 +287,12 @@ public class SimMachine { * @param completion The completion callback that needs to be called when the workload is done */ public VirtualMachine startWorkload(ChainWorkload workload, Consumer<Exception> completion) { - return (VirtualMachine) workload.startWorkload(this.cpuDistributor, this, completion); + + ArrayList<FlowSupplier> distributors = new ArrayList<>(); + for (ResourceType resourceType : this.availableResources) { + distributors.add(this.distributors.get(resourceType)); + } + + return (VirtualMachine) workload.startWorkload(distributors, this, completion); } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/CpuModel.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/CpuModel.java index ab829bc4..903a985e 100644 --- 
a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/CpuModel.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/CpuModel.java @@ -130,7 +130,7 @@ public final class CpuModel { @Override public String toString() { - return "ProcessingUnit[" + "id= " + id + ", coreCount= " + coreCount + ", coreSpeed= " + coreSpeed + return "ProcessingUnit[" + "id= " + id + ", cpuCoreCount= " + coreCount + ", coreSpeed= " + coreSpeed + ", frequency= " + totalCapacity + ", vendor= " + vendor + ", modelName= " + modelName + ", arch= " + arch + "]"; } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/GpuModel.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/GpuModel.java new file mode 100644 index 00000000..b804b061 --- /dev/null +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/GpuModel.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.compute.models; + +import java.util.Objects; + +/** + * A single logical compute unit of processor node, either virtual or physical. + */ +public final class GpuModel { + private final int id; + private final int coreCount; + private final double coreSpeed; + private final double totalCoreCapacity; + private final double memoryBandwidth; + private final long memorySize; + private final String vendor; + private final String modelName; + private final String arch; + + /** + * Construct a {@link GpuModel} instance. + * + * @param id The identifier of the GPU core within the processing node. + * @param coreCount The number of cores present in the GPU + * @param coreSpeed The speed of a single core + * @param memoryBandwidth The speed of the memory in MHz + * @param memorySize The memory size of the GPU + * @param vendor The vendor of the GPU + * @param modelName The name of the GPU + * @param arch The architecture of the GPU + */ + public GpuModel( + int id, + int coreCount, + double coreSpeed, + double memoryBandwidth, + long memorySize, + String vendor, + String modelName, + String arch) { + this.id = id; + this.coreCount = coreCount; + this.coreSpeed = coreSpeed; + this.memoryBandwidth = memoryBandwidth; + this.memorySize = memorySize; + this.totalCoreCapacity = coreSpeed * coreCount; + this.vendor = vendor; + this.modelName = modelName; + this.arch = arch; + } + + /** + * Construct a {@link GpuModel} instance. Purely as a processing unit + * + * @param id The identifier of the GPU core within the processing node. 
+ * @param coreCount The number of cores present in the GPU + * @param coreSpeed The speed of a single core + */ + public GpuModel(int id, int coreCount, double coreSpeed) { + this(id, coreCount, coreSpeed, 0, 0, "unkown", "unkown", "unkown"); + } + + public GpuModel(int id, int coreCount, double coreSpeed, double memoryBandwidth, long memorySize) { + this(id, coreCount, coreSpeed, memoryBandwidth, memorySize, "unkown", "unkown", "unkown"); + } + + /** + * Return the identifier of the GPU core within the processing node. + */ + public int getId() { + return id; + } + + /** + * Return the number of logical GPUs in the processor node. + */ + public int getCoreCount() { + return coreCount; + } + + /** + * Return the clock rate of a single core of the GPU MHz. + */ + public double getCoreSpeed() { + return coreSpeed; + } + + /** + * Return the clock rate of the GPU in MHz. + */ + public double getTotalCoreCapacity() { + return totalCoreCapacity; + } + + /** + * Return the speed of the memory in Mhz. + */ + public double getMemoryBandwidth() { + return memoryBandwidth; + } + + /** + * Return the size of the memory in MB. + */ + public long getMemorySize() { + return memorySize; + } + + /** + * Return the vendor of the storage device. + */ + public String getVendor() { + return vendor; + } + + /** + * Return the model name of the device. + */ + public String getModelName() { + return modelName; + } + + /** + * Return the micro-architecture of the processor node. 
+ */ + public String getArchitecture() { + return arch; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + GpuModel that = (GpuModel) o; + return id == that.id + && Double.compare(that.totalCoreCapacity, totalCoreCapacity) == 0 + && Double.compare(that.coreSpeed, coreSpeed) == 0 + && Double.compare(that.memoryBandwidth, memoryBandwidth) == 0 + && Double.compare(that.memorySize, memorySize) == 0 + && Objects.equals(vendor, that.vendor) + && Objects.equals(modelName, that.modelName) + && Objects.equals(arch, that.arch); + } + + @Override + public int hashCode() { + return Objects.hash( + id, coreCount, coreSpeed, totalCoreCapacity, memoryBandwidth, memorySize, vendor, modelName, arch); + } + + @Override + public String toString() { + return "ProcessingUnit[" + "id= " + id + ", gpuCoreCount= " + coreCount + ", gpuCoreSpeed= " + coreSpeed + + ", frequency= " + totalCoreCapacity + ", gpuMemoryBandwidth" + memoryBandwidth + ", gpuMemorySize" + + memorySize + ", vendor= " + vendor + ", modelName= " + modelName + ", arch= " + + arch + "]"; + } +} diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/MachineModel.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/MachineModel.java index 6c47fbe6..874194f6 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/MachineModel.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/models/MachineModel.java @@ -22,8 +22,12 @@ package org.opendc.simulator.compute.models; +import java.util.ArrayList; import java.util.List; import java.util.Objects; +import org.jetbrains.annotations.Nullable; +import org.opendc.common.ResourceType; +import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicy; /** * A description of the physical or 
virtual machine on which a bootable image runs. @@ -31,16 +35,46 @@ import java.util.Objects; public final class MachineModel { private final CpuModel cpuModel; private final MemoryUnit memory; - + // private final List<GpuModel> gpuModels = new ArrayList<>(); // TODO: Implement multi GPU support + private final List<GpuModel> gpuModels; + private final DistributionPolicy cpuDistributionStrategy; + private final DistributionPolicy gpuDistributionPolicy; + private final List<ResourceType> availableResources = new ArrayList<>(); /** * Construct a {@link MachineModel} instance. * * @param cpuModel The cpu available to the image. * @param memory The list of memory units available to the image. */ - public MachineModel(CpuModel cpuModel, MemoryUnit memory) { + public MachineModel( + CpuModel cpuModel, + MemoryUnit memory, + @Nullable List<GpuModel> gpuModels, + DistributionPolicy cpuDistributionPolicy, + DistributionPolicy gpuDistributionPolicy) { this.cpuModel = cpuModel; this.memory = memory; + this.cpuDistributionStrategy = cpuDistributionPolicy; + this.gpuDistributionPolicy = gpuDistributionPolicy; + this.availableResources.add(ResourceType.CPU); + // TODO: Add Memory + // this.usedResources.add(ResourceType.Memory); + if (gpuModels != null && !gpuModels.isEmpty()) { + // this.gpuModels = gpuModels; + this.gpuModels = new ArrayList<>(); + this.gpuModels.add(new GpuModel( + 0, + gpuModels.getFirst().getCoreCount() * gpuModels.size(), // merges multiple GPUs into one + gpuModels.getFirst().getCoreSpeed(), + gpuModels.getFirst().getMemoryBandwidth(), + gpuModels.getFirst().getMemorySize() * gpuModels.size(), // merges multiple GPUs into one + gpuModels.getFirst().getVendor(), + gpuModels.getFirst().getModelName(), + gpuModels.getFirst().getArchitecture())); + this.availableResources.add(ResourceType.GPU); + } else { + this.gpuModels = new ArrayList<>(); + } } /** @@ -61,7 +95,40 @@ public final class MachineModel { cpus.get(0).getVendor(), cpus.get(0).getModelName(), 
cpus.get(0).getArchitecture()), - memory); + memory, + null, + null, + null); + } + + /** + * Construct a {@link MachineModel} instance. + * A list of the same cpus, are automatically converted to a single CPU with the number of cores of + * all cpus in the list combined. + * + * @param cpus The list of processing units available to the image. + * @param memory The list of memory units available to the image. + * @param gpus The list of GPUs available to the image. + */ + public MachineModel( + List<CpuModel> cpus, + MemoryUnit memory, + List<GpuModel> gpus, + DistributionPolicy cpuDistributionPolicy, + DistributionPolicy gpuDistributionPolicy) { + + this( + new CpuModel( + cpus.get(0).getId(), + cpus.get(0).getCoreCount() * cpus.size(), // merges multiple CPUs into one + cpus.get(0).getCoreSpeed(), + cpus.get(0).getVendor(), + cpus.get(0).getModelName(), + cpus.get(0).getArchitecture()), + memory, + gpus != null ? gpus : new ArrayList<>(), + cpuDistributionPolicy, + gpuDistributionPolicy); } /** @@ -78,21 +145,60 @@ public final class MachineModel { return memory; } + public List<GpuModel> getGpuModels() { + return gpuModels; + } + + /** + * Return specific GPU model by id. + * @param modelId The id of the GPU model to return. + * @return The GPU model with the given id, or null if not found. + */ + public GpuModel getGpuModel(int modelId) { + for (GpuModel gpuModel : gpuModels) { + if (gpuModel.getId() == modelId) { + return gpuModel; + } + } + return null; + } + + /** + * Return the distribution strategy for the CPU. + */ + public DistributionPolicy getCpuDistributionStrategy() { + return cpuDistributionStrategy; + } + + /** + * Return the distribution strategy for the GPU. + */ + public DistributionPolicy getGpuDistributionStrategy() { + return gpuDistributionPolicy; + } + + /** + * Return the resources of this machine. 
+ */ + public List<ResourceType> getUsedResources() { + return availableResources; + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; MachineModel that = (MachineModel) o; - return cpuModel.equals(that.cpuModel) && memory.equals(that.memory); + return cpuModel.equals(that.cpuModel) && memory.equals(that.memory) && gpuModels.equals(that.gpuModels); } @Override public int hashCode() { - return Objects.hash(cpuModel, memory); + return Objects.hash(cpuModel, memory, gpuModels); } @Override public String toString() { - return "MachineModel[cpus=" + cpuModel + ",memory=" + memory + "]"; + return "MachineModel[cpus=" + cpuModel + ",memory=" + memory + ",gpus=" + gpuModels + "]"; } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/CpuPowerModel.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/PowerModel.java index 4323294e..597b6fc3 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/CpuPowerModel.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/PowerModel.java @@ -20,16 +20,16 @@ * SOFTWARE. */ -package org.opendc.simulator.compute.cpu; +package org.opendc.simulator.compute.power; import org.opendc.simulator.compute.machine.SimMachine; /** - * A model for estimating the power usage of a {@link SimMachine} based on the CPU usage. + * A model for estimating the power usage of a {@link SimMachine} based on the resource usage. */ -public interface CpuPowerModel { +public interface PowerModel { /** - * Computes CPU power consumption for each host. + * Computes resource power consumption for each host. * * @param utilization The CPU utilization percentage. * @return A double value of CPU power consumption (in W). 
diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/CpuPowerModels.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/PowerModels.java index b91bd7e2..af532908 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/CpuPowerModels.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/PowerModels.java @@ -20,118 +20,118 @@ * SOFTWARE. */ -package org.opendc.simulator.compute.cpu; +package org.opendc.simulator.compute.power; import java.util.Arrays; /** * A collection {@link CpuPowerModel} implementations. */ -public class CpuPowerModels { - private CpuPowerModels() {} +public class PowerModels { + private PowerModels() {} /** - * Construct a constant {@link CpuPowerModel}. + * Construct a constant {@link PowerModel}. * * @param power The power consumption of the host at all times (in W). */ - public static CpuPowerModel constant(double power) { + public static PowerModel constant(double power) { return new ConstantPowerModel(power); } /** - * Construct a square root {@link CpuPowerModel} that is adapted from CloudSim. + * Construct a square root {@link PowerModel} that is adapted from CloudSim. * * @param maxPower The maximum power draw of the host in W. - * @param idlePower The power draw of the host at its lowest utilization level in W. + * @param idlePower The power draw of the host at its lowest resource utilization level in W. */ - public static CpuPowerModel sqrt(double maxPower, double idlePower) { + public static PowerModel sqrt(double maxPower, double idlePower) { return new SqrtPowerModel(maxPower, idlePower); } /** - * Construct a linear {@link CpuPowerModel} that is adapted from CloudSim. + * Construct a linear {@link PowerModel} that is adapted from CloudSim. * * @param maxPower The maximum power draw of the host in W. 
- * @param idlePower The power draw of the host at its lowest utilization level in W. + * @param idlePower The power draw of the host at its lowest resource utilization level in W. */ - public static CpuPowerModel linear(double maxPower, double idlePower) { + public static PowerModel linear(double maxPower, double idlePower) { return new LinearPowerModel(maxPower, idlePower); } /** - * Construct a square {@link CpuPowerModel} that is adapted from CloudSim. + * Construct a square {@link PowerModel} that is adapted from CloudSim. * * @param maxPower The maximum power draw of the host in W. - * @param idlePower The power draw of the host at its lowest utilization level in W. + * @param idlePower The power draw of the host at its lowest resource utilization level in W. */ - public static CpuPowerModel square(double maxPower, double idlePower) { + public static PowerModel square(double maxPower, double idlePower) { return new SquarePowerModel(maxPower, idlePower); } /** - * Construct a cubic {@link CpuPowerModel} that is adapted from CloudSim. + * Construct a cubic {@link PowerModel} that is adapted from CloudSim. * * @param maxPower The maximum power draw of the host in W. - * @param idlePower The power draw of the host at its lowest utilization level in W. + * @param idlePower The power draw of the host at its lowest resource utilization level in W. */ - public static CpuPowerModel cubic(double maxPower, double idlePower) { + public static PowerModel cubic(double maxPower, double idlePower) { return new CubicPowerModel(maxPower, idlePower); } /** - * Construct a {@link CpuPowerModel} that minimizes the mean squared error (MSE) + * Construct a {@link PowerModel} that minimizes the mean squared error (MSE) * to the actual power measurement by tuning the calibration parameter. * * @param maxPower The maximum power draw of the host in W. - * @param idlePower The power draw of the host at its lowest utilization level in W. 
+ * @param idlePower The power draw of the host at its lowest resource utilization level in W. * @param calibrationFactor The parameter set to minimize the MSE. * @see <a href="https://dl.acm.org/doi/abs/10.1145/1273440.1250665"> * Fan et al., Power provisioning for a warehouse-sized computer, ACM SIGARCH'07</a> */ - public static CpuPowerModel mse(double maxPower, double idlePower, double calibrationFactor) { + public static PowerModel mse(double maxPower, double idlePower, double calibrationFactor) { return new MsePowerModel(maxPower, idlePower, calibrationFactor); } /** - * Construct an asymptotic {@link CpuPowerModel} adapted from GreenCloud. + * Construct an asymptotic {@link PowerModel} adapted from GreenCloud. * * @param maxPower The maximum power draw of the host in W. - * @param idlePower The power draw of the host at its lowest utilization level in W. + * @param idlePower The power draw of the host at its lowest resource utilization level in W. * @param asymUtil A utilization level at which the host attains asymptotic, * i.e., close to linear power consumption versus the offered load. - * For most of the CPUs,a is in [0.2, 0.5]. + * For most of the s,a is in [0.2, 0.5]. * @param dvfs A flag indicates whether DVFS is enabled. */ - public static CpuPowerModel asymptotic(double maxPower, double idlePower, double asymUtil, boolean dvfs) { + public static PowerModel asymptotic(double maxPower, double idlePower, double asymUtil, boolean dvfs) { return new AsymptoticPowerModel(maxPower, idlePower, asymUtil, dvfs); } /** - * Construct a linear interpolation model {@link CpuPowerModel} that is adapted from CloudSim. + * Construct a linear interpolation model {@link PowerModel} that is adapted from CloudSim. * * <p> * The power consumption is linearly interpolated over the given power levels. In case of two values, the first * represents 0% utilization, while the last value represent 100% utilization. 
* - * @param powerLevels An array of power consumption steps (in W) for a specific CPU utilization. + * @param powerLevels An array of power consumption steps (in W) for a specific utilization. * @see <a href="http://www.spec.org/power_ssj2008/results/res2011q1/">Machines used in the SPEC benchmark</a> */ - public static CpuPowerModel interpolate(double... powerLevels) { + public static PowerModel interpolate(double... powerLevels) { return new InterpolationPowerModel(powerLevels.clone()); } /** - * Decorate an existing {@link CpuPowerModel} to ensure that zero power consumption is reported when there is no + * Decorate an existing {@link PowerModel} to ensure that zero power consumption is reported when there is no * utilization. * - * @param delegate The existing {@link CpuPowerModel} to decorate. + * @param delegate The existing {@link PowerModel} to decorate. */ - public static CpuPowerModel zeroIdle(CpuPowerModel delegate) { + public static PowerModel zeroIdle(PowerModel delegate) { return new ZeroIdlePowerDecorator(delegate); } - private static final class ConstantPowerModel implements CpuPowerModel { + private static final class ConstantPowerModel implements PowerModel { private final double power; ConstantPowerModel(double power) { @@ -154,7 +154,7 @@ public class CpuPowerModels { } } - private abstract static class MaxIdlePowerModel implements CpuPowerModel { + private abstract static class MaxIdlePowerModel implements PowerModel { protected final double maxPower; protected final double idlePower; @@ -344,7 +344,7 @@ public class CpuPowerModels { } } - private static final class InterpolationPowerModel implements CpuPowerModel { + private static final class InterpolationPowerModel implements PowerModel { private final double[] powerLevels; InterpolationPowerModel(double[] powerLevels) { @@ -380,10 +380,10 @@ public class CpuPowerModels { } } - private static final class ZeroIdlePowerDecorator implements CpuPowerModel { - private final CpuPowerModel 
delegate; + private static final class ZeroIdlePowerDecorator implements PowerModel { + private final PowerModel delegate; - ZeroIdlePowerDecorator(CpuPowerModel delegate) { + ZeroIdlePowerDecorator(PowerModel delegate) { this.delegate = delegate; } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/CpuPowerModelsFactory.kt b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/PowerModelsFactory.kt index 56610136..53107d19 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/cpu/CpuPowerModelsFactory.kt +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/PowerModelsFactory.kt @@ -20,10 +20,10 @@ * SOFTWARE. */ -package org.opendc.simulator.compute.cpu +package org.opendc.simulator.compute.power // TODO: couple this correctly -public enum class CPUPowerModelEnum { +public enum class PowerModelEnum { Constant, Sqrt, Linear, @@ -41,30 +41,28 @@ public fun getPowerModel( calibrationFactor: Double = 1.0, asymUtil: Double = 0.0, dvfs: Boolean = true, -): CpuPowerModel { +): PowerModel { return when (modelType) { - "constant" -> CpuPowerModels.constant(power) - "sqrt" -> CpuPowerModels.sqrt(maxPower, idlePower) - "linear" -> CpuPowerModels.linear(maxPower, idlePower) - "square" -> CpuPowerModels.square(maxPower, idlePower) - "cubic" -> CpuPowerModels.cubic(maxPower, idlePower) - "mse" -> CpuPowerModels.mse(maxPower, idlePower, calibrationFactor) - "asymptotic" -> CpuPowerModels.asymptotic(maxPower, idlePower, asymUtil, dvfs) - + "constant" -> PowerModels.constant(power) + "sqrt" -> PowerModels.sqrt(maxPower, idlePower) + "linear" -> PowerModels.linear(maxPower, idlePower) + "square" -> PowerModels.square(maxPower, idlePower) + "cubic" -> PowerModels.cubic(maxPower, idlePower) + "mse" -> PowerModels.mse(maxPower, idlePower, calibrationFactor) + "asymptotic" -> PowerModels.asymptotic(maxPower, 
idlePower, asymUtil, dvfs) else -> throw IllegalArgumentException("Unknown power modelType $modelType") } } -public fun getPowerModel(modelType: String): CpuPowerModel { +public fun getPowerModel(modelType: String): PowerModel { return when (modelType) { - "constant" -> CpuPowerModels.constant(200.0) - "sqrt" -> CpuPowerModels.sqrt(350.0, 200.0) - "linear" -> CpuPowerModels.linear(350.0, 200.0) - "square" -> CpuPowerModels.square(350.0, 200.0) - "cubic" -> CpuPowerModels.cubic(350.0, 200.0) - "mse" -> CpuPowerModels.mse(350.0, 200.0, 1.0) - "asymptotic" -> CpuPowerModels.asymptotic(350.0, 200.0, 0.0, true) - + "constant" -> PowerModels.constant(200.0) + "sqrt" -> PowerModels.sqrt(350.0, 200.0) + "linear" -> PowerModels.linear(350.0, 200.0) + "square" -> PowerModels.square(350.0, 200.0) + "cubic" -> PowerModels.cubic(350.0, 200.0) + "mse" -> PowerModels.mse(350.0, 200.0, 1.0) + "asymptotic" -> PowerModels.asymptotic(350.0, 200.0, 0.0, true) else -> throw IllegalArgumentException("Unknown power modelType $modelType") } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPowerSource.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPowerSource.java index 34804230..b00bb468 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPowerSource.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPowerSource.java @@ -24,6 +24,7 @@ package org.opendc.simulator.compute.power; import java.util.List; import java.util.Map; +import org.opendc.common.ResourceType; import org.opendc.simulator.compute.cpu.SimCpu; import org.opendc.simulator.engine.engine.FlowEngine; import org.opendc.simulator.engine.graph.FlowEdge; @@ -217,4 +218,9 @@ public final class SimPowerSource extends FlowNode implements FlowSupplier, Carb return Map.of(FlowEdge.NodeType.SUPPLYING, supplierEdges); } 
+ + @Override + public ResourceType getSupplierResourceType() { + return ResourceType.POWER; + } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPsu.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPsu.java index 87a4e791..1ea7c570 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPsu.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/SimPsu.java @@ -22,29 +22,35 @@ package org.opendc.simulator.compute.power; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; +import org.opendc.common.ResourceType; import org.opendc.simulator.compute.cpu.SimCpu; import org.opendc.simulator.engine.engine.FlowEngine; import org.opendc.simulator.engine.graph.FlowConsumer; import org.opendc.simulator.engine.graph.FlowEdge; import org.opendc.simulator.engine.graph.FlowNode; import org.opendc.simulator.engine.graph.FlowSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A {@link SimPsu} implementation that estimates the power consumption based on CPU usage. 
*/ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer { + private static final Logger LOGGER = LoggerFactory.getLogger(SimPsu.class); private long lastUpdate; - private double powerDemand = 0.0; - private double powerSupplied = 0.0; + private final HashMap<ResourceType, ArrayList<Double>> powerDemandsPerResource = new HashMap<>(); + private final HashMap<ResourceType, ArrayList<Double>> powerSuppliedPerResource = new HashMap<>(); private double totalEnergyUsage = 0.0; - private FlowEdge cpuEdge; + private final HashMap<ResourceType, ArrayList<FlowEdge>> resourceEdges = new HashMap<>(); private FlowEdge powerSupplyEdge; - private double capacity = Long.MAX_VALUE; + private final double capacity = Long.MAX_VALUE; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Basic Getters and Setters @@ -56,7 +62,8 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer * @return <code>true</code> if the InPort is connected to an OutPort, <code>false</code> otherwise. */ public boolean isConnected() { - return cpuEdge != null; + return !this.resourceEdges.isEmpty() + && this.resourceEdges.values().stream().anyMatch(list -> !list.isEmpty()); } /** @@ -65,14 +72,28 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer * This method provides access to the power consumption of the machine before PSU losses are applied. */ public double getPowerDemand() { - return this.powerDemand; + return this.powerDemandsPerResource.values().stream() + .flatMap(List::stream) + .findFirst() + .orElse(0.0); + } + + public double getPowerDemand(ResourceType resourceType) { + return this.powerDemandsPerResource.get(resourceType).getFirst(); } /** * Return the instantaneous power usage of the machine (in W) measured at the InPort of the power supply. 
*/ public double getPowerDraw() { - return this.powerSupplied; + return this.powerSuppliedPerResource.values().stream() + .flatMap(List::stream) + .findFirst() + .orElse(0.0); + } + + public double getPowerDraw(ResourceType resourceType) { + return this.powerSuppliedPerResource.get(resourceType).getFirst(); } /** @@ -105,10 +126,20 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer @Override public long onUpdate(long now) { updateCounters(); - double powerSupply = this.powerDemand; - - if (powerSupply != this.powerSupplied) { - this.pushOutgoingSupply(this.cpuEdge, powerSupply); + for (ResourceType resourceType : this.resourceEdges.keySet()) { + ArrayList<FlowEdge> edges = this.resourceEdges.get(resourceType); + if (edges != null && !edges.isEmpty()) { + double powerSupply = + this.powerDemandsPerResource.get(resourceType).getFirst(); + double powerSupplied = + this.powerSuppliedPerResource.get(resourceType).getFirst(); + + if (powerSupply != powerSupplied) { + for (FlowEdge edge : edges) { + edge.pushSupply(powerSupply); + } + } + } } return Long.MAX_VALUE; @@ -127,8 +158,11 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer long duration = now - lastUpdate; if (duration > 0) { - // Compute the energy usage of the psu - this.totalEnergyUsage += (this.powerSupplied * duration * 0.001); + for (ResourceType resourceType : this.powerSuppliedPerResource.keySet()) { + for (double powerSupplied : this.powerSuppliedPerResource.get(resourceType)) { + this.totalEnergyUsage += (powerSupplied * duration * 0.001); + } + } } } @@ -137,38 +171,61 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @Override - public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand) { - this.powerDemand = newDemand; + public void pushOutgoingDemand(FlowEdge supplierEdge, 
double newDemand, ResourceType resourceType) { + this.powerDemandsPerResource.put(resourceType, new ArrayList<>(List.of(newDemand))); powerSupplyEdge.pushDemand(newDemand); } @Override + public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand) { + double totalDemand = this.powerDemandsPerResource.values().stream() + .flatMap(List::stream) + .reduce(0.0, Double::sum); + this.powerSupplyEdge.pushDemand(totalDemand); + } + + @Override public void pushOutgoingSupply(FlowEdge consumerEdge, double newSupply) { - this.powerSupplied = newSupply; - cpuEdge.pushSupply(newSupply); + this.pushOutgoingSupply(consumerEdge, newSupply, consumerEdge.getConsumerResourceType()); + } + + @Override + public void pushOutgoingSupply(FlowEdge consumerEdge, double newSupply, ResourceType resourceType) { + this.powerSuppliedPerResource.put(resourceType, new ArrayList<>(List.of(newSupply))); + consumerEdge.pushSupply(newSupply, false, resourceType); } @Override - public void handleIncomingDemand(FlowEdge consumerEdge, double newPowerDemand) { + public void handleIncomingDemand(FlowEdge consumerEdge, double newDemand) { + handleIncomingDemand(consumerEdge, newDemand, consumerEdge.getConsumerResourceType()); + } + @Override + public void handleIncomingDemand(FlowEdge consumerEdge, double newPowerDemand, ResourceType resourceType) { updateCounters(); - this.powerDemand = newPowerDemand; + this.powerDemandsPerResource.put(resourceType, new ArrayList<>(List.of(newPowerDemand))); pushOutgoingDemand(this.powerSupplyEdge, newPowerDemand); } @Override - public void handleIncomingSupply(FlowEdge supplierEdge, double newPowerSupply) { - + public void handleIncomingSupply(FlowEdge supplierEdge, double newSupply) { updateCounters(); - this.powerSupplied = newPowerSupply; - - pushOutgoingSupply(this.cpuEdge, newPowerSupply); + for (ResourceType resourceType : this.resourceEdges.keySet()) { + for (FlowEdge edge : this.resourceEdges.get(resourceType)) { + double outgoingSupply = + 
Math.min(this.powerDemandsPerResource.get(resourceType).getFirst(), newSupply); + pushOutgoingSupply(edge, outgoingSupply, resourceType); + } + } } @Override public void addConsumerEdge(FlowEdge consumerEdge) { - this.cpuEdge = consumerEdge; + ResourceType consumerResourceType = consumerEdge.getConsumerResourceType(); + this.resourceEdges.put(consumerResourceType, new ArrayList<>(List.of(consumerEdge))); + this.powerDemandsPerResource.put(consumerResourceType, new ArrayList<>(List.of(0.0))); + this.powerSuppliedPerResource.put(consumerResourceType, new ArrayList<>(List.of(0.0))); } @Override @@ -178,7 +235,12 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer @Override public void removeConsumerEdge(FlowEdge consumerEdge) { - this.cpuEdge = null; + ResourceType resourceType = consumerEdge.getConsumerResourceType(); + if (this.resourceEdges.containsKey(resourceType)) { + this.resourceEdges.remove(resourceType); + this.powerDemandsPerResource.remove(resourceType); + this.powerSuppliedPerResource.remove(resourceType); + } } @Override @@ -188,11 +250,27 @@ public final class SimPsu extends FlowNode implements FlowSupplier, FlowConsumer @Override public Map<FlowEdge.NodeType, List<FlowEdge>> getConnectedEdges() { - List<FlowEdge> supplyingEdges = cpuEdge != null ? List.of(cpuEdge) : List.of(); + List<FlowEdge> supplyingEdges = new ArrayList<>(); + for (ResourceType resourceType : this.resourceEdges.keySet()) { + List<FlowEdge> edges = this.resourceEdges.get(resourceType); + if (edges != null && !edges.isEmpty()) { + supplyingEdges.addAll(edges); + } + } List<FlowEdge> consumingEdges = powerSupplyEdge != null ? 
List.of(powerSupplyEdge) : List.of(); return Map.of( FlowEdge.NodeType.SUPPLYING, supplyingEdges, FlowEdge.NodeType.CONSUMING, consumingEdges); } + + @Override + public ResourceType getSupplierResourceType() { + return ResourceType.POWER; + } + + @Override + public ResourceType getConsumerResourceType() { + return ResourceType.POWER; + } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/BatteryAggregator.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/BatteryAggregator.java index 9a05f2b3..13674369 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/BatteryAggregator.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/BatteryAggregator.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; +import org.opendc.common.ResourceType; import org.opendc.simulator.engine.engine.FlowEngine; import org.opendc.simulator.engine.graph.FlowConsumer; import org.opendc.simulator.engine.graph.FlowDistributor; @@ -190,4 +191,14 @@ public class BatteryAggregator extends FlowNode implements FlowConsumer, FlowSup FlowEdge.NodeType.CONSUMING, consumingEdges, FlowEdge.NodeType.SUPPLYING, supplyingEdges); } + + @Override + public ResourceType getSupplierResourceType() { + return ResourceType.POWER; + } + + @Override + public ResourceType getConsumerResourceType() { + return ResourceType.POWER; + } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/SimBattery.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/SimBattery.java index d749af72..d6377ef6 100644 --- 
a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/SimBattery.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/power/batteries/SimBattery.java @@ -24,6 +24,7 @@ package org.opendc.simulator.compute.power.batteries; import java.util.List; import java.util.Map; +import org.opendc.common.ResourceType; import org.opendc.simulator.compute.power.batteries.policy.BatteryPolicy; import org.opendc.simulator.engine.engine.FlowEngine; import org.opendc.simulator.engine.graph.FlowConsumer; @@ -331,4 +332,14 @@ public class SimBattery extends FlowNode implements FlowConsumer, FlowSupplier { FlowEdge.NodeType.CONSUMING, consumingEdges, FlowEdge.NodeType.SUPPLYING, supplyingEdges); } + + @Override + public ResourceType getSupplierResourceType() { + return ResourceType.POWER; + } + + @Override + public ResourceType getConsumerResourceType() { + return ResourceType.POWER; + } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/ChainWorkload.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/ChainWorkload.java index 3cdde40a..56e6093b 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/ChainWorkload.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/ChainWorkload.java @@ -23,6 +23,7 @@ package org.opendc.simulator.compute.workload; import java.util.ArrayList; +import java.util.List; import java.util.function.Consumer; import org.opendc.simulator.compute.machine.SimMachine; import org.opendc.simulator.engine.graph.FlowSupplier; @@ -47,7 +48,7 @@ public record ChainWorkload( } @Override - public SimWorkload startWorkload(FlowSupplier supplier, SimMachine machine, Consumer<Exception> completion) { + public SimWorkload startWorkload(List<FlowSupplier> supplier, SimMachine 
machine, Consumer<Exception> completion) { return new VirtualMachine(supplier, this, machine, completion); } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/VirtualMachine.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/VirtualMachine.java index 7632b503..622d2b89 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/VirtualMachine.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/VirtualMachine.java @@ -22,32 +22,41 @@ package org.opendc.simulator.compute.workload; +import java.util.ArrayList; +import java.util.Hashtable; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.Consumer; +import org.opendc.common.ResourceType; +import org.opendc.simulator.compute.ComputeResource; import org.opendc.simulator.compute.machine.PerformanceCounters; import org.opendc.simulator.compute.machine.SimMachine; import org.opendc.simulator.engine.graph.FlowEdge; import org.opendc.simulator.engine.graph.FlowNode; import org.opendc.simulator.engine.graph.FlowSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A {@link VirtualMachine} that composes multiple {@link SimWorkload}s. 
*/ public final class VirtualMachine extends SimWorkload implements FlowSupplier { + private static final Logger LOGGER = LoggerFactory.getLogger(VirtualMachine.class); private final LinkedList<Workload> workloads; private int workloadIndex; private SimWorkload activeWorkload; - private double cpuDemand = 0.0f; - private double cpuSupply = 0.0f; - private double d = 0.0f; private FlowEdge workloadEdge; - private FlowEdge machineEdge; - private double capacity = 0; + private final Hashtable<ResourceType, Double> resourceDemands = new Hashtable<>(); + private final Hashtable<ResourceType, Double> resourceSupplies = new Hashtable<>(); + private final Hashtable<ResourceType, Double> resourceCapacities = new Hashtable<>(); + private final Hashtable<ResourceType, Double> resourceTimeScalingFactor = new Hashtable<>(); // formerly known as d + private final Hashtable<ResourceType, FlowEdge> distributorEdges = new Hashtable<>(); + private final Hashtable<ResourceType, List<PerformanceCounters>> resourcePerformanceCounters = new Hashtable<>(); private final long checkpointInterval; private final long checkpointDuration; @@ -57,16 +66,25 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { private final ChainWorkload snapshot; private long lastUpdate; - private final PerformanceCounters performanceCounters = new PerformanceCounters(); private Consumer<Exception> completion; + private final List<ResourceType> availableResources = new ArrayList<>(); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Basic Getters and Setters //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @Override public double getCapacity() { - return this.capacity; + throw new UnsupportedOperationException("getCapacity() is not supported for VirtualMachine"); + } + + @Override + public double getCapacity(ResourceType resourceType) { + if 
(resourceType == ResourceType.AUXILIARY) { + return 0.0; + } + return this.resourceCapacities.get(resourceType); } @Override @@ -89,8 +107,22 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { return checkpointIntervalScaling; } - public PerformanceCounters getPerformanceCounters() { - return performanceCounters; + public PerformanceCounters getCpuPerformanceCounters() { + return this.resourcePerformanceCounters.get(ResourceType.CPU).getFirst(); + } + + public List<PerformanceCounters> getGpuPerformanceCounters() { + return this.resourcePerformanceCounters.get(ResourceType.GPU) != null + ? this.resourcePerformanceCounters.get(ResourceType.GPU) + : new ArrayList<>(); + } + + public PerformanceCounters getGpuPerformanceCounters(int gpuId) { + List<PerformanceCounters> gpuPerformanceCounters = this.resourcePerformanceCounters.get(ResourceType.GPU); + if (gpuId < 0 || gpuId >= gpuPerformanceCounters.size()) { + throw new IndexOutOfBoundsException("No such GPU id: " + gpuId); + } + return gpuPerformanceCounters.get(gpuId); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -116,16 +148,54 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { } this.workloadIndex = -1; - + this.availableResources.add(supplier.getSupplierResourceType()); this.onStart(); } - VirtualMachine(FlowSupplier supplier, ChainWorkload workload, SimMachine machine, Consumer<Exception> completion) { - this(supplier, workload); + VirtualMachine( + List<FlowSupplier> suppliers, ChainWorkload workload, SimMachine machine, Consumer<Exception> completion) { + super(((FlowNode) suppliers.getFirst()).getEngine()); + + this.snapshot = workload; + + for (FlowSupplier supplier : suppliers) { + new FlowEdge(this, supplier); + ResourceType resourceType = supplier.getSupplierResourceType(); + + this.availableResources.add(resourceType); + + ArrayList<ComputeResource> resources = 
machine.getResources(resourceType); + if (resources.isEmpty()) { + throw new IllegalArgumentException("No resources of type " + resourceType + " found in machine "); + } + + this.resourceCapacities.put(resourceType, resources.getFirst().getCapacity()); + + ArrayList<PerformanceCounters> performanceCounters = new ArrayList<>(); + + for (ComputeResource resource : resources) { + performanceCounters.add(new PerformanceCounters()); + this.resourceTimeScalingFactor.put(resourceType, 1.0 / resource.getCapacity()); + } + this.resourcePerformanceCounters.put(resourceType, performanceCounters); + this.resourceDemands.put(resourceType, 0.0); + this.resourceSupplies.put(resourceType, 0.0); + } + + this.workloads = new LinkedList<>(workload.workloads()); + this.checkpointInterval = workload.checkpointInterval(); + this.checkpointDuration = workload.checkpointDuration(); + this.checkpointIntervalScaling = workload.checkpointIntervalScaling(); + + this.lastUpdate = clock.millis(); + + if (checkpointInterval > 0) { + this.createCheckpointModel(); + } - this.capacity = machine.getCpu().getFrequency(); - this.d = 1 / machine.getCpu().getFrequency(); + this.workloadIndex = -1; this.completion = completion; + this.onStart(); } public Workload getNextWorkload() { @@ -152,18 +222,25 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { this.lastUpdate = now; long delta = now - lastUpdate; - double cpuCapacity = 0.0f; - if (delta > 0) { - final double factor = this.d * delta; - - this.performanceCounters.addCpuActiveTime(Math.round(this.cpuSupply * factor)); - this.performanceCounters.setCpuIdleTime(Math.round((cpuCapacity - this.cpuSupply) * factor)); - this.performanceCounters.addCpuStealTime(Math.round((this.cpuDemand - this.cpuSupply) * factor)); + for (ResourceType resourceType : this.availableResources) { + int i = 0; + final double factor = this.resourceTimeScalingFactor.get(resourceType) * delta; + for (PerformanceCounters performanceCounter : 
this.resourcePerformanceCounters.get(resourceType)) { + if (delta > 0) { + performanceCounter.addActiveTime(Math.round(this.resourceSupplies.get(resourceType) * factor)); + performanceCounter.setIdleTime(Math.round( + (this.resourceCapacities.get(resourceType) - this.resourceSupplies.get(resourceType)) + * factor)); + performanceCounter.addStealTime(Math.round( + (this.resourceDemands.get(resourceType) - this.resourceSupplies.get(resourceType)) + * factor)); + } + performanceCounter.setDemand(this.resourceDemands.get(resourceType)); + performanceCounter.setSupply(this.resourceSupplies.get(resourceType)); + performanceCounter.setCapacity(this.resourceCapacities.get(resourceType)); + i++; + } } - - this.performanceCounters.setCpuDemand(this.cpuDemand); - this.performanceCounters.setCpuSupply(this.cpuSupply); - this.performanceCounters.setCpuCapacity(cpuCapacity); } @Override @@ -233,40 +310,66 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { } /** - * Add Connection to the cpuMux + * Add Connection to the resource flow distributor * - * @param supplierEdge The edge to the cpuMux + * @param supplierEdge The edge to the resource flow distributor */ @Override public void addSupplierEdge(FlowEdge supplierEdge) { - this.machineEdge = supplierEdge; - this.capacity = supplierEdge.getCapacity(); + ResourceType resourceType = supplierEdge.getSupplierResourceType(); + this.resourceCapacities.put(resourceType, supplierEdge.getCapacity()); + this.distributorEdges.put(resourceType, supplierEdge); } /** - * Push demand to the cpuMux + * Push demand to the resource flow distributor * - * @param supplierEdge The edge to the cpuMux - * @param newDemand new demand to sent to the cpu + * @param supplierEdge The edge to the resource flow distributor + * @param newDemand new demand to sent to the resource flow distributor */ @Override public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand) { + // FIXME: Needs to be assigned to specific 
resource if multiple exist -> add resource Id as parameter + this.pushOutgoingDemand(supplierEdge, newDemand, supplierEdge.getSupplierResourceType()); + } - this.cpuDemand = newDemand; - this.machineEdge.pushDemand(newDemand); + /** + * Push demand to the resource flow distributor + * + * @param supplierEdge The edge to the resource flow distributor + * @param newDemand new demand to sent to the resource flow distributor + */ + @Override + public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand, ResourceType resourceType) { + // FIXME: Needs to be assigned to specific resource if multiple exist -> add resource Id as parameter + this.resourceDemands.put(resourceType, newDemand); + this.distributorEdges.get(resourceType).pushDemand(newDemand, false, resourceType); } /** * Push supply to the workload * - * @param consumerEdge The edge to the cpuMux + * @param consumerEdge The edge to the resource flow distributor * @param newSupply new supply to sent to the workload */ @Override public void pushOutgoingSupply(FlowEdge consumerEdge, double newSupply) { + this.resourceSupplies.put(consumerEdge.getConsumerResourceType(), newSupply); + this.distributorEdges + .get(consumerEdge.getConsumerResourceType()) + .pushSupply(newSupply, false, consumerEdge.getConsumerResourceType()); + } - this.cpuSupply = newSupply; - this.workloadEdge.pushSupply(newSupply); + /** + * Push supply to the workload + * + * @param consumerEdge The edge to the resource flow distributor + * @param newSupply new supply to sent to the workload + */ + @Override + public void pushOutgoingSupply(FlowEdge consumerEdge, double newSupply, ResourceType resourceType) { + this.resourceSupplies.put(resourceType, newSupply); + this.workloadEdge.pushSupply(newSupply, false, resourceType); } /** @@ -278,21 +381,42 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { @Override public void handleIncomingDemand(FlowEdge consumerEdge, double newDemand) { 
updateCounters(this.clock.millis()); + this.pushOutgoingDemand(this.distributorEdges.get(consumerEdge.getConsumerResourceType()), newDemand); + } - this.pushOutgoingDemand(this.machineEdge, newDemand); + @Override + public void handleIncomingDemand(FlowEdge consumerEdge, double newDemand, ResourceType resourceType) { + updateCounters(this.clock.millis()); + this.pushOutgoingDemand(this.distributorEdges.get(resourceType), newDemand, resourceType); } /** - * Handle new supply coming from the cpuMux + * Handle new supply coming from the resource flow distributor * - * @param supplierEdge The edge to the cpuMux + * @param supplierEdge The edge to the resource flow distributor * @param newSupply The new supply that is sent to the workload */ @Override public void handleIncomingSupply(FlowEdge supplierEdge, double newSupply) { updateCounters(this.clock.millis()); - this.pushOutgoingSupply(this.machineEdge, newSupply); + this.pushOutgoingSupply( + this.distributorEdges.get(supplierEdge.getSupplierResourceType()), + newSupply, + supplierEdge.getSupplierResourceType()); + } + + /** + * Handle new supply coming from the resource flow distributor + * + * @param supplierEdge The edge to the resource flow distributor + * @param newSupply The new supply that is sent to the workload + */ + @Override + public void handleIncomingSupply(FlowEdge supplierEdge, double newSupply, ResourceType resourceType) { + updateCounters(this.clock.millis()); + + this.pushOutgoingSupply(this.distributorEdges.get(resourceType), newSupply, resourceType); } /** @@ -322,14 +446,14 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { } /** - * Handle the removal of the connection to the cpuMux + * Handle the removal of the connection to the resource flow distributor * When this happens, close the SimChainWorkload * - * @param supplierEdge The edge to the cpuMux + * @param supplierEdge The edge to the resource flow distributor */ @Override public void 
removeSupplierEdge(FlowEdge supplierEdge) { - if (this.machineEdge == null) { + if (!this.distributorEdges.containsKey(supplierEdge.getSupplierResourceType())) { return; } @@ -338,11 +462,16 @@ public final class VirtualMachine extends SimWorkload implements FlowSupplier { @Override public Map<FlowEdge.NodeType, List<FlowEdge>> getConnectedEdges() { - List<FlowEdge> consumerEdges = (this.machineEdge != null) ? List.of(this.machineEdge) : List.of(); + List<FlowEdge> consumerEdges = + this.distributorEdges.values().stream().filter(Objects::nonNull).toList(); List<FlowEdge> supplierEdges = (this.workloadEdge != null) ? List.of(this.workloadEdge) : List.of(); return Map.of( FlowEdge.NodeType.CONSUMING, consumerEdges, FlowEdge.NodeType.SUPPLYING, supplierEdges); } + + public List<ResourceType> getAvailableResources() { + return this.availableResources; + } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/Workload.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/Workload.java index 3ad7597d..5edacb3b 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/Workload.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/Workload.java @@ -22,6 +22,7 @@ package org.opendc.simulator.compute.workload; +import java.util.List; import java.util.function.Consumer; import org.opendc.simulator.compute.machine.SimMachine; import org.opendc.simulator.engine.graph.FlowSupplier; @@ -36,5 +37,5 @@ public interface Workload { SimWorkload startWorkload(FlowSupplier supplier); - SimWorkload startWorkload(FlowSupplier supplier, SimMachine machine, Consumer<Exception> completion); + SimWorkload startWorkload(List<FlowSupplier> supplier, SimMachine machine, Consumer<Exception> completion); } diff --git 
a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/SimTraceWorkload.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/SimTraceWorkload.java index d5635439..8b3a7188 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/SimTraceWorkload.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/SimTraceWorkload.java @@ -22,36 +22,50 @@ package org.opendc.simulator.compute.workload.trace; +import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import org.opendc.common.ResourceType; import org.opendc.simulator.compute.workload.SimWorkload; +import org.opendc.simulator.compute.workload.VirtualMachine; import org.opendc.simulator.compute.workload.trace.scaling.ScalingPolicy; import org.opendc.simulator.engine.graph.FlowConsumer; import org.opendc.simulator.engine.graph.FlowEdge; import org.opendc.simulator.engine.graph.FlowNode; import org.opendc.simulator.engine.graph.FlowSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class SimTraceWorkload extends SimWorkload implements FlowConsumer { + private static final Logger LOGGER = LoggerFactory.getLogger(SimTraceWorkload.class); private LinkedList<TraceFragment> remainingFragments; private int fragmentIndex; private TraceFragment currentFragment; private long startOfFragment; - private FlowEdge machineEdge; - - private double cpuFreqDemand = 0.0; // The Cpu demanded by fragment - private double cpuFreqSupplied = 0.0; // The Cpu speed supplied - private double newCpuFreqSupplied = 0.0; // The Cpu speed supplied - private double remainingWork = 0.0; // The duration of the fragment at the demanded speed + private final Map<ResourceType, FlowEdge> machineResourceEdges = new HashMap<>(); + + // TODO: Currently GPU 
memory is not considered and can not be used + private final ArrayList<ResourceType> usedResourceTypes = new ArrayList<>(); + private final Map<ResourceType, Double> resourcesSupplied = new HashMap<>(); // the currently supplied resources + private final Map<ResourceType, Double> newResourcesSupply = + new HashMap<>(); // The supplied resources with next update + private final Map<ResourceType, Double> resourcesDemand = new HashMap<>(); // The demands per resource type + private final Map<ResourceType, Double> remainingWork = + new HashMap<>(); // The duration of the fragment at the demanded speeds + private double totalRemainingWork = + 0.0; // The total remaining work of the fragment across all resources, used to determine the end of the + // fragment + private final Map<ResourceType, Boolean> workloadFinished = + new HashMap<>(); // The workload finished for each resource type private final long checkpointDuration; - private final TraceWorkload snapshot; private final ScalingPolicy scalingPolicy; - private final String taskName; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -98,6 +112,44 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { this.startOfFragment = this.clock.millis(); new FlowEdge(this, supplier); + if (supplier instanceof VirtualMachine) { + // instead iterate over the resources in the fragment as required resources not provided by the VM + for (ResourceType resourceType : workload.getResourceTypes()) { + this.usedResourceTypes.add(resourceType); + this.resourcesSupplied.put(resourceType, 0.0); + this.newResourcesSupply.put(resourceType, 0.0); + this.resourcesDemand.put(resourceType, 0.0); + this.remainingWork.put(resourceType, 0.0); + this.workloadFinished.put(resourceType, false); + } + } + } + + // Needed if workload not started by VM + public SimTraceWorkload(List<FlowSupplier> resourceSuppliers, TraceWorkload workload) { + // same engine for 
all suppliers + super(((FlowNode) resourceSuppliers.getFirst()).getEngine()); + + this.snapshot = workload; + this.checkpointDuration = workload.checkpointDuration(); + this.scalingPolicy = workload.getScalingPolicy(); + this.remainingFragments = new LinkedList<>(workload.getFragments()); + this.fragmentIndex = 0; + this.taskName = workload.getTaskName(); + + this.startOfFragment = this.clock.millis(); + + for (FlowSupplier supplier : resourceSuppliers) { + if (supplier.getSupplierResourceType() != ResourceType.AUXILIARY) { + new FlowEdge(this, supplier); + this.usedResourceTypes.add(supplier.getSupplierResourceType()); + this.resourcesSupplied.put(supplier.getSupplierResourceType(), 0.0); + this.newResourcesSupply.put(supplier.getSupplierResourceType(), 0.0); + this.resourcesDemand.put(supplier.getSupplierResourceType(), 0.0); + this.remainingWork.put(supplier.getSupplierResourceType(), 0.0); + this.workloadFinished.put(supplier.getSupplierResourceType(), false); + } + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -109,30 +161,64 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { long passedTime = getPassedTime(now); this.startOfFragment = now; - // The amount of work done since last update - double finishedWork = this.scalingPolicy.getFinishedWork(this.cpuFreqDemand, this.cpuFreqSupplied, passedTime); - - this.remainingWork -= finishedWork; + for (ResourceType resourceType : this.usedResourceTypes) { + // The amount of work done since last update + double finishedWork = this.scalingPolicy.getFinishedWork( + this.resourcesDemand.get(resourceType), this.resourcesSupplied.get(resourceType), passedTime); + this.remainingWork.put(resourceType, this.remainingWork.get(resourceType) - finishedWork); + this.totalRemainingWork -= finishedWork; + if (this.remainingWork.get(resourceType) <= 0) { + this.workloadFinished.put(resourceType, true); + } + } - // If 
this.remainingWork <= 0, the fragment has been completed - if (this.remainingWork <= 0) { + // If this.totalRemainingWork <= 0, the fragment has been completed across all resources + if (this.totalRemainingWork <= 0 && !this.workloadFinished.containsValue(false)) { this.startNextFragment(); this.invalidate(); return Long.MAX_VALUE; } - this.cpuFreqSupplied = this.newCpuFreqSupplied; + for (ResourceType resourceType : this.usedResourceTypes) { + if (this.machineResourceEdges.get(resourceType) != null) { + this.pushOutgoingDemand( + this.machineResourceEdges.get(resourceType), + this.resourcesDemand.get(resourceType), + resourceType); + } + } - // The amount of time required to finish the fragment at this speed - long remainingDuration = this.scalingPolicy.getRemainingDuration( - this.cpuFreqDemand, this.newCpuFreqSupplied, this.remainingWork); + // Update the supplied resources + for (ResourceType resourceType : this.usedResourceTypes) { + this.resourcesSupplied.put(resourceType, this.newResourcesSupply.get(resourceType)); + } - if (remainingDuration == 0.0) { - this.remainingWork = 0.0; + long timeUntilNextUpdate = Long.MIN_VALUE; + + for (ResourceType resourceType : this.usedResourceTypes) { + // The amount of time required to finish the fragment at this speed + long remainingDuration = this.scalingPolicy.getRemainingDuration( + this.resourcesDemand.get(resourceType), + this.resourcesSupplied.get(resourceType), + this.remainingWork.get(resourceType)); + + if (remainingDuration == 0.0) { + // if resource not initialized, then nothing happens + this.totalRemainingWork -= this.remainingWork.get(resourceType); + this.remainingWork.put(resourceType, 0.0); + this.workloadFinished.put(resourceType, true); + } + + // The next update should happen when the fastest resource is done, so that it is no longer tracked when + // unused + if (remainingDuration > 0 + && (timeUntilNextUpdate == Long.MIN_VALUE || remainingDuration < timeUntilNextUpdate)) { + timeUntilNextUpdate = 
remainingDuration; + } } - return now + remainingDuration; + return timeUntilNextUpdate == Long.MIN_VALUE ? now : now + timeUntilNextUpdate; } public TraceFragment getNextFragment() { @@ -152,14 +238,27 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { this.stopWorkload(); return; } - double demand = nextFragment.cpuUsage(); - this.remainingWork = this.scalingPolicy.getRemainingWork(demand, nextFragment.duration()); - this.pushOutgoingDemand(this.machineEdge, demand); + + // Reset the remaining work for all resources + this.totalRemainingWork = 0.0; + + // TODO: only acceleration is considered, not memory + for (ResourceType resourceType : usedResourceTypes) { + double demand = nextFragment.getResourceUsage(resourceType); + + this.remainingWork.put(resourceType, this.scalingPolicy.getRemainingWork(demand, nextFragment.duration())); + this.totalRemainingWork += this.remainingWork.get(resourceType); + this.workloadFinished.put(resourceType, false); + + if (this.machineResourceEdges.get(resourceType) != null) { + this.pushOutgoingDemand(this.machineResourceEdges.get(resourceType), demand, resourceType); + } + } } @Override public void stopWorkload() { - if (this.machineEdge == null) { + if (areAllEdgesNull()) { return; } @@ -167,7 +266,10 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { // Currently stopWorkload is called twice this.closeNode(); - this.machineEdge = null; + for (ResourceType resourceType : this.usedResourceTypes) { + this.machineResourceEdges.put(resourceType, null); + this.workloadFinished.put(resourceType, true); + } this.remainingFragments = null; this.currentFragment = null; } @@ -195,22 +297,38 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { long passedTime = getPassedTime(now); // The amount of work done since last update - double finishedWork = this.scalingPolicy.getFinishedWork(this.cpuFreqDemand, this.cpuFreqSupplied, passedTime); + for (ResourceType 
resourceType : this.usedResourceTypes) { + double finishedWork = this.scalingPolicy.getFinishedWork( + this.resourcesDemand.get(resourceType), this.resourcesSupplied.get(resourceType), passedTime); + this.remainingWork.put(resourceType, this.remainingWork.get(resourceType) - finishedWork); + this.totalRemainingWork -= finishedWork; + } - this.remainingWork -= finishedWork; + long remainingDuration = 0; + for (ResourceType resourceType : this.usedResourceTypes) { - // The amount of time required to finish the fragment at this speed - long remainingTime = - this.scalingPolicy.getRemainingDuration(this.cpuFreqDemand, this.cpuFreqDemand, this.remainingWork); + // The amount of time required to finish the fragment at this speed + remainingDuration = Math.max( + remainingDuration, + this.scalingPolicy.getRemainingDuration( + this.resourcesDemand.get(resourceType), + this.resourcesSupplied.get(resourceType), + this.remainingWork.get(resourceType))); + } // If this is the end of the Task, don't make a snapshot - if (this.currentFragment == null || (remainingTime <= 0 && remainingFragments.isEmpty())) { + if (this.currentFragment == null || (remainingDuration <= 0 && remainingFragments.isEmpty())) { return; } // Create a new fragment based on the current fragment and remaining duration - TraceFragment newFragment = - new TraceFragment(remainingTime, currentFragment.cpuUsage(), currentFragment.coreCount()); + TraceFragment newFragment = new TraceFragment( + remainingDuration, + currentFragment.cpuUsage(), + currentFragment.cpuCoreCount(), + currentFragment.gpuUsage(), + currentFragment.gpuCoreCount(), + currentFragment.gpuMemoryUsage()); // Alter the snapshot by removing finished fragments this.snapshot.removeFragments(this.fragmentIndex); @@ -220,7 +338,12 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { // Create and add a fragment for processing the snapshot process TraceFragment snapshotFragment = new TraceFragment( - 
this.checkpointDuration, this.snapshot.getMaxCpuDemand(), this.snapshot.getMaxCoreCount()); + this.checkpointDuration, + this.snapshot.getMaxCpuDemand(), + this.snapshot.getMaxCoreCount(), + this.snapshot.getMaxGpuDemand(), + this.snapshot.getMaxGpuCoreCount(), + this.snapshot.getMaxGpuMemoryDemand()); this.remainingFragments.addFirst(snapshotFragment); this.fragmentIndex = -1; @@ -243,12 +366,29 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { */ @Override public void handleIncomingSupply(FlowEdge supplierEdge, double newSupply) { - if (newSupply == this.cpuFreqSupplied) { + ResourceType suppliedResourceType = ResourceType.CPU; + if (this.resourcesSupplied.get(suppliedResourceType) == newSupply) { return; } + this.resourcesSupplied.put(suppliedResourceType, this.newResourcesSupply.get(suppliedResourceType)); + this.newResourcesSupply.put(suppliedResourceType, newSupply); - this.cpuFreqSupplied = this.newCpuFreqSupplied; - this.newCpuFreqSupplied = newSupply; + this.invalidate(); + } + + /** + * Handle updates in supply from the Virtual Machine + * + * @param supplierEdge edge to the VM on which this is running + * @param newSupply The new demand that needs to be sent to the VM + */ + @Override + public void handleIncomingSupply(FlowEdge supplierEdge, double newSupply, ResourceType resourceType) { + if (this.resourcesSupplied.get(resourceType) == newSupply) { + return; + } + this.resourcesSupplied.put(resourceType, this.newResourcesSupply.get(resourceType)); + this.newResourcesSupply.put(resourceType, newSupply); this.invalidate(); } @@ -261,12 +401,28 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { */ @Override public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand) { - if (newDemand == this.cpuFreqDemand) { + ResourceType demandedResourceType = ResourceType.CPU; + if (this.resourcesDemand.get(demandedResourceType) == newDemand) { return; } - this.cpuFreqDemand = newDemand; - 
this.machineEdge.pushDemand(newDemand); + this.resourcesDemand.put(demandedResourceType, newDemand); + this.machineResourceEdges.get(demandedResourceType).pushDemand(newDemand); + } + /** + * Push a new demand to the Virtual Machine + * + * @param supplierEdge edge to the VM on which this is running + * @param newDemand The new demand that needs to be sent to the VM + */ + @Override + public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand, ResourceType resourceType) { + if (this.resourcesDemand.get(resourceType) == newDemand) { + return; + } + + this.resourcesDemand.put(resourceType, newDemand); + this.machineResourceEdges.get(resourceType).pushDemand(newDemand, false, resourceType); } /** @@ -276,7 +432,24 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { */ @Override public void addSupplierEdge(FlowEdge supplierEdge) { - this.machineEdge = supplierEdge; + ResourceType incommingResourceType = supplierEdge.getResourceType(); + + if (machineResourceEdges.containsValue(supplierEdge)) { + return; // Skip if this exact edge is already registered + } + + this.machineResourceEdges.put(incommingResourceType, supplierEdge); + if (supplierEdge.getSupplier() instanceof VirtualMachine vm) { + for (ResourceType resourceType : vm.getAvailableResources()) { + if (resourceType == incommingResourceType || resourceType == ResourceType.AUXILIARY) { + continue; + } + + if (!this.machineResourceEdges.containsKey(resourceType)) { + new FlowEdge(this, vm, resourceType); + } + } + } } /** @@ -287,7 +460,7 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { */ @Override public void removeSupplierEdge(FlowEdge supplierEdge) { - if (this.machineEdge == null) { + if (areAllEdgesNull()) { return; } @@ -296,6 +469,24 @@ public class SimTraceWorkload extends SimWorkload implements FlowConsumer { @Override public Map<FlowEdge.NodeType, List<FlowEdge>> getConnectedEdges() { - return Map.of(FlowEdge.NodeType.CONSUMING, 
(this.machineEdge != null) ? List.of(this.machineEdge) : List.of()); + Map<FlowEdge.NodeType, List<FlowEdge>> connectedEdges = new HashMap<>(); + for (ResourceType resourceType : ResourceType.values()) { + if (this.machineResourceEdges.get(resourceType) != null) { + connectedEdges.put(FlowEdge.NodeType.CONSUMING, List.of(this.machineResourceEdges.get(resourceType))); + } + } + return connectedEdges; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Util Methods + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + private boolean areAllEdgesNull() { + for (FlowEdge edge : this.machineResourceEdges.values()) { + if (edge != null) { + return false; + } + } + return true; } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceFragment.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceFragment.java index a09206a1..bc3685a3 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceFragment.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceFragment.java @@ -22,9 +22,49 @@ package org.opendc.simulator.compute.workload.trace; -public record TraceFragment(long duration, double cpuUsage, int coreCount) { +import org.opendc.common.ResourceType; - public TraceFragment(long start, long duration, double cpuUsage, int coreCount) { - this(duration, cpuUsage, coreCount); +public record TraceFragment( + long duration, double cpuUsage, int cpuCoreCount, double gpuUsage, int gpuCoreCount, Long gpuMemoryUsage) { + + public TraceFragment(long start, long duration, double cpuUsage, int cpuCoreCount) { + this(duration, cpuUsage, cpuCoreCount, 0.0, 0, 0L); + } + + public TraceFragment(long 
duration, double cpuUsage, int cpuCoreCount) { + this(duration, cpuUsage, cpuCoreCount, 0.0, 0, 0L); + } + + public TraceFragment(long duration, double cpuUsage, int cpuCoreCount, double gpuUsage, int gpuCoreCount) { + this(duration, cpuUsage, cpuCoreCount, gpuUsage, gpuCoreCount, 0L); + } + + /** + * Returns the resource usage for the specified resource type. + * + * @param resourceType the type of resource + * @return the usage value for the specified resource type + */ + public double getResourceUsage(ResourceType resourceType) throws IllegalArgumentException { + return switch (resourceType) { + case CPU -> cpuUsage; + case GPU -> gpuUsage; + // case GPU_MEMORY -> gpuMemoryUsage; + default -> throw new IllegalArgumentException("Invalid resource type: " + resourceType); + }; + } + + /** + * Returns the core count for the specified resource type. + * + * @param resourceType the type of resource + * @return the core count for the specified resource type + */ + public int getCoreCount(ResourceType resourceType) throws IllegalArgumentException { + return switch (resourceType) { + case CPU -> cpuCoreCount; + case GPU -> gpuCoreCount; + default -> throw new IllegalArgumentException("Invalid resource type: " + resourceType); + }; } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceWorkload.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceWorkload.java index 9c31a833..d698a48d 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceWorkload.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/TraceWorkload.java @@ -23,8 +23,12 @@ package org.opendc.simulator.compute.workload.trace; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; +import java.util.List; +import java.util.Objects; import 
java.util.function.Consumer; +import org.opendc.common.ResourceType; import org.opendc.simulator.compute.machine.SimMachine; import org.opendc.simulator.compute.workload.SimWorkload; import org.opendc.simulator.compute.workload.Workload; @@ -37,13 +41,12 @@ public class TraceWorkload implements Workload { private final long checkpointDuration; private final double checkpointIntervalScaling; private final double maxCpuDemand; - private final int maxCoreCount; - - public String getTaskName() { - return taskName; - } - + private final int maxCpuCoreCount; + private final double maxGpuDemand; + private final int maxGpuCoreCount; + private final long maxGpuMemoryDemand; private final String taskName; + private ResourceType[] resourceTypes = new ResourceType[ResourceType.values().length]; public ScalingPolicy getScalingPolicy() { return scalingPolicy; @@ -57,7 +60,8 @@ public class TraceWorkload implements Workload { long checkpointDuration, double checkpointIntervalScaling, ScalingPolicy scalingPolicy, - String taskName) { + String taskName, + ResourceType[] resourceTypes) { this.fragments = fragments; this.checkpointInterval = checkpointInterval; this.checkpointDuration = checkpointDuration; @@ -69,11 +73,25 @@ public class TraceWorkload implements Workload { this.maxCpuDemand = fragments.stream() .max(Comparator.comparing(TraceFragment::cpuUsage)) .get() - .cpuUsage(); - this.maxCoreCount = fragments.stream() - .max(Comparator.comparing(TraceFragment::coreCount)) + // .cpuUsage(); + .getResourceUsage(ResourceType.CPU); + this.maxCpuCoreCount = fragments.stream() + .max(Comparator.comparing(TraceFragment::cpuCoreCount)) .get() - .coreCount(); + // .cpuCoreCount(); + .getCoreCount(ResourceType.CPU); + + this.maxGpuDemand = fragments.stream() + .max(Comparator.comparing(TraceFragment::gpuUsage)) + .get() + .getResourceUsage(ResourceType.GPU); + this.maxGpuCoreCount = fragments.stream() + .max(Comparator.comparing(TraceFragment::gpuCoreCount)) + .get() + 
.getCoreCount(ResourceType.GPU); + this.maxGpuMemoryDemand = 0L; // TODO: add GPU memory demand to the trace fragments + + this.resourceTypes = resourceTypes; } public ArrayList<TraceFragment> getFragments() { @@ -96,13 +114,29 @@ public class TraceWorkload implements Workload { } public int getMaxCoreCount() { - return maxCoreCount; + return maxCpuCoreCount; } public double getMaxCpuDemand() { return maxCpuDemand; } + public double getMaxGpuDemand() { + return maxGpuDemand; + } + + public int getMaxGpuCoreCount() { + return maxGpuCoreCount; + } + + public long getMaxGpuMemoryDemand() { + return maxGpuMemoryDemand; + } + + public String getTaskName() { + return taskName; + } + public void removeFragments(int numberOfFragments) { if (numberOfFragments <= 0) { return; @@ -114,14 +148,22 @@ public class TraceWorkload implements Workload { this.fragments.addFirst(fragment); } + public ResourceType[] getResourceTypes() { + return Arrays.stream(resourceTypes).filter(Objects::nonNull).toArray(ResourceType[]::new); + } + @Override public SimWorkload startWorkload(FlowSupplier supplier) { return new SimTraceWorkload(supplier, this); + // ArrayList<FlowSupplier> flowSuppliers = new ArrayList<>(); + // flowSuppliers.add(supplier); + // return new SimTraceWorkload(flowSuppliers, this); } @Override - public SimWorkload startWorkload(FlowSupplier supplier, SimMachine machine, Consumer<Exception> completion) { - return this.startWorkload(supplier); + public SimWorkload startWorkload(List<FlowSupplier> supplier, SimMachine machine, Consumer<Exception> completion) { + // return this.startWorkload(supplier); + return new SimTraceWorkload(supplier, this); } public static Builder builder( @@ -140,6 +182,7 @@ public class TraceWorkload implements Workload { private final double checkpointIntervalScaling; private final ScalingPolicy scalingPolicy; private final String taskName; + private final ResourceType[] resourceTypes = new ResourceType[ResourceType.values().length]; /** * Construct 
a new {@link Builder} instance. @@ -162,11 +205,23 @@ public class TraceWorkload implements Workload { * Add a fragment to the trace. * * @param duration The timestamp at which the fragment ends (in epoch millis). - * @param usage The CPU usage at this fragment. - * @param cores The number of cores used during this fragment. + * @param cpuUsage The CPU usage at this fragment. + * @param cpuCores The number of cores used during this fragment. + * @param gpuUsage The GPU usage at this fragment. + * @param gpuCores The number of GPU cores used during this fragment. + * @param gpuMemoryUsage The GPU memory usage at this fragment. */ - public void add(long duration, double usage, int cores) { - fragments.add(fragments.size(), new TraceFragment(duration, usage, cores)); + public void add( + long duration, double cpuUsage, int cpuCores, double gpuUsage, int gpuCores, long gpuMemoryUsage) { + if (cpuUsage > 0.0) { + this.resourceTypes[ResourceType.CPU.ordinal()] = ResourceType.CPU; + } + if (gpuUsage > 0.0) { + this.resourceTypes[ResourceType.GPU.ordinal()] = ResourceType.GPU; + } + fragments.add( + fragments.size(), + new TraceFragment(duration, cpuUsage, cpuCores, gpuUsage, gpuCores, gpuMemoryUsage)); } /** @@ -179,7 +234,8 @@ public class TraceWorkload implements Workload { this.checkpointDuration, this.checkpointIntervalScaling, this.scalingPolicy, - this.taskName); + this.taskName, + this.resourceTypes); } } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/NoDelayScaling.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/NoDelayScaling.java index d0c1cc2e..91538c85 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/NoDelayScaling.java +++ 
b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/NoDelayScaling.java @@ -31,17 +31,17 @@ package org.opendc.simulator.compute.workload.trace.scaling; */ public class NoDelayScaling implements ScalingPolicy { @Override - public double getFinishedWork(double cpuFreqDemand, double cpuFreqSupplied, long passedTime) { + public double getFinishedWork(double demand, double supplied, long passedTime) { return passedTime; } @Override - public long getRemainingDuration(double cpuFreqDemand, double cpuFreqSupplied, double remainingWork) { + public long getRemainingDuration(double demand, double supplied, double remainingWork) { return (long) remainingWork; } @Override - public double getRemainingWork(double cpuFreqDemand, long duration) { + public double getRemainingWork(double demand, long duration) { return duration; } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/PerfectScaling.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/PerfectScaling.java index 7eae70e6..c4cfba66 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/PerfectScaling.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/PerfectScaling.java @@ -31,17 +31,17 @@ package org.opendc.simulator.compute.workload.trace.scaling; */ public class PerfectScaling implements ScalingPolicy { @Override - public double getFinishedWork(double cpuFreqDemand, double cpuFreqSupplied, long passedTime) { - return cpuFreqSupplied * passedTime; + public double getFinishedWork(double demand, double supplied, long passedTime) { + return supplied * passedTime; } @Override - public long getRemainingDuration(double cpuFreqDemand, double cpuFreqSupplied, double remainingWork) { - return (long) 
(remainingWork / cpuFreqSupplied); + public long getRemainingDuration(double demand, double supplied, double remainingWork) { + return (long) (remainingWork / supplied); } @Override - public double getRemainingWork(double cpuFreqDemand, long duration) { - return cpuFreqDemand * duration; + public double getRemainingWork(double demand, long duration) { + return demand * duration; } } diff --git a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/ScalingPolicy.java b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/ScalingPolicy.java index a0f473ba..f0676103 100644 --- a/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/ScalingPolicy.java +++ b/opendc-simulator/opendc-simulator-compute/src/main/java/org/opendc/simulator/compute/workload/trace/scaling/ScalingPolicy.java @@ -31,29 +31,29 @@ public interface ScalingPolicy { /** * Calculate how much work was finished based on the demanded and supplied cpu * - * @param cpuFreqDemand - * @param cpuFreqSupplied - * @param passedTime - * @return + * @param demand demand of the resource by the workload + * @param supplied resource supplied for the workload + * @param passedTime time that has passed since the start + * @return the amount of work that was finished */ - double getFinishedWork(double cpuFreqDemand, double cpuFreqSupplied, long passedTime); + double getFinishedWork(double demand, double supplied, long passedTime); /** * Calculate the remaining duration of this fragment based on the demanded and supplied cpu * - * @param cpuFreqDemand - * @param cpuFreqSupplied - * @param remainingWork - * @return + * @param demand of the resource by the workload + * @param supplied resource supplied for the workload + * @param remainingWork the remaining work that needs to be done + * @return the remaining duration of the fragment */ - long 
getRemainingDuration(double cpuFreqDemand, double cpuFreqSupplied, double remainingWork); + long getRemainingDuration(double demand, double supplied, double remainingWork); /** * Calculate how much work is remaining based on the demanded and supplied cpu * - * @param cpuFreqDemand - * @param duration - * @return + * @param demand of the resource by the workload + * @param duration the duration of the fragment + * @return the amount of work that is remaining */ - double getRemainingWork(double cpuFreqDemand, long duration); + double getRemainingWork(double demand, long duration); } diff --git a/opendc-simulator/opendc-simulator-compute/src/test/kotlin/org/opendc/simulator/compute/SimMachineTest.kt b/opendc-simulator/opendc-simulator-compute/src/test/kotlin/org/opendc/simulator/compute/SimMachineTest.kt index 173c60e7..eb3d3377 100644 --- a/opendc-simulator/opendc-simulator-compute/src/test/kotlin/org/opendc/simulator/compute/SimMachineTest.kt +++ b/opendc-simulator/opendc-simulator-compute/src/test/kotlin/org/opendc/simulator/compute/SimMachineTest.kt @@ -22,32 +22,31 @@ package org.opendc.simulator.compute -import org.junit.jupiter.api.BeforeEach -import org.opendc.simulator.compute.models.CpuModel import org.opendc.simulator.compute.models.MachineModel -import org.opendc.simulator.compute.models.MemoryUnit /** * Test suite for the [SimBareMetalMachine] class. 
*/ + class SimMachineTest { private lateinit var machineModel: MachineModel - - @BeforeEach - fun setUp() { - machineModel = - MachineModel( - CpuModel( - 0, - 2, - 1000.0, - "Intel", - "Xeon", - "amd64", - ), - MemoryUnit("Crucial", "MTA18ASF4G72AZ-3G2B1", 3200.0, 32_000 * 4), - ) - } +// +// @BeforeEach +// fun setUp() { +// machineModel = +// MachineModel( +// CpuModel( +// 0, +// 2, +// 1000.0, +// "Intel", +// "Xeon", +// "amd64", +// ), +// MemoryUnit("Crucial", "MTA18ASF4G72AZ-3G2B1", 3200.0, 32_000 * 4), +// null +// ) +// } // @Test // fun testFlopsWorkload() = @@ -104,10 +103,10 @@ class SimMachineTest { // val cpuNode = machineModel.cpu // val machineModel = // MachineModel( -// List(cpuNode.coreCount * 2) { +// List(cpuNode.cpuCoreCount * 2) { // CpuModel( // it, -// cpuNode.coreCount, +// cpuNode.cpuCoreCount, // 1000.0, // ) // }, diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowConsumer.java b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowConsumer.java index a9da6f5d..ac6ba8da 100644 --- a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowConsumer.java +++ b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowConsumer.java @@ -22,13 +22,28 @@ package org.opendc.simulator.engine.graph; +import org.opendc.common.ResourceType; + public interface FlowConsumer { void handleIncomingSupply(FlowEdge supplierEdge, double newSupply); + default void handleIncomingSupply(FlowEdge supplierEdge, double newSupply, ResourceType resourceType) { + handleIncomingSupply(supplierEdge, newSupply); + } + void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand); + default void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand, ResourceType resourceType) { + pushOutgoingDemand(supplierEdge, newDemand); + } + void addSupplierEdge(FlowEdge supplierEdge); void 
removeSupplierEdge(FlowEdge supplierEdge); + + // needed for flow nodes with multiple edges to same other flow node (PSU, VM) + default ResourceType getConsumerResourceType() { + return ResourceType.AUXILIARY; + } } diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowDistributor.java b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowDistributor.java index 09cd73f6..674db8ca 100644 --- a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowDistributor.java +++ b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowDistributor.java @@ -23,13 +23,15 @@ package org.opendc.simulator.engine.graph; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import org.opendc.common.ResourceType; import org.opendc.simulator.engine.engine.FlowEngine; +import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicy; +import org.opendc.simulator.engine.graph.distributionPolicies.MaxMinFairnessPolicy; public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsumer { private final ArrayList<FlowEdge> consumerEdges = new ArrayList<>(); @@ -47,9 +49,16 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu private boolean overloaded = false; private double capacity; // What is the max capacity. 
Can probably be removed + private DistributionPolicy distributionPolicy; public FlowDistributor(FlowEngine engine) { super(engine); + this.distributionPolicy = new MaxMinFairnessPolicy(); + } + + public FlowDistributor(FlowEngine engine, DistributionPolicy distributionPolicy) { + super(engine); + this.distributionPolicy = distributionPolicy; } public double getTotalIncomingDemand() { @@ -88,6 +97,7 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu this.invalidate(); } + // TODO: This should probably be moved to the distribution strategy private void updateOutgoingSupplies() { // If the demand is higher than the current supply, the system is overloaded. @@ -95,10 +105,11 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu if (this.totalIncomingDemand > this.currentIncomingSupply) { this.overloaded = true; - double[] supplies = distributeSupply(this.incomingDemands, this.currentIncomingSupply); + double[] supplies = + this.distributionPolicy.distributeSupply(this.incomingDemands, this.currentIncomingSupply); for (int idx = 0; idx < this.consumerEdges.size(); idx++) { - this.pushOutgoingSupply(this.consumerEdges.get(idx), supplies[idx]); + this.pushOutgoingSupply(this.consumerEdges.get(idx), supplies[idx], this.getConsumerResourceType()); } } else { @@ -108,7 +119,10 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu if (this.overloaded) { for (int idx = 0; idx < this.consumerEdges.size(); idx++) { if (!Objects.equals(this.outgoingSupplies.get(idx), this.incomingDemands.get(idx))) { - this.pushOutgoingSupply(this.consumerEdges.get(idx), this.incomingDemands.get(idx)); + this.pushOutgoingSupply( + this.consumerEdges.get(idx), + this.incomingDemands.get(idx), + this.getConsumerResourceType()); } } this.overloaded = false; @@ -117,7 +131,8 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu // Update the supplies of the consumers that changed 
their demand in the current cycle else { for (int idx : this.updatedDemands) { - this.pushOutgoingSupply(this.consumerEdges.get(idx), this.incomingDemands.get(idx)); + this.pushOutgoingSupply( + this.consumerEdges.get(idx), this.incomingDemands.get(idx), this.getConsumerResourceType()); } } } @@ -125,48 +140,6 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu this.updatedDemands.clear(); } - private record Demand(int idx, double value) {} - - /** - * Distributed the available supply over the different demands. - * The supply is distributed using MaxMin Fairness. - */ - private static double[] distributeSupply(ArrayList<Double> demands, double currentSupply) { - int inputSize = demands.size(); - - final double[] supplies = new double[inputSize]; - final Demand[] tempDemands = new Demand[inputSize]; - - for (int i = 0; i < inputSize; i++) { - tempDemands[i] = new Demand(i, demands.get(i)); - } - - Arrays.sort(tempDemands, (o1, o2) -> { - Double i1 = o1.value; - Double i2 = o2.value; - return i1.compareTo(i2); - }); - - double availableCapacity = currentSupply; // totalSupply - - for (int i = 0; i < inputSize; i++) { - double d = tempDemands[i].value; - - if (d == 0.0) { - continue; - } - - double availableShare = availableCapacity / (inputSize - i); - double r = Math.min(d, availableShare); - - int idx = tempDemands[i].idx; - supplies[idx] = r; // Update the rates - availableCapacity -= r; - } - - return supplies; - } - /** * Add a new consumer. 
* Set its demand and supply to 0.0 @@ -260,6 +233,15 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu } @Override + public void handleIncomingDemand(FlowEdge consumerEdge, double newDemand, ResourceType resourceType) { + if (resourceType != this.getSupplierResourceType()) { + throw new IllegalArgumentException("Resource type " + resourceType + + " does not match distributor resource type " + this.getSupplierResourceType()); + } + this.handleIncomingDemand(consumerEdge, newDemand); + } + + @Override public void handleIncomingSupply(FlowEdge supplierEdge, double newSupply) { this.currentIncomingSupply = newSupply; @@ -268,7 +250,7 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu @Override public void pushOutgoingDemand(FlowEdge supplierEdge, double newDemand) { - this.supplierEdge.pushDemand(newDemand); + this.supplierEdge.pushDemand(newDemand, false, this.getSupplierResourceType()); } @Override @@ -284,7 +266,8 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu } outgoingSupplies.set(idx, newSupply); - consumerEdge.pushSupply(newSupply); + consumerEdge.pushSupply(newSupply, false, this.getSupplierResourceType()); + consumerEdge.pushSupply(newSupply, false, this.getSupplierResourceType()); } @Override @@ -293,4 +276,14 @@ public class FlowDistributor extends FlowNode implements FlowSupplier, FlowConsu return Map.of(FlowEdge.NodeType.CONSUMING, supplyingEdges, FlowEdge.NodeType.SUPPLYING, this.consumerEdges); } + + @Override + public ResourceType getSupplierResourceType() { + return this.supplierEdge.getSupplierResourceType(); + } + + @Override + public ResourceType getConsumerResourceType() { + return this.consumerEdges.getFirst().getConsumerResourceType(); + } } diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowEdge.java 
b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowEdge.java index 95eac20b..aa3894c1 100644 --- a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowEdge.java +++ b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowEdge.java @@ -22,6 +22,8 @@ package org.opendc.simulator.engine.graph; +import org.opendc.common.ResourceType; + /** * An edge that connects two FlowStages. * A connection between FlowStages always consist of a FlowStage that demands @@ -38,7 +40,9 @@ public class FlowEdge { private double demand = 0.0; private double supply = 0.0; - private double capacity; + private final double capacity; + + private final ResourceType resourceType; public enum NodeType { CONSUMING, @@ -46,6 +50,10 @@ public class FlowEdge { } public FlowEdge(FlowConsumer consumer, FlowSupplier supplier) { + this(consumer, supplier, ResourceType.AUXILIARY); + } + + public FlowEdge(FlowConsumer consumer, FlowSupplier supplier, ResourceType resourceType) { if (!(consumer instanceof FlowNode)) { throw new IllegalArgumentException("Flow consumer is not a FlowNode"); } @@ -55,8 +63,9 @@ public class FlowEdge { this.consumer = consumer; this.supplier = supplier; + this.resourceType = resourceType; - this.capacity = supplier.getCapacity(); + this.capacity = supplier.getCapacity(resourceType); this.consumer.addSupplierEdge(this); this.supplier.addConsumerEdge(this); @@ -112,6 +121,33 @@ public class FlowEdge { return this.supply; } + /** + * Get the resource type of this edge. + * + * @return The resource type of this edge. + */ + public ResourceType getResourceType() { + return this.resourceType; + } + + /** + * Get the resource type of the supplier of this edge. + * + * @return The resource type of the supplier. 
+ */ + public ResourceType getSupplierResourceType() { + return this.supplier.getSupplierResourceType(); + } + + /** + * Get the resource type of the consumer of this edge. + * + * @return The resource type of the consumer. + */ + public ResourceType getConsumerResourceType() { + return this.consumer.getConsumerResourceType(); + } + public int getConsumerIndex() { return consumerIndex; } @@ -128,6 +164,16 @@ public class FlowEdge { this.supplierIndex = supplierIndex; } + public void pushDemand(double newDemand, boolean forceThrough, ResourceType resourceType) { + // or store last resource type in the edge + if ((newDemand == this.demand) && !forceThrough) { + return; + } + + this.demand = newDemand; + this.supplier.handleIncomingDemand(this, newDemand, resourceType); + } + /** * Push new demand from the Consumer to the Supplier */ @@ -150,19 +196,19 @@ public class FlowEdge { /** * Push new supply from the Supplier to the Consumer */ - public void pushSupply(double newSupply, boolean forceThrough) { + public void pushSupply(double newSupply, boolean forceThrough, ResourceType resourceType) { if ((newSupply == this.supply) && !forceThrough) { return; } this.supply = newSupply; - this.consumer.handleIncomingSupply(this, newSupply); + this.consumer.handleIncomingSupply(this, newSupply, resourceType); } /** * Push new supply from the Supplier to the Consumer */ public void pushSupply(double newSupply) { - this.pushSupply(newSupply, false); + this.pushSupply(newSupply, false, this.supplier.getSupplierResourceType()); } } diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowSupplier.java b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowSupplier.java index da65392b..eb665b8c 100644 --- a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowSupplier.java +++ 
b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/FlowSupplier.java @@ -22,15 +22,35 @@ package org.opendc.simulator.engine.graph; +import org.opendc.common.ResourceType; + public interface FlowSupplier { void handleIncomingDemand(FlowEdge consumerEdge, double newDemand); + default void handleIncomingDemand(FlowEdge consumerEdge, double newDemand, ResourceType resourceType) { + handleIncomingDemand(consumerEdge, newDemand); + } + void pushOutgoingSupply(FlowEdge consumerEdge, double newSupply); + default void pushOutgoingSupply(FlowEdge consumerEdge, double newSupply, ResourceType resourceType) { + pushOutgoingSupply(consumerEdge, newSupply); + } + ; + void addConsumerEdge(FlowEdge consumerEdge); void removeConsumerEdge(FlowEdge consumerEdge); double getCapacity(); + + default double getCapacity(ResourceType resourceType) { + return getCapacity(); + } + + default ResourceType getSupplierResourceType() { + return ResourceType.AUXILIARY; + } + ; } diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/DistributionPolicy.java b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/DistributionPolicy.java new file mode 100644 index 00000000..9d2246cd --- /dev/null +++ b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/DistributionPolicy.java @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + 
/**
 * Strategy for dividing a supplier's available capacity over a set of consumers.
 */
public interface DistributionPolicy {
    /**
     * Distribute {@code currentSupply} over the given per-consumer values.
     *
     * @param supply the per-consumer values to distribute against
     *               (NOTE(review): implementations treat this as the per-consumer
     *               demand despite the parameter name — confirm and consider renaming)
     * @param currentSupply the total supply available for distribution
     * @return the amount granted to each consumer, in input order
     */
    double[] distributeSupply(ArrayList<Double> supply, double currentSupply);
}
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.engine.graph.distributionPolicies; + +public class DistributionPolicyFactory { + + public enum DistributionPolicyType { + MaxMinFairness, + FixedShare; + } + + public static DistributionPolicy getDistributionStrategy(DistributionPolicyType distributionPolicyType) { + + return switch (distributionPolicyType) { + case MaxMinFairness -> new MaxMinFairnessPolicy(); + case FixedShare -> new FixedShare(1); + // actively misspelling + default -> throw new IllegalArgumentException( + "Unknown distribution strategy type: " + distributionPolicyType); + }; + } +} diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/FixedShare.java b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/FixedShare.java new file mode 100644 index 00000000..40d70b5e --- /dev/null +++ b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/FixedShare.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2025 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to 
whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.simulator.engine.graph.distributionPolicies; + +import java.util.ArrayList; + +/** + * A distribution policy that distributes supply equally among all nodes. + * The share can be set to a fixed value, defaulting to 1. + * This policy not implemented yet and is used as a placeholder. 
+ */ +public class FixedShare implements DistributionPolicy { + + private int share; + + public FixedShare() { + this.share = 1; + } + + public FixedShare(int share) { + this.share = share; + } + + @Override + public double[] distributeSupply(ArrayList<Double> supply, double currentSupply) { + return new double[0]; + } +} diff --git a/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/MaxMinFairnessPolicy.java b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/MaxMinFairnessPolicy.java new file mode 100644 index 00000000..1d387349 --- /dev/null +++ b/opendc-simulator/opendc-simulator-flow/src/main/java/org/opendc/simulator/engine/graph/distributionPolicies/MaxMinFairnessPolicy.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package org.opendc.simulator.engine.graph.distributionPolicies; + +import java.util.ArrayList; +import java.util.Arrays; + +/** + * A distribution policy that implements the Max-Min Fairness algorithm. + * This policy distributes supply to demands in a way that maximizes the minimum + * allocation across all demands, ensuring fairness. + */ +public class MaxMinFairnessPolicy implements DistributionPolicy { + private record Demand(int idx, double value) {} + + @Override + public double[] distributeSupply(ArrayList<Double> demands, double currentSupply) { + int inputSize = demands.size(); + + final double[] supplies = new double[inputSize]; + final Demand[] tempDemands = new Demand[inputSize]; + + for (int i = 0; i < inputSize; i++) { + tempDemands[i] = new Demand(i, demands.get(i)); + } + + Arrays.sort(tempDemands, (o1, o2) -> { + Double i1 = o1.value; + Double i2 = o2.value; + return i1.compareTo(i2); + }); + + double availableCapacity = currentSupply; // totalSupply + + for (int i = 0; i < inputSize; i++) { + double d = tempDemands[i].value; + + if (d == 0.0) { + continue; + } + + double availableShare = availableCapacity / (inputSize - i); + double r = Math.min(d, availableShare); + + int idx = tempDemands[i].idx; + supplies[idx] = r; // Update the rates + availableCapacity -= r; + } + + return supplies; + } +} diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt index d0f56bff..181ca8e8 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt @@ -73,6 +73,24 @@ public val resourceCpuCapacity: String = "cpu_capacity" public val resourceMemCapacity: String = "mem_capacity" /** + * Number of GPU cores for the resource. 
+ */ +@JvmField +public val resourceGpuCount: String = "gpu_count" + +/** + * Total GPU capacity of the resource in MHz. + */ +@JvmField +public val resourceGpuCapacity: String = "gpu_capacity" + +/** + * Total GPU memory capacity of the resource in MB. + */ +@JvmField +public val resourceGpuMemCapacity: String = "gpu_mem_capacity" + +/** * Nature of the task. Delayable, interruptible, etc. */ @JvmField diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt index eede6bd6..f4ab7759 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt @@ -95,3 +95,9 @@ public val resourceStateNetRx: String = "net_rx" */ @JvmField public val resourceStateNetTx: String = "net_tx" + +/** + * Total GPU capacity of the resource in MHz. 
+ */ +@JvmField +public val resourceStateGpuUsage: String = "gpu_usage" diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableReader.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableReader.kt index 39475f9f..d474e0ec 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableReader.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableReader.kt @@ -24,9 +24,11 @@ package org.opendc.trace.formats.opendc import org.opendc.trace.TableReader import org.opendc.trace.conv.resourceCpuCount +import org.opendc.trace.conv.resourceGpuCount import org.opendc.trace.conv.resourceID import org.opendc.trace.conv.resourceStateCpuUsage import org.opendc.trace.conv.resourceStateDuration +import org.opendc.trace.conv.resourceStateGpuUsage import org.opendc.trace.conv.resourceStateTimestamp import org.opendc.trace.formats.opendc.parquet.ResourceState import org.opendc.trace.util.parquet.LocalParquetReader @@ -60,6 +62,9 @@ internal class OdcVmResourceStateTableReader(private val reader: LocalParquetRea private val colDuration = 2 private val colCpuCount = 3 private val colCpuUsage = 4 + private val colGpuCount = 5 + private val colGpuUsage = 6 + private val colMemoryCapacity = 7 override fun resolve(name: String): Int { return when (name) { @@ -68,6 +73,8 @@ internal class OdcVmResourceStateTableReader(private val reader: LocalParquetRea resourceStateDuration -> colDuration resourceCpuCount -> colCpuCount resourceStateCpuUsage -> colCpuUsage + resourceGpuCount -> colGpuCount + resourceStateGpuUsage -> colGpuUsage else -> -1 } } @@ -85,6 +92,7 @@ internal class OdcVmResourceStateTableReader(private val reader: LocalParquetRea val record = checkNotNull(record) { "Reader in invalid state" } return when (index) { colCpuCount -> record.cpuCount + colGpuCount -> 
record.gpuCount else -> throw IllegalArgumentException("Invalid column or type [index $index]") } } @@ -101,6 +109,7 @@ internal class OdcVmResourceStateTableReader(private val reader: LocalParquetRea val record = checkNotNull(record) { "Reader in invalid state" } return when (index) { colCpuUsage -> record.cpuUsage + colGpuUsage -> record.gpuUsage else -> throw IllegalArgumentException("Invalid column or type [index $index]") } } diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableWriter.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableWriter.kt index 1421d77c..c6f117d2 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableWriter.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceStateTableWriter.kt @@ -25,9 +25,11 @@ package org.opendc.trace.formats.opendc import org.apache.parquet.hadoop.ParquetWriter import org.opendc.trace.TableWriter import org.opendc.trace.conv.resourceCpuCount +import org.opendc.trace.conv.resourceGpuCount import org.opendc.trace.conv.resourceID import org.opendc.trace.conv.resourceStateCpuUsage import org.opendc.trace.conv.resourceStateDuration +import org.opendc.trace.conv.resourceStateGpuUsage import org.opendc.trace.conv.resourceStateTimestamp import org.opendc.trace.formats.opendc.parquet.ResourceState import java.time.Duration @@ -47,6 +49,8 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R private var localDuration: Duration = Duration.ZERO private var localCpuCount: Int = 0 private var localCpuUsage: Double = Double.NaN + private var localGpuCount: Int = 0 + private var localGpuUsage: Double = Double.NaN override fun startRow() { localIsActive = true @@ -55,6 +59,8 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R localDuration = 
Duration.ZERO localCpuCount = 0 localCpuUsage = Double.NaN + localGpuCount = 0 + localGpuUsage = Double.NaN } override fun endRow() { @@ -63,7 +69,7 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R check(lastId != localID || localTimestamp >= lastTimestamp) { "Records need to be ordered by (id, timestamp)" } - writer.write(ResourceState(localID, localTimestamp, localDuration, localCpuCount, localCpuUsage)) + writer.write(ResourceState(localID, localTimestamp, localDuration, localCpuCount, localCpuUsage, localGpuCount, localGpuUsage)) lastId = localID lastTimestamp = localTimestamp @@ -76,6 +82,8 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R resourceStateDuration -> colDuration resourceCpuCount -> colCpuCount resourceStateCpuUsage -> colCpuUsage + resourceGpuCount -> colGpuCount + resourceStateGpuUsage -> colGpuUsage else -> -1 } } @@ -94,6 +102,7 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R check(localIsActive) { "No active row" } when (index) { colCpuCount -> localCpuCount = value + colGpuCount -> localGpuCount = value else -> throw IllegalArgumentException("Invalid column or type [index $index]") } } @@ -119,6 +128,7 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R check(localIsActive) { "No active row" } when (index) { colCpuUsage -> localCpuUsage = value + colGpuUsage -> localGpuUsage = value else -> throw IllegalArgumentException("Invalid column or type [index $index]") } } @@ -206,4 +216,6 @@ internal class OdcVmResourceStateTableWriter(private val writer: ParquetWriter<R private val colDuration = 2 private val colCpuCount = 3 private val colCpuUsage = 4 + private val colGpuCount = 5 + private val colGpuUsage = 6 } diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableReader.kt 
b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableReader.kt index 10f60658..97c2847e 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableReader.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableReader.kt @@ -27,6 +27,8 @@ import org.opendc.trace.conv.resourceCpuCapacity import org.opendc.trace.conv.resourceCpuCount import org.opendc.trace.conv.resourceDeadline import org.opendc.trace.conv.resourceDuration +import org.opendc.trace.conv.resourceGpuCapacity +import org.opendc.trace.conv.resourceGpuCount import org.opendc.trace.conv.resourceID import org.opendc.trace.conv.resourceMemCapacity import org.opendc.trace.conv.resourceNature @@ -66,6 +68,8 @@ internal class OdcVmResourceTableReader(private val reader: LocalParquetReader<R private val colMemCapacity = 5 private val colNature = 6 private val colDeadline = 7 + private val colGpuCapacity = 8 + private val colGpuCount = 9 override fun resolve(name: String): Int { return when (name) { @@ -77,6 +81,8 @@ internal class OdcVmResourceTableReader(private val reader: LocalParquetReader<R resourceMemCapacity -> colMemCapacity resourceNature -> colNature resourceDeadline -> colDeadline + resourceGpuCount -> colGpuCount + resourceGpuCapacity -> colGpuCapacity else -> -1 } } @@ -101,6 +107,7 @@ internal class OdcVmResourceTableReader(private val reader: LocalParquetReader<R return when (index) { colCpuCount -> record.cpuCount + colGpuCount -> record.gpuCount else -> throw IllegalArgumentException("Invalid column") } } @@ -124,6 +131,7 @@ internal class OdcVmResourceTableReader(private val reader: LocalParquetReader<R return when (index) { colCpuCapacity -> record.cpuCapacity colMemCapacity -> record.memCapacity + colGpuCapacity -> record.gpuCapacity else -> throw IllegalArgumentException("Invalid column") } } diff --git 
a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableWriter.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableWriter.kt index 2b8db7f1..310d3dfc 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableWriter.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/OdcVmResourceTableWriter.kt @@ -53,6 +53,8 @@ internal class OdcVmResourceTableWriter(private val writer: ParquetWriter<Resour private var localMemCapacity: Double = Double.NaN private var localNature: String? = null private var localDeadline: Long = -1 + private var localGpuCount: Int = 0 + private var localGpuCapacity: Double = Double.NaN override fun startRow() { localIsActive = true @@ -62,6 +64,8 @@ internal class OdcVmResourceTableWriter(private val writer: ParquetWriter<Resour localCpuCount = 0 localCpuCapacity = Double.NaN localMemCapacity = Double.NaN + localGpuCount = 0 + localGpuCapacity = Double.NaN localNature = null localDeadline = -1L } @@ -77,6 +81,8 @@ internal class OdcVmResourceTableWriter(private val writer: ParquetWriter<Resour localCpuCount, localCpuCapacity, localMemCapacity, + localGpuCount, + localGpuCapacity, localNature, localDeadline, ), @@ -111,6 +117,7 @@ internal class OdcVmResourceTableWriter(private val writer: ParquetWriter<Resour check(localIsActive) { "No active row" } when (index) { colCpuCount -> localCpuCount = value + colGpuCount -> localGpuCount = value else -> throw IllegalArgumentException("Invalid column or type [index $index]") } } @@ -142,6 +149,7 @@ internal class OdcVmResourceTableWriter(private val writer: ParquetWriter<Resour when (index) { colCpuCapacity -> localCpuCapacity = value colMemCapacity -> localMemCapacity = value + colGpuCapacity -> localGpuCapacity = value else -> throw IllegalArgumentException("Invalid column or type [index $index]") } } @@ -220,4 +228,6 @@ internal 
class OdcVmResourceTableWriter(private val writer: ParquetWriter<Resour private val colMemCapacity = 5 private val colNature = 6 private val colDeadline = 7 + private val colGpuCount = 8 + private val colGpuCapacity = 9 } diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/Resource.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/Resource.kt index 00922d4f..6747e9ce 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/Resource.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/Resource.kt @@ -34,6 +34,8 @@ internal data class Resource( val cpuCount: Int, val cpuCapacity: Double, val memCapacity: Double, + val gpuCount: Int, + val gpuCapacity: Double, val nature: String? = null, val deadline: Long = -1, ) diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceRecordMaterializer.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceRecordMaterializer.kt index 866b304e..fe92ad65 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceRecordMaterializer.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceRecordMaterializer.kt @@ -43,6 +43,8 @@ internal class ResourceRecordMaterializer(schema: MessageType) : RecordMateriali private var localCpuCount = 0 private var localCpuCapacity = 0.0 private var localMemCapacity = 0.0 + private var localGpuCount = 0 + private var localGpuCapacity = 0.0 private var localNature: String? 
= null private var localDeadline = -1L @@ -97,6 +99,18 @@ internal class ResourceRecordMaterializer(schema: MessageType) : RecordMateriali localMemCapacity = value.toDouble() } } + "gpu_count", "gpuMaxCores" -> + object : PrimitiveConverter() { + override fun addInt(value: Int) { + localGpuCount = value + } + } + "gpu_capacity" -> + object : PrimitiveConverter() { + override fun addDouble(value: Double) { + localGpuCapacity = value + } + } "nature" -> object : PrimitiveConverter() { override fun addBinary(value: Binary) { @@ -120,6 +134,8 @@ internal class ResourceRecordMaterializer(schema: MessageType) : RecordMateriali localCpuCount = 0 localCpuCapacity = 0.0 localMemCapacity = 0.0 + localGpuCount = 0 + localGpuCapacity = 0.0 localNature = null localDeadline = -1 } @@ -137,6 +153,8 @@ internal class ResourceRecordMaterializer(schema: MessageType) : RecordMateriali localCpuCount, localCpuCapacity, localMemCapacity, + localGpuCount, + localGpuCapacity, localNature, localDeadline, ) diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceState.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceState.kt index 64ab9dca..10fc6be4 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceState.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceState.kt @@ -31,4 +31,6 @@ internal class ResourceState( val duration: Duration, val cpuCount: Int, val cpuUsage: Double, + val gpuCount: Int, + val gpuUsage: Double, ) diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceStateRecordMaterializer.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceStateRecordMaterializer.kt index 8ff0e476..9ad786d5 100644 --- 
a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceStateRecordMaterializer.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/formats/opendc/parquet/ResourceStateRecordMaterializer.kt @@ -43,6 +43,8 @@ internal class ResourceStateRecordMaterializer(schema: MessageType) : RecordMate private var localDuration = Duration.ZERO private var localCpuCount = 0 private var localCpuUsage = 0.0 + private var localGpuCount = 0 + private var localGpuUsage = 0.0 /** * Root converter for the record. @@ -85,6 +87,18 @@ internal class ResourceStateRecordMaterializer(schema: MessageType) : RecordMate localCpuUsage = value } } + "gpu_count", "gpu_cores" -> + object : PrimitiveConverter() { + override fun addInt(value: Int) { + localGpuCount = value + } + } + "gpu_usage", "gpuUsage" -> + object : PrimitiveConverter() { + override fun addDouble(value: Double) { + localGpuUsage = value + } + } "flops" -> object : PrimitiveConverter() { override fun addLong(value: Long) { @@ -101,6 +115,8 @@ internal class ResourceStateRecordMaterializer(schema: MessageType) : RecordMate localDuration = Duration.ZERO localCpuCount = 0 localCpuUsage = 0.0 + localGpuCount = 0 + localGpuUsage = 0.0 } override fun end() {} @@ -108,7 +124,16 @@ internal class ResourceStateRecordMaterializer(schema: MessageType) : RecordMate override fun getConverter(fieldIndex: Int): Converter = converters[fieldIndex] } - override fun getCurrentRecord(): ResourceState = ResourceState(localId, localTimestamp, localDuration, localCpuCount, localCpuUsage) + override fun getCurrentRecord(): ResourceState = + ResourceState( + localId, + localTimestamp, + localDuration, + localCpuCount, + localCpuUsage, + localGpuCount, + localGpuUsage, + ) override fun getRootConverter(): GroupConverter = root } diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt 
b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt index 406c9772..309763f1 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt @@ -36,10 +36,10 @@ import org.opendc.compute.topology.specs.HostSpec import org.opendc.compute.topology.specs.PowerSourceSpec import org.opendc.compute.workload.ComputeWorkloadLoader import org.opendc.experiments.base.runner.replay -import org.opendc.simulator.compute.cpu.CpuPowerModels import org.opendc.simulator.compute.models.CpuModel import org.opendc.simulator.compute.models.MachineModel import org.opendc.simulator.compute.models.MemoryUnit +import org.opendc.simulator.compute.power.PowerModels import org.opendc.simulator.kotlin.runSimulation import org.opendc.web.proto.runner.Job import org.opendc.web.proto.runner.Scenario @@ -353,14 +353,15 @@ public class OpenDCRunner( } val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW } - val powerModel = CpuPowerModels.linear(2 * energyConsumptionW, energyConsumptionW * 0.5) + val cpuPowerModel = PowerModels.linear(2 * energyConsumptionW, energyConsumptionW * 0.5) val spec = HostSpec( "node-$clusterId-$position", clusterId, MachineModel(processors, memoryUnits[0]), - powerModel, + cpuPowerModel, + null, ) res += spec diff --git a/settings.gradle.kts b/settings.gradle.kts index 6b838108..139dbd7e 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -40,6 +40,7 @@ include(":opendc-experiments:opendc-experiments-faas") include(":opendc-experiments:opendc-experiments-scenario") include(":opendc-experiments:opendc-experiments-tf20") include(":opendc-experiments:opendc-experiments-m3sa") +include(":opendc-web") include(":opendc-web:opendc-web-proto") include(":opendc-web:opendc-web-server") include(":opendc-web:opendc-web-client") |
