summaryrefslogtreecommitdiff
path: root/opendc-compute
diff options
context:
space:
mode:
authorNiels Thiele <noleu66@posteo.net>2025-06-22 12:31:21 +0200
committerGitHub <noreply@github.com>2025-06-22 12:31:21 +0200
commit0203254b709614fa732c114aa25916f61b8b3275 (patch)
tree63232140a8e60e16e1668a51eb58954d8609fbdc /opendc-compute
parent8f846655347195bf6f22a4a102aa06f0ab127da1 (diff)
Implemented Single GPU Support & outline of host-level allocation policies (#342)
* renamed performance counter to distinguish different resource types * added GPU, modelled similar to CPU * added GPUs to machine model * list of GPUs instead of single instance * renamed memory speed to bandwidth * enabled parsing of GPU resources * split powermodel into cpu and GPU powermodel * added gpu parsing tests * added idea of host level scheduling * added tests for multi gpu parsing * renamed powermodel to cpupowermodel * clarified naming of cpu and gpu components * added resource type to flow supplier and edge * added resourcetype * added GPU components and resource type to fragments * added GPU to workload and updated resource usage retrieval * implemented first version of multi resource * added name to workload * renamed performance counters * removed commented out code * removed deprecated comments * included demand and supply into calculations * resolving rebase mismatches * moved resource type from flowedge class to common package * added available resources to machines * cleaner separation if workload is started of simmachine or vm * Replaced exception with dedicated enum * Only looping over resources that are actually used * using hashmaps to handle resourcetype instead of arrays for readability * fixed condition * tracking finished workloads per resource type * removed resource type from flowedge * made supply and demand distribution resource specific * added power model for GPU * removed unused test setup * removed deprecated comments * removed unused parameter * added ID for GPU * added GPUs and GPU performance counters (naively) * implemented capturing of GPU statistics * added reminders for future implementations * renamed properties for better identification * added capturing GPU statistics * implemented first tests for GPUs * unified access to performance counters * added interface for general compute resource handling * implemented multi resource support in simmachine * added individual edge to VM per resource * extended compute resource 
interface * implemented multi-resource support in PSU * implemented generic retrieval of computeresources * implemented multi-resource support in vm * made method use more resource specific * implemented simple GPU tests * rolled back frequency and demand use * made naming independent of used resource * using workloads resources instead of VMs to determine available resource * implemented determination of used resources in workload * removed logging statements * implemented reading from workload * fixed naming for host-level allocation * fixed next deadline calculation * fixed forwarding supply * reduced memory footprint * made GPU powermodel nullable * made Gpu powermodel configurable in topology * implemented tests for basic gpu scheduler * added gpu properties * implemented weights, filter and simple cpu-gpu scheduler * spotless apply * spotless apply pt. 2 * fixed capitalization * spotless kotlin run * implemented column export * todo update * removed code comments * Merged PerformanceCounter classes into one & removed interface * removed GPU specific powermodel * Rebase master: kept both versions of TopologyFactories * renamed CpuPowermodel to resource independent Powermodel Moved it from Cpu package to power package * implemented default of getResourceType & removed overrides if possible * split getResourceType into Consumer and Supplier * added power as resource type * reduced supply demand from arrayList to single value * combining GPUs into one large GPU, until full multi-gpu support * merged distribution policy enum with corresponding factory * added comment * post-rebase fixes * aligned naming * Added GPU metrics to task output * Updates power resource type to uppercase. Standardizes the `ResourceType.Power` enum to `ResourceType.POWER` for consistency with other resource types and improved readability. * Removes deprecated test assertions Removes commented-out assertions in GPU tests. These assertions are no longer needed and clutter the test code. 
* Renames MaxMinFairnessStrategy to Policy Renames MaxMinFairnessStrategy to MaxMinFairnessPolicy for clarity and consistency with naming conventions. This change affects the factory and distributor to use the updated name. * applies spotless * nulls GPUs as it is not used
Diffstat (limited to 'opendc-compute')
-rw-r--r--opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt7
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java33
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java19
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java10
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java11
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java24
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java44
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java46
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt73
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt43
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt1
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt31
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt4
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt48
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt50
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt43
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt46
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt42
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt37
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt1
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt45
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt87
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt36
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt104
-rw-r--r--opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt56
-rw-r--r--opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt8
-rw-r--r--opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt4
-rw-r--r--opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt57
-rw-r--r--opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt5
-rw-r--r--opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt17
-rw-r--r--opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt54
-rw-r--r--opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt3
35 files changed, 997 insertions, 98 deletions
diff --git a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt
index e88379f6..a54a0130 100644
--- a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt
+++ b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Flavor.kt
@@ -30,10 +30,15 @@ public interface Flavor : Resource {
/**
* The number of (virtual) processing cores to use.
*/
- public val coreCount: Int
+ public val cpuCoreCount: Int
/**
* The amount of RAM available to the task (in MB).
*/
public val memorySize: Long
+
+ /**
+ * The amount of gpu cores available to the task.
+ */
+ public val gpuCoreCount: Int
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java
new file mode 100644
index 00000000..97aaa820
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.host;
+
+/**
+ * A model for a GPU in a host.
+ *
+ * @param gpuCoreCapacity The capacity of the GPU cores hz.
+ * @param gpuCoreCount The number of GPU cores.
+ * @param GpuMemoryCapacity The capacity of the GPU memory in GB.
+ * @param GpuMemorySpeed The speed of the GPU memory in GB/s.
+ */
+public record GpuHostModel(double gpuCoreCapacity, int gpuCoreCount, long GpuMemoryCapacity, double GpuMemorySpeed) {}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java
index 1ea73ea6..6464a56c 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java
@@ -22,11 +22,24 @@
package org.opendc.compute.simulator.host;
+import java.util.List;
+
/**
* Record describing the static machine properties of the host.
*
- * @param cpuCapacity The total CPU capacity of the host in MHz.
- * @param coreCount The number of logical processing cores available for this host.
+ * @param cpuCapacity The total CPU capacity of the host in MHz.
+ * @param coreCount The number of logical processing cores available for this host.
* @param memoryCapacity The amount of memory available for this host in MB.
*/
-public record HostModel(double cpuCapacity, int coreCount, long memoryCapacity) {}
+public record HostModel(double cpuCapacity, int coreCount, long memoryCapacity, List<GpuHostModel> gpuHostModels) {
+ /**
+ * Create a new host model.
+ *
+ * @param cpuCapacity The total CPU capacity of the host in MHz.
+ * @param coreCount The number of logical processing cores available for this host.
+ * @param memoryCapacity The amount of memory available for this host in MB.
+ */
+ public HostModel(double cpuCapacity, int coreCount, long memoryCapacity) {
+ this(cpuCapacity, coreCount, memoryCapacity, null);
+ }
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java
index 2b4306af..835c7186 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java
@@ -198,7 +198,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
HostView hv = hostToView.get(host);
final ServiceFlavor flavor = task.getFlavor();
if (hv != null) {
- hv.provisionedCores -= flavor.getCoreCount();
+ hv.provisionedCpuCores -= flavor.getCpuCoreCount();
hv.instanceCount--;
hv.availableMemory += flavor.getMemorySize();
} else {
@@ -496,7 +496,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
if (result.getResultType() == SchedulingResultType.FAILURE) {
LOGGER.trace("Task {} selected for scheduling but no capacity available for it at the moment", task);
- if (flavor.getMemorySize() > maxMemory || flavor.getCoreCount() > maxCores) {
+ if (flavor.getMemorySize() > maxMemory || flavor.getCpuCoreCount() > maxCores) {
// Remove the incoming image
taskQueue.remove(req);
tasksPending--;
@@ -531,7 +531,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
attemptsSuccess++;
hv.instanceCount++;
- hv.provisionedCores += flavor.getCoreCount();
+ hv.provisionedCpuCores += flavor.getCpuCoreCount();
hv.availableMemory -= flavor.getMemorySize();
activeTasks.put(task, host);
@@ -612,12 +612,12 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
@NotNull
public ServiceFlavor newFlavor(
- @NotNull String name, int cpuCount, long memorySize, @NotNull Map<String, ?> meta) {
+ @NotNull String name, int cpuCount, long memorySize, int gpuCoreCount, @NotNull Map<String, ?> meta) {
checkOpen();
final ComputeService service = this.service;
UUID uid = new UUID(service.clock.millis(), service.random.nextLong());
- ServiceFlavor flavor = new ServiceFlavor(service, uid, name, cpuCount, memorySize, meta);
+ ServiceFlavor flavor = new ServiceFlavor(service, uid, name, cpuCount, memorySize, gpuCoreCount, meta);
// service.flavorById.put(uid, flavor);
// service.flavors.add(flavor);
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java
index 7c548add..c07f58c7 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java
@@ -31,7 +31,8 @@ public class HostView {
private final SimHost host;
int instanceCount;
long availableMemory;
- int provisionedCores;
+ int provisionedCpuCores;
+ int provisionedGpuCores;
/**
* Scheduler bookkeeping
@@ -83,8 +84,12 @@ public class HostView {
/**
* Return the provisioned cores on the host.
*/
- public int getProvisionedCores() {
- return provisionedCores;
+ public int getProvisionedCpuCores() {
+ return provisionedCpuCores;
+ }
+
+ public int getProvisionedGpuCores() {
+ return provisionedGpuCores;
}
@Override
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java
index eddde87e..8a4359b4 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java
@@ -36,22 +36,31 @@ public final class ServiceFlavor implements Flavor {
private final ComputeService service;
private final UUID uid;
private final String name;
- private final int coreCount;
+ private final int cpuCoreCount;
private final long memorySize;
+ private final int gpuCoreCount;
private final Map<String, ?> meta;
- ServiceFlavor(ComputeService service, UUID uid, String name, int coreCount, long memorySize, Map<String, ?> meta) {
+ ServiceFlavor(
+ ComputeService service,
+ UUID uid,
+ String name,
+ int cpuCoreCount,
+ long memorySize,
+ int gpuCoreCount,
+ Map<String, ?> meta) {
this.service = service;
this.uid = uid;
this.name = name;
- this.coreCount = coreCount;
+ this.cpuCoreCount = cpuCoreCount;
this.memorySize = memorySize;
+ this.gpuCoreCount = gpuCoreCount;
this.meta = meta;
}
@Override
- public int getCoreCount() {
- return coreCount;
+ public int getCpuCoreCount() {
+ return cpuCoreCount;
}
@Override
@@ -59,6 +68,11 @@ public final class ServiceFlavor implements Flavor {
return memorySize;
}
+ @Override
+ public int getGpuCoreCount() {
+ return gpuCoreCount;
+ }
+
@NotNull
@Override
public UUID getUid() {
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java
new file mode 100644
index 00000000..1aba13e3
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.telemetry;
+
+/**
+ * Statistics about the GPUs of a guest.
+ *
+ * @param activeTime The cumulative time (in seconds) that the GPUs of the guest were actively running.
+ * @param idleTime The cumulative time (in seconds) the GPUs of the guest were idle.
+ * @param stealTime The cumulative GPU time (in seconds) that the guest was ready to run, but not granted time by the host.
+ * @param lostTime The cumulative GPU time (in seconds) that was lost due to interference with other machines.
+ * @param capacity The available GPU capacity of the guest (in MHz).
+ * @param usage Amount of GPU resources (in MHz) actually used by the guest.
+ * @param utilization The utilization of the GPU resources (in %) relative to the total GPU capacity.
+ */
+public record GuestGpuStats(
+ long activeTime,
+ long idleTime,
+ long stealTime,
+ long lostTime,
+ double capacity,
+ double usage,
+ double demand,
+ double utilization) {}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java
new file mode 100644
index 00000000..e42d7704
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.telemetry;
+
+/**
+ * Statistics about the GPUs of a host.
+ *
+ * @param activeTime The cumulative time (in seconds) that the GPUs of the host were actively running.
+ * @param idleTime The cumulative time (in seconds) the GPUs of the host were idle.
+ * @param stealTime The cumulative GPU time (in seconds) that virtual machines were ready to run, but were not able to.
+ * @param lostTime The cumulative GPU time (in seconds) that was lost due to interference between virtual machines.
+ * @param capacity The available GPU capacity of the host (in MHz).
+ * @param demand Amount of GPU resources (in MHz) the guests would use if there were no GPU contention or GPU
+ * limits.
+ * @param usage Amount of GPU resources (in MHz) actually used by the host.
+ * @param utilization The utilization of the GPU resources (in %) relative to the total GPU capacity.
+ */
+public record HostGpuStats(
+ long activeTime,
+ long idleTime,
+ long stealTime,
+ long lostTime,
+ double capacity,
+ double demand,
+ double usage,
+ double utilization) {}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt
index d23794ab..effe3d5b 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt
@@ -22,19 +22,22 @@
package org.opendc.compute.simulator.host
+import org.opendc.common.ResourceType
import org.opendc.compute.api.Flavor
import org.opendc.compute.api.TaskState
import org.opendc.compute.simulator.internal.Guest
import org.opendc.compute.simulator.internal.GuestListener
import org.opendc.compute.simulator.service.ServiceTask
import org.opendc.compute.simulator.telemetry.GuestCpuStats
+import org.opendc.compute.simulator.telemetry.GuestGpuStats
import org.opendc.compute.simulator.telemetry.GuestSystemStats
import org.opendc.compute.simulator.telemetry.HostCpuStats
+import org.opendc.compute.simulator.telemetry.HostGpuStats
import org.opendc.compute.simulator.telemetry.HostSystemStats
-import org.opendc.simulator.compute.cpu.CpuPowerModel
import org.opendc.simulator.compute.machine.SimMachine
import org.opendc.simulator.compute.models.MachineModel
import org.opendc.simulator.compute.models.MemoryUnit
+import org.opendc.simulator.compute.power.PowerModel
import org.opendc.simulator.engine.engine.FlowEngine
import org.opendc.simulator.engine.graph.FlowDistributor
import java.time.Duration
@@ -57,7 +60,8 @@ public class SimHost(
private val clock: InstantSource,
private val engine: FlowEngine,
private val machineModel: MachineModel,
- private val cpuPowerModel: CpuPowerModel,
+ private val cpuPowerModel: PowerModel,
+ private val gpuPowerModel: PowerModel?,
private val embodiedCarbon: Double,
private val expectedLifetime: Double,
private val powerDistributor: FlowDistributor,
@@ -81,11 +85,22 @@ public class SimHost(
field = value
}
+ private val gpuHostModels: List<GpuHostModel>? =
+ machineModel.gpuModels?.map { gpumodel ->
+ return@map GpuHostModel(
+ gpumodel.totalCoreCapacity,
+ gpumodel.coreCount,
+ gpumodel.memorySize,
+ gpumodel.memoryBandwidth,
+ )
+ }
+
private val model: HostModel =
HostModel(
machineModel.cpuModel.totalCapacity,
machineModel.cpuModel.coreCount,
machineModel.memory.size,
+ gpuHostModels,
)
private var simMachine: SimMachine? = null
@@ -136,6 +151,7 @@ public class SimHost(
this.machineModel,
this.powerDistributor,
this.cpuPowerModel,
+ this.gpuPowerModel,
) { cause ->
hostState = if (cause != null) HostState.ERROR else HostState.DOWN
}
@@ -207,7 +223,7 @@ public class SimHost(
public fun canFit(task: ServiceTask): Boolean {
val sufficientMemory = model.memoryCapacity >= task.flavor.memorySize
- val enoughCpus = model.coreCount >= task.flavor.coreCount
+ val enoughCpus = model.coreCount >= task.flavor.cpuCoreCount
val canFit = simMachine!!.canFit(task.flavor.toMachineModel())
return sufficientMemory && enoughCpus && canFit
@@ -324,14 +340,14 @@ public class SimHost(
val counters = simMachine!!.performanceCounters
return HostCpuStats(
- counters.cpuActiveTime,
- counters.cpuIdleTime,
- counters.cpuStealTime,
- counters.cpuLostTime,
- counters.cpuCapacity,
- counters.cpuDemand,
- counters.cpuSupply,
- counters.cpuSupply / cpuLimit,
+ counters.activeTime,
+ counters.idleTime,
+ counters.stealTime,
+ counters.lostTime,
+ counters.capacity,
+ counters.demand,
+ counters.supply,
+ counters.supply / cpuLimit,
)
}
@@ -340,6 +356,33 @@ public class SimHost(
return guest.getCpuStats()
}
+ public fun getGpuStats(): List<HostGpuStats> {
+ val gpuStats = mutableListOf<HostGpuStats>()
+ for (gpu in simMachine!!.gpus) {
+ gpu.updateCounters(this.clock.millis())
+ val counters = simMachine!!.getGpuPerformanceCounters(gpu.id)
+
+ gpuStats.add(
+ HostGpuStats(
+ counters.activeTime,
+ counters.idleTime,
+ counters.stealTime,
+ counters.lostTime,
+ counters.capacity,
+ counters.demand,
+ counters.supply,
+ counters.supply / gpu.getCapacity(ResourceType.GPU),
+ ),
+ )
+ }
+ return gpuStats
+ }
+
+ public fun getGpuStats(task: ServiceTask): List<GuestGpuStats> {
+ val guest = requireNotNull(taskToGuestMap[task]) { "Unknown task ${task.name} at host $name" }
+ return guest.getGpuStats()
+ }
+
override fun hashCode(): Int = name.hashCode()
override fun equals(other: Any?): Boolean {
@@ -352,7 +395,13 @@ public class SimHost(
* Convert flavor to machine model.
*/
private fun Flavor.toMachineModel(): MachineModel {
- return MachineModel(simMachine!!.machineModel.cpuModel, MemoryUnit("Generic", "Generic", 3200.0, memorySize))
+ return MachineModel(
+ simMachine!!.machineModel.cpuModel,
+ MemoryUnit("Generic", "Generic", 3200.0, memorySize),
+ simMachine!!.machineModel.gpuModels,
+ simMachine!!.machineModel.cpuDistributionStrategy,
+ simMachine!!.machineModel.gpuDistributionStrategy,
+ )
}
/**
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
index fe8cbf2f..a980f6cb 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
@@ -27,6 +27,7 @@ import org.opendc.compute.api.TaskState
import org.opendc.compute.simulator.host.SimHost
import org.opendc.compute.simulator.service.ServiceTask
import org.opendc.compute.simulator.telemetry.GuestCpuStats
+import org.opendc.compute.simulator.telemetry.GuestGpuStats
import org.opendc.compute.simulator.telemetry.GuestSystemStats
import org.opendc.simulator.compute.machine.SimMachine
import org.opendc.simulator.compute.workload.ChainWorkload
@@ -64,6 +65,7 @@ public class Guest(
private var lastReport = clock.millis()
private var bootTime: Instant? = null
private val cpuLimit = simMachine.cpu.cpuModel.totalCapacity
+ private val gpuLimit = simMachine.gpus?.firstOrNull()?.gpuModel?.totalCoreCapacity ?: 0.0
/**
* Start the guest.
@@ -242,20 +244,43 @@ public class Guest(
*/
public fun getCpuStats(): GuestCpuStats {
virtualMachine!!.updateCounters(this.clock.millis())
- val counters = virtualMachine!!.performanceCounters
+ val counters = virtualMachine!!.cpuPerformanceCounters
return GuestCpuStats(
- counters.cpuActiveTime / 1000L,
- counters.cpuIdleTime / 1000L,
- counters.cpuStealTime / 1000L,
- counters.cpuLostTime / 1000L,
- counters.cpuCapacity,
- counters.cpuSupply,
- counters.cpuDemand,
- counters.cpuSupply / cpuLimit,
+ counters.activeTime / 1000L,
+ counters.idleTime / 1000L,
+ counters.stealTime / 1000L,
+ counters.lostTime / 1000L,
+ counters.capacity,
+ counters.supply,
+ counters.demand,
+ counters.supply / cpuLimit,
)
}
+ public fun getGpuStats(): List<GuestGpuStats> {
+ virtualMachine!!.updateCounters(this.clock.millis())
+ val counters = virtualMachine!!.gpuPerformanceCounters
+
+ val gpuStats = mutableListOf<GuestGpuStats>()
+ for (gpuCounter in counters) {
+ gpuStats.add(
+ GuestGpuStats(
+ gpuCounter.activeTime / 1000L,
+ gpuCounter.idleTime / 1000L,
+ gpuCounter.stealTime / 1000L,
+ gpuCounter.lostTime / 1000L,
+ gpuCounter.capacity,
+ gpuCounter.supply,
+ gpuCounter.demand,
+ // Assuming similar scaling as CPU
+ gpuCounter.supply / gpuLimit,
+ ),
+ )
+ }
+ return gpuStats
+ }
+
/**
* Helper function to track the uptime and downtime of the guest.
*/
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt
index 675ce3a9..791ab692 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt
@@ -127,6 +127,7 @@ public class HostsProvisioningStep internal constructor(
engine,
hostSpec.model,
hostSpec.cpuPowerModel,
+ hostSpec.gpuPowerModel,
hostSpec.embodiedCarbon,
hostSpec.expectedLifetime,
hostDistributor,
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt
index e70cec58..0376a492 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt
@@ -27,11 +27,13 @@ package org.opendc.compute.simulator.scheduler
import org.opendc.compute.simulator.scheduler.filters.ComputeFilter
import org.opendc.compute.simulator.scheduler.filters.RamFilter
import org.opendc.compute.simulator.scheduler.filters.VCpuFilter
+import org.opendc.compute.simulator.scheduler.filters.VGpuFilter
import org.opendc.compute.simulator.scheduler.timeshift.TimeshiftScheduler
import org.opendc.compute.simulator.scheduler.weights.CoreRamWeigher
import org.opendc.compute.simulator.scheduler.weights.InstanceCountWeigher
import org.opendc.compute.simulator.scheduler.weights.RamWeigher
import org.opendc.compute.simulator.scheduler.weights.VCpuWeigher
+import org.opendc.compute.simulator.scheduler.weights.VGpuWeigher
import java.time.InstantSource
import java.util.SplittableRandom
import java.util.random.RandomGenerator
@@ -48,6 +50,8 @@ public enum class ComputeSchedulerEnum {
Random,
TaskNumMemorizing,
Timeshift,
+ ProvisionedCpuGpuCores,
+ ProvisionedCpuGpuCoresInv,
}
public fun createPrefabComputeScheduler(
@@ -68,6 +72,7 @@ public fun createPrefabComputeScheduler(
): ComputeScheduler {
val cpuAllocationRatio = 1.0
val ramAllocationRatio = 1.5
+ val gpuAllocationRatio = 1.0
return when (name) {
ComputeSchedulerEnum.Mem ->
FilterScheduler(
@@ -128,5 +133,31 @@ public fun createPrefabComputeScheduler(
clock = clock,
random = SplittableRandom(seeder.nextLong()),
)
+ ComputeSchedulerEnum.ProvisionedCpuGpuCores ->
+ FilterScheduler(
+ filters =
+ listOf(
+ ComputeFilter(),
+ VCpuFilter(cpuAllocationRatio),
+ VGpuFilter(gpuAllocationRatio),
+ RamFilter(ramAllocationRatio),
+ ),
+ weighers = listOf(VCpuWeigher(cpuAllocationRatio, multiplier = 1.0), VGpuWeigher(gpuAllocationRatio, multiplier = 1.0)),
+ )
+ ComputeSchedulerEnum.ProvisionedCpuGpuCoresInv ->
+ FilterScheduler(
+ filters =
+ listOf(
+ ComputeFilter(),
+ VCpuFilter(cpuAllocationRatio),
+ VGpuFilter(gpuAllocationRatio),
+ RamFilter(ramAllocationRatio),
+ ),
+ weighers =
+ listOf(
+ VCpuWeigher(cpuAllocationRatio, multiplier = -1.0),
+ VGpuWeigher(gpuAllocationRatio, multiplier = -1.0),
+ ),
+ )
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt
index 4e63baf4..7fa7a051 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt
@@ -40,7 +40,7 @@ public class VCpuCapacityFilter : HostFilter {
return (
requiredCapacity == null ||
(availableCapacity / host.host.getModel().coreCount)
- >= (requiredCapacity / task.flavor.coreCount)
+ >= (requiredCapacity / task.flavor.cpuCoreCount)
)
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt
index c179a7bf..89739658 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt
@@ -35,7 +35,7 @@ public class VCpuFilter(private val allocationRatio: Double) : HostFilter {
host: HostView,
task: ServiceTask,
): Boolean {
- val requested = task.flavor.coreCount
+ val requested = task.flavor.cpuCoreCount
val totalCores = host.host.getModel().coreCount
val limit = totalCores * allocationRatio
@@ -44,7 +44,7 @@ public class VCpuFilter(private val allocationRatio: Double) : HostFilter {
return false
}
- val availableCores = limit - host.provisionedCores
+ val availableCores = limit - host.provisionedCpuCores
return availableCores >= requested
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt
new file mode 100644
index 00000000..6dc27327
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.filters
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+import kotlin.collections.maxOfOrNull
+
+/**
+ * A [HostFilter] that filters hosts based on the vGPU speed requirements of a [ServiceTask] and the available
+ * GPU capacity on the host.
+ *
+ * The requirement is read from the "gpu-capacity" entry of the flavor metadata; tasks without such an entry
+ * (or with a non-[Double] value) pass the filter unconditionally.
+ */
+public class VGpuCapacityFilter : HostFilter {
+    override fun test(
+        host: HostView,
+        task: ServiceTask,
+    ): Boolean {
+        // No stated requirement: every host is acceptable, so skip the model lookup entirely.
+        val requiredCapacity = task.flavor.meta["gpu-capacity"] as? Double ?: return true
+
+        val gpuModels = host.host.getModel().gpuHostModels()
+        val availableCapacity = (gpuModels.maxOfOrNull { it.gpuCoreCapacity() } ?: 0).toDouble()
+        // -1 is the fallback for hosts without GPUs; together with a capacity of 0 it yields a ratio of -0.0.
+        val availableCores = (gpuModels.maxOfOrNull { it.gpuCoreCount } ?: -1).toDouble()
+
+        // Compare the capacity per core offered by the host with the capacity per core requested by the task.
+        val availableRatio = availableCapacity / availableCores
+        val requiredRatio = requiredCapacity / task.flavor.gpuCoreCount
+        return availableRatio >= requiredRatio
+    }
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt
new file mode 100644
index 00000000..9f564776
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.filters
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+
+/**
+ * A [HostFilter] that filters hosts based on the vGPU requirements of a [ServiceTask] and the vGPUs still available
+ * on the host.
+ *
+ * @param allocationRatio Virtual GPU to physical GPU allocation ratio.
+ */
+public class VGpuFilter(private val allocationRatio: Double) : HostFilter {
+    override fun test(
+        host: HostView,
+        task: ServiceTask,
+    ): Boolean {
+        // Largest core count among the GPU models of the host, or 0 when the host has none.
+        val maxGpuCores = host.host.getModel().gpuHostModels().maxOfOrNull { it.gpuCoreCount() } ?: 0
+        val requestedCores = task.flavor.gpuCoreCount
+
+        // An instance may overcommit against other instances only, never against itself.
+        if (requestedCores > maxGpuCores) {
+            return false
+        }
+
+        // Whatever is left of the overcommitted limit after deducting the cores that are already provisioned.
+        val remainingCores = maxGpuCores * allocationRatio - host.provisionedGpuCores
+        return remainingCores >= requestedCores
+    }
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt
index 4f52e11a..d9b094fb 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt
@@ -35,7 +35,7 @@ public class VCpuCapacityWeigher(override val multiplier: Double = 1.0) : HostWe
): Double {
val model = host.host.getModel()
val requiredCapacity = task.flavor.meta["cpu-capacity"] as? Double ?: 0.0
- return model.cpuCapacity - requiredCapacity / task.flavor.coreCount
+ return model.cpuCapacity - requiredCapacity / task.flavor.cpuCoreCount
}
override fun toString(): String = "VCpuWeigher"
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt
index 3f9a7f03..d882c237 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt
@@ -39,7 +39,7 @@ public class VCpuWeigher(private val allocationRatio: Double, override val multi
host: HostView,
task: ServiceTask,
): Double {
- return allocationRatio - host.provisionedCores
+ return allocationRatio - host.provisionedCpuCores
}
override fun toString(): String = "VCpuWeigher"
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt
new file mode 100644
index 00000000..35f2c7b9
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.weights
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+
+/**
+ * A [HostWeigher] that weighs the hosts based on the difference between the available GPU capacity and the
+ * vGPU capacity required per GPU core.
+ *
+ * The requirement is read from the "gpu-capacity" entry of the flavor metadata and defaults to 0.0. Tasks that
+ * request no GPU cores are weighed on the available capacity alone.
+ */
+public class VGpuCapacityWeigher(override val multiplier: Double = 1.0) : HostWeigher {
+    override fun getWeight(
+        host: HostView,
+        task: ServiceTask,
+    ): Double {
+        val requiredCapacity = task.flavor.meta["gpu-capacity"] as? Double ?: 0.0
+        val availableCapacity = host.host.getModel().gpuHostModels.maxOfOrNull { it.gpuCoreCapacity } ?: 0.0
+
+        // Guard the division: with zero GPU cores it would produce NaN (0.0 / 0) or an infinite weight.
+        val gpuCores = task.flavor.gpuCoreCount
+        val requiredPerCore = if (gpuCores > 0) requiredCapacity / gpuCores else 0.0
+        return availableCapacity - requiredPerCore
+    }
+
+    override fun toString(): String = "VGpuWeigher"
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt
new file mode 100644
index 00000000..7397bf10
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.weights
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+
+/**
+ * A [HostWeigher] that weighs the hosts based on the number of GPU cores already provisioned on them.
+ *
+ * @param allocationRatio Virtual GPU to physical GPU allocation ratio.
+ */
+public class VGpuWeigher(private val allocationRatio: Double, override val multiplier: Double = 1.0) : HostWeigher {
+    init {
+        require(allocationRatio > 0.0) { "Allocation ratio must be greater than zero" }
+    }
+
+    // The weight is the allocation ratio minus the provisioned GPU cores, so it drops as the host fills up.
+    override fun getWeight(
+        host: HostView,
+        task: ServiceTask,
+    ): Double = allocationRatio - host.provisionedGpuCores
+
+    override fun toString(): String = "VGpuWeigher"
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt
index 00f7854d..affaab58 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt
@@ -144,6 +144,48 @@ public object DfltHostExportColumns {
field = Types.required(INT64).named("cpu_time_lost"),
) { it.cpuLostTime }
+ // TODO: support multiple GPUs
+
+    // Each GPU column exports the value of the first GPU only; it is null when the host reports no GPU values.
+    public val GPU_CAPACITY: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_capacity"),
+        ) { reader -> reader.gpuCapacities.firstOrNull() }
+
+    public val GPU_USAGE: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_usage"),
+        ) { reader -> reader.gpuUsages.firstOrNull() }
+
+    public val GPU_DEMAND: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_demand"),
+        ) { reader -> reader.gpuDemands.firstOrNull() }
+
+    public val GPU_UTILIZATION: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_utilization"),
+        ) { reader -> reader.gpuUtilizations.firstOrNull() }
+
+    public val GPU_TIME_ACTIVE: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_active"),
+        ) { reader -> reader.gpuActiveTimes.firstOrNull() }
+
+    public val GPU_TIME_IDLE: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_idle"),
+        ) { reader -> reader.gpuIdleTimes.firstOrNull() }
+
+    public val GPU_TIME_STEAL: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_steal"),
+        ) { reader -> reader.gpuStealTimes.firstOrNull() }
+
+    public val GPU_TIME_LOST: ExportColumn<HostTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_lost"),
+        ) { reader -> reader.gpuLostTimes.firstOrNull() }
+
public val POWER_DRAW: ExportColumn<HostTableReader> =
ExportColumn(
field = Types.required(FLOAT).named("power_draw"),
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt
index f533eb1f..ad7a1d52 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt
@@ -132,6 +132,43 @@ public object DfltTaskExportColumns {
field = Types.required(INT64).named("cpu_time_lost"),
) { it.cpuLostTime }
+ // TODO: support multiple GPUs
+
+    // Each GPU column exports the value of the first GPU only; it is null when the reader has no GPU values.
+    public val GPU_CAPACITY: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_capacity"),
+        ) { reader -> reader.gpuLimits?.firstOrNull() }
+
+    public val GPU_USAGE: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_usage"),
+        ) { reader -> reader.gpuUsages?.firstOrNull() }
+
+    public val GPU_DEMAND: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(FLOAT).named("gpu_demand"),
+        ) { reader -> reader.gpuDemands?.firstOrNull() }
+
+    public val GPU_TIME_ACTIVE: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_active"),
+        ) { reader -> reader.gpuActiveTimes?.firstOrNull() }
+
+    public val GPU_TIME_IDLE: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_idle"),
+        ) { reader -> reader.gpuIdleTimes?.firstOrNull() }
+
+    public val GPU_TIME_STEAL: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_steal"),
+        ) { reader -> reader.gpuStealTimes?.firstOrNull() }
+
+    public val GPU_TIME_LOST: ExportColumn<TaskTableReader> =
+        ExportColumn(
+            field = Types.optional(INT64).named("gpu_time_lost"),
+        ) { reader -> reader.gpuLostTimes?.firstOrNull() }
+
public val UP_TIME: ExportColumn<TaskTableReader> =
ExportColumn(
field = Types.required(INT64).named("uptime"),
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt
index a626c41b..4fb930e1 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt
@@ -44,6 +44,7 @@ public class ParquetComputeMonitor(
private val batteryExporter: Exporter<BatteryTableReader>?,
private val serviceExporter: Exporter<ServiceTableReader>?,
) : ComputeMonitor, AutoCloseable {
+ // FIXME: Include GPU
override fun record(reader: HostTableReader) {
hostExporter?.write(reader)
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt
index ff0115df..fbffd508 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt
@@ -113,6 +113,51 @@ public interface HostTableReader : Exportable {
public val cpuLostTime: Long
/**
+ * The capacity of the GPUs in the host (in MHz).
+ */
+ public val gpuCapacities: ArrayList<Double>
+
+ /**
+ * The capacity of the GPUs in the host (in MHz). They are inserted by GPU ID.
+ */
+ public val gpuLimits: ArrayList<Double>
+
+ /**
+ * The usage per GPU in the host (in MHz). They are inserted by GPU ID.
+ */
+ public val gpuUsages: ArrayList<Double>
+
+ /**
+ * The demand per GPU of the guests (in MHz). They are inserted by GPU ID.
+ */
+ public val gpuDemands: ArrayList<Double>
+
+ /**
+ * The utilization of each GPU of the host. They are inserted by GPU ID.
+ */
+ public val gpuUtilizations: ArrayList<Double>
+
+ /**
+ * The duration (in ms) that the respective GPU was active in the host. They are inserted by GPU ID.
+ */
+ public val gpuActiveTimes: ArrayList<Long>
+
+ /**
+ * The duration (in ms) that a GPU was idle in the host. They are inserted by GPU ID.
+ */
+ public val gpuIdleTimes: ArrayList<Long>
+
+ /**
+ * The duration (in ms) that a vGPU wanted to run, but no capacity was available. They are inserted by GPU ID.
+ */
+ public val gpuStealTimes: ArrayList<Long>
+
+ /**
+ * The duration (in ms) of GPU time that was lost due to interference. They are inserted by GPU ID.
+ */
+ public val gpuLostTimes: ArrayList<Long>
+
+ /**
* The current power draw of the host in W.
*/
public val powerDraw: Double
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt
index 6e1dac48..cb25358a 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt
@@ -49,6 +49,7 @@ public class HostTableReaderImpl(
_tasksActive = table.tasksActive
_guestsError = table.guestsError
_guestsInvalid = table.guestsInvalid
+ // CPU stats
_cpuCapacity = table.cpuCapacity
_cpuDemand = table.cpuDemand
_cpuUsage = table.cpuUsage
@@ -57,6 +58,18 @@ public class HostTableReaderImpl(
_cpuIdleTime = table.cpuIdleTime
_cpuStealTime = table.cpuStealTime
_cpuLostTime = table.cpuLostTime
+ // GPU stats
+ _gpuCapacities = table.gpuCapacities
+ _gpuLimits = table.gpuLimits
+ _gpuDemands = table.gpuDemands
+ _gpuUsages = table.gpuUsages
+ _gpuUtilizations = table.gpuUtilizations
+ _gpuActiveTimes = table.gpuActiveTimes
+ _gpuIdleTimes = table.gpuIdleTimes
+ _gpuStealTimes = table.gpuStealTimes
+ _gpuLostTimes = table.gpuLostTimes
+
+ // energy & carbon stats
_powerDraw = table.powerDraw
_energyUsage = table.energyUsage
_embodiedCarbon = table.embodiedCarbon
@@ -135,6 +148,65 @@ public class HostTableReaderImpl(
private var _cpuLostTime = 0L
private var previousCpuLostTime = 0L
+    // Per-GPU gauge values; each getter hands out its backing list as is.
+    private var _gpuCapacities: ArrayList<Double> = arrayListOf()
+    override val gpuCapacities: ArrayList<Double>
+        get() = _gpuCapacities
+
+    private var _gpuLimits: ArrayList<Double> = arrayListOf()
+    override val gpuLimits: ArrayList<Double>
+        get() = _gpuLimits
+
+    private var _gpuUsages: ArrayList<Double> = arrayListOf()
+    override val gpuUsages: ArrayList<Double>
+        get() = _gpuUsages
+
+    private var _gpuDemands: ArrayList<Double> = arrayListOf()
+    override val gpuDemands: ArrayList<Double>
+        get() = _gpuDemands
+
+    private var _gpuUtilizations: ArrayList<Double> = arrayListOf()
+    override val gpuUtilizations: ArrayList<Double>
+        get() = _gpuUtilizations
+
+    // GPU time counters: each getter reports the current value minus the matching previous* snapshot, per GPU.
+    override val gpuActiveTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuActiveTimes, previousGpuActiveTimes)
+    private var _gpuActiveTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuActiveTimes: ArrayList<Long> = ArrayList()
+
+    override val gpuIdleTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuIdleTimes, previousGpuIdleTimes)
+    private var _gpuIdleTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuIdleTimes: ArrayList<Long> = ArrayList()
+
+    override val gpuStealTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuStealTimes, previousGpuStealTimes)
+    private var _gpuStealTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuStealTimes: ArrayList<Long> = ArrayList()
+
+    override val gpuLostTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuLostTimes, previousGpuLostTimes)
+    private var _gpuLostTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuLostTimes: ArrayList<Long> = ArrayList()
+
+    /**
+     * Compute `current[i] - previous[i]` for every index of [current]; indices missing from [previous] count as 0.
+     *
+     * The result is built directly as an [ArrayList] rather than by casting the [List] returned by `map`, because
+     * the concrete type behind that [List] is not part of its documented contract.
+     *
+     * @param current The most recently recorded counter values.
+     * @param previous The counter values of the previous snapshot; may be shorter than [current].
+     * @return A new list with the same size as [current] holding the element-wise differences.
+     */
+    private fun gpuTimeDeltas(
+        current: List<Long>,
+        previous: List<Long>,
+    ): ArrayList<Long> =
+        current.mapIndexedTo(ArrayList(current.size)) { index, value -> value - (previous.getOrNull(index) ?: 0L) }
+
override val powerDraw: Double
get() = _powerDraw
private var _powerDraw = 0.0
@@ -168,6 +240,7 @@ public class HostTableReaderImpl(
override fun record(now: Instant) {
val hostCpuStats = host.getCpuStats()
val hostSysStats = host.getSystemStats()
+ val hostGpuStats = host.getGpuStats()
_timestamp = now
_timestampAbsolute = now + startTime
@@ -184,6 +257,16 @@ public class HostTableReaderImpl(
_cpuIdleTime = hostCpuStats.idleTime
_cpuStealTime = hostCpuStats.stealTime
_cpuLostTime = hostCpuStats.lostTime
+ // GPU stats
+ _gpuLimits = hostGpuStats.map { it.capacity } as ArrayList<Double>
+ _gpuDemands = hostGpuStats.map { it.demand } as ArrayList<Double>
+ _gpuUsages = hostGpuStats.map { it.usage } as ArrayList<Double>
+ _gpuUtilizations = hostGpuStats.map { it.utilization } as ArrayList<Double>
+ _gpuActiveTimes = hostGpuStats.map { it.activeTime } as ArrayList<Long>
+ _gpuIdleTimes = hostGpuStats.map { it.idleTime } as ArrayList<Long>
+ _gpuStealTimes = hostGpuStats.map { it.stealTime } as ArrayList<Long>
+ _gpuLostTimes = hostGpuStats.map { it.lostTime } as ArrayList<Long>
+ // energy & carbon stats
_powerDraw = hostSysStats.powerDraw
_energyUsage = hostSysStats.energyUsage
_embodiedCarbon = hostSysStats.embodiedCarbon
@@ -202,6 +285,10 @@ public class HostTableReaderImpl(
previousCpuIdleTime = _cpuIdleTime
previousCpuStealTime = _cpuStealTime
previousCpuLostTime = _cpuLostTime
+ previousGpuActiveTimes = _gpuActiveTimes
+ previousGpuIdleTimes = _gpuIdleTimes
+ previousGpuStealTimes = _gpuStealTimes
+ previousGpuLostTimes = _gpuLostTimes
previousEnergyUsage = _energyUsage
previousUptime = _uptime
previousDowntime = _downtime
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt
index b0745dd6..f71587c7 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt
@@ -32,6 +32,7 @@ import java.time.Instant
* An interface that is used to read a row of a task trace entry.
*/
public interface TaskTableReader : Exportable {
+ // TODO: find better way for more resources
public fun copy(): TaskTableReader
public fun setValues(table: TaskTableReader)
@@ -130,6 +131,41 @@ public interface TaskTableReader : Exportable {
public val cpuLostTime: Long
/**
+ * The capacity of the GPUs of the host on which the task is running (in MHz).
+ */
+ public val gpuLimits: DoubleArray?
+
+ /**
+ * The amount of GPU capacity allocated to the task (in MHz).
+ */
+ public val gpuUsages: DoubleArray?
+
+ /**
+ * The GPU demanded by this task (in MHz).
+ */
+ public val gpuDemands: DoubleArray?
+
+ /**
+ * The duration (in seconds) that a GPU was active in the task.
+ */
+ public val gpuActiveTimes: LongArray?
+
+ /**
+ * The duration (in seconds) that a GPU was idle in the task.
+ */
+ public val gpuIdleTimes: LongArray?
+
+ /**
+ * The duration (in seconds) that a vGPU wanted to run, but no capacity was available.
+ */
+ public val gpuStealTimes: LongArray?
+
+ /**
+ * The duration (in seconds) of GPU time that was lost due to interference.
+ */
+ public val gpuLostTimes: LongArray?
+
+ /**
* The state of the task
*/
public val taskState: TaskState?
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt
index d63202a9..6128c9a2 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt
@@ -63,6 +63,15 @@ public class TaskTableReaderImpl(
_cpuIdleTime = table.cpuIdleTime
_cpuStealTime = table.cpuStealTime
_cpuLostTime = table.cpuLostTime
+ // GPU stats
+ _gpuLimits = table.gpuLimits
+ _gpuDemands = table.gpuDemands
+ _gpuUsages = table.gpuUsages
+ _gpuActiveTimes = table.gpuActiveTimes
+ _gpuIdleTimes = table.gpuIdleTimes
+ _gpuStealTimes = table.gpuStealTimes
+ _gpuLostTimes = table.gpuLostTimes
+
_uptime = table.uptime
_downtime = table.downtime
_numFailures = table.numFailures
@@ -84,7 +93,7 @@ public class TaskTableReaderImpl(
task.name,
"vm",
"x86",
- task.flavor.coreCount,
+ task.flavor.cpuCoreCount,
task.flavor.memorySize,
)
@@ -168,6 +177,74 @@ public class TaskTableReaderImpl(
private var _cpuLostTime = 0L
private var previousCpuLostTime = 0L
+ override val gpuLimits: DoubleArray?
+ get() = _gpuLimits ?: DoubleArray(0)
+ private var _gpuLimits: DoubleArray? = null
+
+ override val gpuUsages: DoubleArray?
+ get() = _gpuUsages ?: DoubleArray(0)
+ private var _gpuUsages: DoubleArray? = null
+
+ override val gpuDemands: DoubleArray?
+ get() = _gpuDemands ?: DoubleArray(0)
+ private var _gpuDemands: DoubleArray? = null
+
+ override val gpuActiveTimes: LongArray?
+ get() {
+ val current = _gpuActiveTimes ?: return LongArray(0)
+ val previous = previousGpuActiveTimes
+
+ return if (previous == null || current.size != previous.size) { // not sure if I like the second clause
+ current
+ } else {
+ LongArray(current.size) { i -> current[i] - previous[i] }
+ }
+ }
+ private var _gpuActiveTimes: LongArray? = null
+ private var previousGpuActiveTimes: LongArray? = null
+
+ override val gpuIdleTimes: LongArray?
+ get() {
+ val current = _gpuIdleTimes ?: return LongArray(0)
+ val previous = previousGpuIdleTimes
+
+ return if (previous == null || current.size != previous.size) { // not sure if I like the second clause
+ current
+ } else {
+ LongArray(current.size) { i -> current[i] - previous[i] }
+ }
+ }
+ private var _gpuIdleTimes: LongArray? = null
+ private var previousGpuIdleTimes: LongArray? = null
+
+ override val gpuStealTimes: LongArray?
+ get() {
+ val current = _gpuStealTimes ?: return LongArray(0)
+ val previous = previousGpuStealTimes
+
+ return if (previous == null || current.size != previous.size) {
+ current
+ } else {
+ LongArray(current.size) { i -> current[i] - previous[i] }
+ }
+ }
+ private var _gpuStealTimes: LongArray? = null
+ private var previousGpuStealTimes: LongArray? = null
+
+ override val gpuLostTimes: LongArray?
+ get() {
+ val current = _gpuLostTimes ?: return LongArray(0)
+ val previous = previousGpuLostTimes
+
+ return if (previous == null || current.size != previous.size) {
+ current
+ } else {
+ LongArray(current.size) { i -> current[i] - previous[i] }
+ }
+ }
+ private var _gpuLostTimes: LongArray? = null
+ private var previousGpuLostTimes: LongArray? = null
+
override val taskState: TaskState?
get() = _taskState
private var _taskState: TaskState? = null
@@ -192,6 +269,7 @@ public class TaskTableReaderImpl(
val cpuStats = simHost?.getCpuStats(task)
val sysStats = simHost?.getSystemStats(task)
+ val gpuStats = simHost?.getGpuStats(task)
_hostName = task.hostName
@@ -214,6 +292,26 @@ public class TaskTableReaderImpl(
_scheduleTime = task.scheduledAt
_finishTime = task.finishedAt
+ if (gpuStats != null && gpuStats.isNotEmpty()) {
+ val size = gpuStats.size
+ _gpuLimits = DoubleArray(size) { i -> gpuStats[i].capacity }
+ _gpuDemands = DoubleArray(size) { i -> gpuStats[i].demand }
+ _gpuUsages = DoubleArray(size) { i -> gpuStats[i].usage }
+ _gpuActiveTimes = LongArray(size) { i -> gpuStats[i].activeTime }
+ _gpuIdleTimes = LongArray(size) { i -> gpuStats[i].idleTime }
+ _gpuStealTimes = LongArray(size) { i -> gpuStats[i].stealTime }
+ _gpuLostTimes = LongArray(size) { i -> gpuStats[i].lostTime }
+ } else {
+ _gpuIdleTimes = null
+ _gpuStealTimes = null
+ _gpuLostTimes = null
+ _gpuIdleTimes = null
+ _gpuLimits = null
+ _gpuUsages = null
+ _gpuDemands = null
+ _gpuActiveTimes = null
+ }
+
_taskState = task.state
}
@@ -227,6 +325,10 @@ public class TaskTableReaderImpl(
previousCpuIdleTime = _cpuIdleTime
previousCpuStealTime = _cpuStealTime
previousCpuLostTime = _cpuLostTime
+ previousGpuActiveTimes = _gpuActiveTimes
+ previousGpuIdleTimes = _gpuIdleTimes
+ previousGpuStealTimes = _gpuStealTimes
+ previousGpuLostTimes = _gpuLostTimes
simHost = null
_cpuLimit = 0.0
diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt
index 04a20f49..5109f828 100644
--- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/FilterSchedulerTest.kt
@@ -78,7 +78,7 @@ internal class FilterSchedulerTest {
)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -103,7 +103,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -134,7 +134,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -159,7 +159,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(host)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -180,7 +180,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(host)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -209,7 +209,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -232,7 +232,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(host)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 2300
every { req.isCancelled } returns false
@@ -250,18 +250,18 @@ internal class FilterSchedulerTest {
val hostA = mockk<HostView>()
every { hostA.host.getState() } returns HostState.UP
every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
- every { hostA.provisionedCores } returns 3
+ every { hostA.provisionedCpuCores } returns 3
val hostB = mockk<HostView>()
every { hostB.host.getState() } returns HostState.UP
every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
- every { hostB.provisionedCores } returns 0
+ every { hostB.provisionedCpuCores } returns 0
scheduler.addHost(hostA)
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -279,12 +279,12 @@ internal class FilterSchedulerTest {
val host = mockk<HostView>()
every { host.host.getState() } returns HostState.UP
every { host.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
- every { host.provisionedCores } returns 0
+ every { host.provisionedCpuCores } returns 0
scheduler.addHost(host)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 8
+ every { req.task.flavor.cpuCoreCount } returns 8
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -312,7 +312,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.task.flavor.meta } returns mapOf("cpu-capacity" to 2 * 3200.0)
every { req.isCancelled } returns false
@@ -342,7 +342,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -358,7 +358,7 @@ internal class FilterSchedulerTest {
)
val reqA = mockk<SchedulingRequest>()
- every { reqA.task.flavor.coreCount } returns 2
+ every { reqA.task.flavor.cpuCoreCount } returns 2
every { reqA.task.flavor.memorySize } returns 1024
every { reqA.isCancelled } returns false
val taskA = mockk<ServiceTask>()
@@ -369,19 +369,19 @@ internal class FilterSchedulerTest {
every { hostA.host.getState() } returns HostState.UP
every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
every { hostA.host.getInstances() } returns emptySet()
- every { hostA.provisionedCores } returns 3
+ every { hostA.provisionedCpuCores } returns 3
val hostB = mockk<HostView>()
every { hostB.host.getState() } returns HostState.UP
every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
every { hostB.host.getInstances() } returns setOf(reqA.task)
- every { hostB.provisionedCores } returns 0
+ every { hostB.provisionedCpuCores } returns 0
scheduler.addHost(hostA)
scheduler.addHost(hostB)
val reqB = mockk<SchedulingRequest>()
- every { reqB.task.flavor.coreCount } returns 2
+ every { reqB.task.flavor.cpuCoreCount } returns 2
every { reqB.task.flavor.memorySize } returns 1024
every { reqB.task.meta } returns emptyMap()
every { reqB.isCancelled } returns false
@@ -402,7 +402,7 @@ internal class FilterSchedulerTest {
)
val reqA = mockk<SchedulingRequest>()
- every { reqA.task.flavor.coreCount } returns 2
+ every { reqA.task.flavor.cpuCoreCount } returns 2
every { reqA.task.flavor.memorySize } returns 1024
every { reqA.isCancelled } returns false
val taskA = mockk<ServiceTask>()
@@ -413,19 +413,19 @@ internal class FilterSchedulerTest {
every { hostA.host.getState() } returns HostState.UP
every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
every { hostA.host.getInstances() } returns setOf(reqA.task)
- every { hostA.provisionedCores } returns 3
+ every { hostA.provisionedCpuCores } returns 3
val hostB = mockk<HostView>()
every { hostB.host.getState() } returns HostState.UP
every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
every { hostB.host.getInstances() } returns emptySet()
- every { hostB.provisionedCores } returns 0
+ every { hostB.provisionedCpuCores } returns 0
scheduler.addHost(hostA)
scheduler.addHost(hostB)
val reqB = mockk<SchedulingRequest>()
- every { reqB.task.flavor.coreCount } returns 2
+ every { reqB.task.flavor.cpuCoreCount } returns 2
every { reqB.task.flavor.memorySize } returns 1024
every { reqB.task.meta } returns emptyMap()
every { reqB.isCancelled } returns false
@@ -459,7 +459,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -488,7 +488,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -506,18 +506,18 @@ internal class FilterSchedulerTest {
val hostA = mockk<HostView>()
every { hostA.host.getState() } returns HostState.UP
every { hostA.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
- every { hostA.provisionedCores } returns 2
+ every { hostA.provisionedCpuCores } returns 2
val hostB = mockk<HostView>()
every { hostB.host.getState() } returns HostState.UP
every { hostB.host.getModel() } returns HostModel(4 * 2600.0, 4, 2048)
- every { hostB.provisionedCores } returns 0
+ every { hostB.provisionedCpuCores } returns 0
scheduler.addHost(hostA)
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -546,7 +546,7 @@ internal class FilterSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt
index 92d5008b..6b9b0048 100644
--- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/MemorizingSchedulerTest.kt
@@ -43,7 +43,7 @@ internal class MemorizingSchedulerTest {
)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -67,7 +67,7 @@ internal class MemorizingSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
@@ -101,7 +101,7 @@ internal class MemorizingSchedulerTest {
scheduler.addHost(hostB)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
val skipped = slot<Int>()
@@ -129,7 +129,7 @@ internal class MemorizingSchedulerTest {
scheduler.addHost(host)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 2300
every { req.isCancelled } returns false
val skipped = slot<Int>()
diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt
index 46c6425e..02f83eaf 100644
--- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/scheduler/TimeshiftSchedulerTest.kt
@@ -48,7 +48,7 @@ class TimeshiftSchedulerTest {
)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
every { req.task.nature } returns TaskNature(true)
@@ -76,7 +76,7 @@ class TimeshiftSchedulerTest {
)
val req = mockk<SchedulingRequest>()
- every { req.task.flavor.coreCount } returns 2
+ every { req.task.flavor.cpuCoreCount } returns 2
every { req.task.flavor.memorySize } returns 1024
every { req.isCancelled } returns false
every { req.task.nature } returns TaskNature(true)
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt
index b6c945d2..b52608a9 100644
--- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt
+++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt
@@ -31,10 +31,13 @@ import org.opendc.compute.topology.specs.HostJSONSpec
import org.opendc.compute.topology.specs.HostSpec
import org.opendc.compute.topology.specs.PowerSourceSpec
import org.opendc.compute.topology.specs.TopologySpec
-import org.opendc.simulator.compute.cpu.getPowerModel
import org.opendc.simulator.compute.models.CpuModel
+import org.opendc.simulator.compute.models.GpuModel
import org.opendc.simulator.compute.models.MachineModel
import org.opendc.simulator.compute.models.MemoryUnit
+import org.opendc.simulator.compute.power.getPowerModel
+import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory
+import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory.DistributionPolicyType
import java.io.File
import java.io.InputStream
@@ -166,29 +169,63 @@ private fun HostJSONSpec.toHostSpec(clusterName: String): HostSpec {
}
val unknownMemoryUnit = MemoryUnit(memory.vendor, memory.modelName, memory.memorySpeed.toMHz(), memory.memorySize.toMiB().toLong())
+ val gpuUnits =
+ List(gpu?.count ?: 0) {
+ GpuModel(
+ globalCoreId++,
+ gpu!!.coreCount,
+ gpu.coreSpeed.toMHz(),
+ gpu.memoryBandwidth.toKibps(),
+ gpu.memorySize.toMiB().toLong(),
+ gpu.vendor,
+ gpu.modelName,
+ gpu.architecture,
+ )
+ }
+
val machineModel =
MachineModel(
units,
unknownMemoryUnit,
+ gpuUnits,
+ // TODO: Pass the distribution policies through instead of hard-coding MaxMinFairness
+ DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness),
+ DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness),
)
- val powerModel =
+ val cpuPowerModel =
getPowerModel(
- powerModel.modelType,
- powerModel.power.toWatts(),
- powerModel.maxPower.toWatts(),
- powerModel.idlePower.toWatts(),
- powerModel.calibrationFactor,
- powerModel.asymUtil,
- powerModel.dvfs,
+ cpuPowerModel.modelType,
+ cpuPowerModel.power.toWatts(),
+ cpuPowerModel.maxPower.toWatts(),
+ cpuPowerModel.idlePower.toWatts(),
+ cpuPowerModel.calibrationFactor,
+ cpuPowerModel.asymUtil,
+ cpuPowerModel.dvfs,
)
+ val gpuPowerModel =
+ if (gpuUnits.isEmpty()) {
+ null
+ } else {
+ getPowerModel(
+ gpuPowerModel.modelType,
+ gpuPowerModel.power.toWatts(),
+ gpuPowerModel.maxPower.toWatts(),
+ gpuPowerModel.idlePower.toWatts(),
+ gpuPowerModel.calibrationFactor,
+ gpuPowerModel.asymUtil,
+ gpuPowerModel.dvfs,
+ )
+ }
+
val hostSpec =
HostSpec(
createUniqueName(this.name, hostNames),
clusterName,
machineModel,
- powerModel,
+ cpuPowerModel,
+ gpuPowerModel,
)
return hostSpec
}
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt
index e4ec89e1..30a75896 100644
--- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt
+++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt
@@ -22,8 +22,8 @@
package org.opendc.compute.topology.specs
-import org.opendc.simulator.compute.cpu.CpuPowerModel
import org.opendc.simulator.compute.models.MachineModel
+import org.opendc.simulator.compute.power.PowerModel
/**
* Description of a physical host that will be simulated by OpenDC and host the virtual machines.
@@ -36,7 +36,8 @@ public data class HostSpec(
val name: String,
val clusterName: String,
val model: MachineModel,
- val cpuPowerModel: CpuPowerModel,
+ val cpuPowerModel: PowerModel,
+ val gpuPowerModel: PowerModel?,
val embodiedCarbon: Double = 1000.0,
val expectedLifetime: Double = 5.0,
)
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt
index 8cbf818b..62c3906a 100644
--- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt
+++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt
@@ -24,6 +24,7 @@ package org.opendc.compute.topology.specs
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
+import org.opendc.common.units.DataRate
import org.opendc.common.units.DataSize
import org.opendc.common.units.Frequency
import org.opendc.common.units.Power
@@ -76,7 +77,9 @@ public data class HostJSONSpec(
val cpu: CPUJSONSpec,
val count: Int = 1,
val memory: MemoryJSONSpec,
- val powerModel: PowerModelSpec = PowerModelSpec.DFLT,
+ val gpu: GPUJSONSpec? = null,
+ val cpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT,
+ val gpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT,
)
/**
@@ -118,6 +121,18 @@ public data class MemoryJSONSpec(
)
@Serializable
+public data class GPUJSONSpec(
+ val count: Int = 1,
+ val coreCount: Int,
+ val coreSpeed: Frequency,
+ val memorySize: DataSize = DataSize.ofMiB(-1),
+ val memoryBandwidth: DataRate = DataRate.ofKibps(-1),
+ val vendor: String = "unknown",
+ val modelName: String = "unknown",
+ val architecture: String = "unknown",
+)
+
+@Serializable
public data class PowerModelSpec(
val modelType: String,
val power: Power = Power.ofWatts(400),
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt
index 80996c0e..7599d4e1 100644
--- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeWorkloadLoader.kt
@@ -33,11 +33,15 @@ import org.opendc.trace.conv.resourceCpuCapacity
import org.opendc.trace.conv.resourceCpuCount
import org.opendc.trace.conv.resourceDeadline
import org.opendc.trace.conv.resourceDuration
+import org.opendc.trace.conv.resourceGpuCapacity
+import org.opendc.trace.conv.resourceGpuCount
+import org.opendc.trace.conv.resourceGpuMemCapacity
import org.opendc.trace.conv.resourceID
import org.opendc.trace.conv.resourceMemCapacity
import org.opendc.trace.conv.resourceNature
import org.opendc.trace.conv.resourceStateCpuUsage
import org.opendc.trace.conv.resourceStateDuration
+import org.opendc.trace.conv.resourceStateGpuUsage
import org.opendc.trace.conv.resourceSubmissionTime
import java.io.File
import java.lang.ref.SoftReference
@@ -79,6 +83,8 @@ public class ComputeWorkloadLoader(
val durationCol = reader.resolve(resourceStateDuration)
val coresCol = reader.resolve(resourceCpuCount)
val usageCol = reader.resolve(resourceStateCpuUsage)
+ val gpuCoresCol = reader.resolve(resourceGpuCount)
+ val resourceGpuCapacityCol = reader.resolve(resourceStateGpuUsage)
val fragments = mutableMapOf<String, Builder>()
@@ -88,12 +94,23 @@ public class ComputeWorkloadLoader(
val durationMs = reader.getDuration(durationCol)!!
val cores = reader.getInt(coresCol)
val cpuUsage = reader.getDouble(usageCol)
+ val gpuUsage =
+ if (reader.getDouble(
+ resourceGpuCapacityCol,
+ ).isNaN()
+ ) {
+ 0.0
+ } else {
+ reader.getDouble(resourceGpuCapacityCol) // Default to 0 if not present
+ }
+ val gpuCores = reader.getInt(gpuCoresCol) // Default to 0 if not present
+ val gpuMemory = 0L // currently not implemented
val builder =
fragments.computeIfAbsent(
id,
) { Builder(checkpointInterval, checkpointDuration, checkpointIntervalScaling, scalingPolicy, id) }
- builder.add(durationMs, cpuUsage, cores)
+ builder.add(durationMs, cpuUsage, cores, gpuUsage, gpuCores, gpuMemory)
}
fragments
@@ -117,6 +134,9 @@ public class ComputeWorkloadLoader(
val cpuCountCol = reader.resolve(resourceCpuCount)
val cpuCapacityCol = reader.resolve(resourceCpuCapacity)
val memCol = reader.resolve(resourceMemCapacity)
+ val gpuCapacityCol = reader.resolve(resourceGpuCapacity) // Assuming GPU capacity is also present
+ val gpuCoreCountCol = reader.resolve(resourceGpuCount) // Assuming GPU cores are also present
+ val gpuMemoryCol = reader.resolve(resourceGpuMemCapacity) // Assuming GPU memory is also present
val natureCol = reader.resolve(resourceNature)
val deadlineCol = reader.resolve(resourceDeadline)
@@ -135,6 +155,17 @@ public class ComputeWorkloadLoader(
val cpuCount = reader.getInt(cpuCountCol)
val cpuCapacity = reader.getDouble(cpuCapacityCol)
val memCapacity = reader.getDouble(memCol) / 1000.0 // Convert from KB to MB
+ val gpuUsage =
+ if (reader.getDouble(
+ gpuCapacityCol,
+ ).isNaN()
+ ) {
+ 0.0
+ } else {
+ reader.getDouble(gpuCapacityCol) // Default to 0 if not present
+ }
+ val gpuCoreCount = reader.getInt(gpuCoreCountCol) // Default to 0 if not present
+ val gpuMemory = 0L // currently not implemented
val uid = UUID.nameUUIDFromBytes("$id-${counter++}".toByteArray())
var nature = reader.getString(natureCol)
var deadline = reader.getLong(deadlineCol)
@@ -153,6 +184,9 @@ public class ComputeWorkloadLoader(
cpuCount,
cpuCapacity,
memCapacity.roundToLong(),
+ gpuCoreCount,
+ gpuUsage,
+ gpuMemory,
totalLoad,
submissionTime,
duration,
@@ -224,17 +258,23 @@ public class ComputeWorkloadLoader(
* Add a fragment to the trace.
*
* @param duration The duration of the fragment (in epoch millis).
- * @param usage CPU usage of this fragment.
- * @param cores Number of cores used.
+ * @param cpuUsage CPU usage of this fragment.
+ * @param cpuCores Number of cores used.
+ * @param gpuUsage GPU usage of this fragment.
+ * @param gpuCores Number of GPU cores used.
+ * @param gpuMemoryUsage GPU memory usage of this fragment.
*/
fun add(
duration: Duration,
- usage: Double,
- cores: Int,
+ cpuUsage: Double,
+ cpuCores: Int,
+ gpuUsage: Double = 0.0,
+ gpuCores: Int = 0,
+ gpuMemoryUsage: Long = 0,
) {
- totalLoad += (usage * duration.toMillis()) / 1000 // avg MHz * duration = MFLOPs
+ totalLoad += ((cpuUsage * duration.toMillis()) + (gpuUsage * duration.toMillis())) / 1000 // avg MHz * duration = MFLOPs
- builder.add(duration.toMillis(), usage, cores)
+ builder.add(duration.toMillis(), cpuUsage, cpuCores, gpuUsage, gpuCores, gpuMemoryUsage)
}
/**
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt
index 787f271e..228b84ed 100644
--- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/Task.kt
@@ -43,6 +43,9 @@ public data class Task(
val cpuCount: Int,
val cpuCapacity: Double,
val memCapacity: Long,
+ val gpuCount: Int = 0,
+ val gpuCapacity: Double = 0.0,
+ val gpuMemCapacity: Long = 0L,
val totalLoad: Double,
var submissionTime: Long,
val duration: Long,