summaryrefslogtreecommitdiff
path: root/opendc-compute/opendc-compute-simulator/src/main
diff options
context:
space:
mode:
authorNiels Thiele <noleu66@posteo.net>2025-06-22 12:31:21 +0200
committerGitHub <noreply@github.com>2025-06-22 12:31:21 +0200
commit0203254b709614fa732c114aa25916f61b8b3275 (patch)
tree63232140a8e60e16e1668a51eb58954d8609fbdc /opendc-compute/opendc-compute-simulator/src/main
parent8f846655347195bf6f22a4a102aa06f0ab127da1 (diff)
Implemented Single GPU Support & outline of host-level allocation policies (#342)
* renamed performance counter to distinguish different resource types * added GPU, modelled similar to CPU * added GPUs to machine model * list of GPUs instead of single instance * renamed memory speed to bandwidth * enabled parsing of GPU resources * split powermodel into cpu and GPU powermodel * added gpu parsing tests * added idea of host level scheduling * added tests for multi gpu parsing * renamed powermodel to cpupowermodel * clarified naming of cpu and gpu components * added resource type to flow supplier and edge * added resourcetype * added GPU components and resource type to fragments * added GPU to workload and updated resource usage retrieval * implemented first version of multi resource * added name to workload * renamed performance counters * removed commented out code * removed deprecated comments * included demand and supply into calculations * resolving rebase mismatches * moved resource type from flowedge class to common package * added available resources to machines * cleaner separation if workload is started of simmachine or vm * Replaced exception with dedicated enum * Only looping over resources that are actually used * using hashmaps to handle resourcetype instead of arrays for readability * fixed condition * tracking finished workloads per resource type * removed resource type from flowedge * made supply and demand distribution resource specific * added power model for GPU * removed unused test setup * removed deprecated comments * removed unused parameter * added ID for GPU * added GPUs and GPU performance counters (naively) * implemented capturing of GPU statistics * added reminders for future implementations * renamed properties for better identification * added capturing GPU statistics * implemented first tests for GPUs * unified access to performance counters * added interface for general compute resource handling * implemented multi resource support in simmachine * added individual edge to VM per resource * extended compute resource 
interface * implemented multi-resource support in PSU * implemented generic retrieval of computeresources * implemented multi-resource support in vm * made method use more resource specific * implemented simple GPU tests * rolled back frequency and demand use * made naming independent of used resource * using workloads resources instead of VMs to determine available resource * implemented determination of used resources in workload * removed logging statements * implemented reading from workload * fixed naming for host-level allocation * fixed next deadline calculation * fixed forwarding supply * reduced memory footprint * made GPU powermodel nullable * made Gpu powermodel configurable in topology * implemented tests for basic gpu scheduler * added gpu properties * implemented weights, filter and simple cpu-gpu scheduler * spotless apply * spotless apply pt. 2 * fixed capitalization * spotless kotlin run * implemented column export * todo update * removed code comments * Merged PerformanceCounter classes into one & removed interface * removed GPU specific powermodel * Rebase master: kept both versions of TopologyFactories * renamed CpuPowermodel to resource independent Powermodel Moved it from Cpu package to power package * implemented default of getResourceType & removed overrides if possible * split getResourceType into Consumer and Supplier * added power as resource type * reduced supply demand from arrayList to single value * combining GPUs into one large GPU, until full multi-gpu support * merged distribution policy enum with corresponding factory * added comment * post-rebase fixes * aligned naming * Added GPU metrics to task output * Updates power resource type to uppercase. Standardizes the `ResourceType.Power` enum to `ResourceType.POWER` for consistency with other resource types and improved readability. * Removes deprecated test assertions Removes commented-out assertions in GPU tests. These assertions are no longer needed and clutter the test code. 
* Renames MaxMinFairnessStrategy to Policy Renames MaxMinFairnessStrategy to MaxMinFairnessPolicy for clarity and consistency with naming conventions. This change affects the factory and distributor to use the updated name. * applies spotless * nulls GPUs as it is not used
Diffstat (limited to 'opendc-compute/opendc-compute-simulator/src/main')
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java33
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java19
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java10
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java11
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java24
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java44
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java46
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt73
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt43
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt1
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt31
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt4
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt48
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt50
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt43
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt46
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt42
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt37
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt1
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt45
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt87
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt36
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt104
26 files changed, 841 insertions, 43 deletions
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java
new file mode 100644
index 00000000..97aaa820
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/GpuHostModel.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.host;
+
+/**
+ * A model for a GPU in a host.
+ *
+ * @param gpuCoreCapacity The capacity of the GPU cores in Hz.
+ * @param gpuCoreCount The number of GPU cores.
+ * @param GpuMemoryCapacity The capacity of the GPU memory in GB.
+ * @param GpuMemorySpeed The speed of the GPU memory in GB/s.
+ */
+public record GpuHostModel(double gpuCoreCapacity, int gpuCoreCount, long GpuMemoryCapacity, double GpuMemorySpeed) {}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java
index 1ea73ea6..6464a56c 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/host/HostModel.java
@@ -22,11 +22,24 @@
package org.opendc.compute.simulator.host;
+import java.util.List;
+
/**
* Record describing the static machine properties of the host.
*
- * @param cpuCapacity The total CPU capacity of the host in MHz.
- * @param coreCount The number of logical processing cores available for this host.
+ * @param cpuCapacity The total CPU capacity of the host in MHz.
+ * @param coreCount The number of logical processing cores available for this host.
* @param memoryCapacity The amount of memory available for this host in MB.
*/
-public record HostModel(double cpuCapacity, int coreCount, long memoryCapacity) {}
+public record HostModel(double cpuCapacity, int coreCount, long memoryCapacity, List<GpuHostModel> gpuHostModels) {
+ /**
+ * Create a new host model.
+ *
+ * @param cpuCapacity The total CPU capacity of the host in MHz.
+ * @param coreCount The number of logical processing cores available for this host.
+ * @param memoryCapacity The amount of memory available for this host in MB.
+ */
+ public HostModel(double cpuCapacity, int coreCount, long memoryCapacity) {
+ this(cpuCapacity, coreCount, memoryCapacity, null);
+ }
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java
index 2b4306af..835c7186 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ComputeService.java
@@ -198,7 +198,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
HostView hv = hostToView.get(host);
final ServiceFlavor flavor = task.getFlavor();
if (hv != null) {
- hv.provisionedCores -= flavor.getCoreCount();
+ hv.provisionedCpuCores -= flavor.getCpuCoreCount();
hv.instanceCount--;
hv.availableMemory += flavor.getMemorySize();
} else {
@@ -496,7 +496,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
if (result.getResultType() == SchedulingResultType.FAILURE) {
LOGGER.trace("Task {} selected for scheduling but no capacity available for it at the moment", task);
- if (flavor.getMemorySize() > maxMemory || flavor.getCoreCount() > maxCores) {
+ if (flavor.getMemorySize() > maxMemory || flavor.getCpuCoreCount() > maxCores) {
// Remove the incoming image
taskQueue.remove(req);
tasksPending--;
@@ -531,7 +531,7 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
attemptsSuccess++;
hv.instanceCount++;
- hv.provisionedCores += flavor.getCoreCount();
+ hv.provisionedCpuCores += flavor.getCpuCoreCount();
hv.availableMemory -= flavor.getMemorySize();
activeTasks.put(task, host);
@@ -612,12 +612,12 @@ public final class ComputeService implements AutoCloseable, CarbonReceiver {
@NotNull
public ServiceFlavor newFlavor(
- @NotNull String name, int cpuCount, long memorySize, @NotNull Map<String, ?> meta) {
+ @NotNull String name, int cpuCount, long memorySize, int gpuCoreCount, @NotNull Map<String, ?> meta) {
checkOpen();
final ComputeService service = this.service;
UUID uid = new UUID(service.clock.millis(), service.random.nextLong());
- ServiceFlavor flavor = new ServiceFlavor(service, uid, name, cpuCount, memorySize, meta);
+ ServiceFlavor flavor = new ServiceFlavor(service, uid, name, cpuCount, memorySize, gpuCoreCount, meta);
// service.flavorById.put(uid, flavor);
// service.flavors.add(flavor);
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java
index 7c548add..c07f58c7 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/HostView.java
@@ -31,7 +31,8 @@ public class HostView {
private final SimHost host;
int instanceCount;
long availableMemory;
- int provisionedCores;
+ int provisionedCpuCores;
+ int provisionedGpuCores;
/**
* Scheduler bookkeeping
@@ -83,8 +84,12 @@ public class HostView {
/**
* Return the provisioned cores on the host.
*/
- public int getProvisionedCores() {
- return provisionedCores;
+ public int getProvisionedCpuCores() {
+ return provisionedCpuCores;
+ }
+
+ public int getProvisionedGpuCores() {
+ return provisionedGpuCores;
}
@Override
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java
index eddde87e..8a4359b4 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/service/ServiceFlavor.java
@@ -36,22 +36,31 @@ public final class ServiceFlavor implements Flavor {
private final ComputeService service;
private final UUID uid;
private final String name;
- private final int coreCount;
+ private final int cpuCoreCount;
private final long memorySize;
+ private final int gpuCoreCount;
private final Map<String, ?> meta;
- ServiceFlavor(ComputeService service, UUID uid, String name, int coreCount, long memorySize, Map<String, ?> meta) {
+ ServiceFlavor(
+ ComputeService service,
+ UUID uid,
+ String name,
+ int cpuCoreCount,
+ long memorySize,
+ int gpuCoreCount,
+ Map<String, ?> meta) {
this.service = service;
this.uid = uid;
this.name = name;
- this.coreCount = coreCount;
+ this.cpuCoreCount = cpuCoreCount;
this.memorySize = memorySize;
+ this.gpuCoreCount = gpuCoreCount;
this.meta = meta;
}
@Override
- public int getCoreCount() {
- return coreCount;
+ public int getCpuCoreCount() {
+ return cpuCoreCount;
}
@Override
@@ -59,6 +68,11 @@ public final class ServiceFlavor implements Flavor {
return memorySize;
}
+ @Override
+ public int getGpuCoreCount() {
+ return gpuCoreCount;
+ }
+
@NotNull
@Override
public UUID getUid() {
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java
new file mode 100644
index 00000000..1aba13e3
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/GuestGpuStats.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.telemetry;
+
+/**
+ * Statistics about the GPUs of a guest.
+ *
+ * @param activeTime The cumulative time (in seconds) that the GPUs of the guest were actively running.
+ * @param idleTime The cumulative time (in seconds) the GPUs of the guest were idle.
+ * @param stealTime The cumulative GPU time (in seconds) that the guest was ready to run, but not granted time by the host.
+ * @param lostTime The cumulative GPU time (in seconds) that was lost due to interference with other machines.
+ * @param capacity The available GPU capacity of the guest (in MHz).
+ * @param usage Amount of GPU resources (in MHz) actually used by the guest.
+ * @param utilization The utilization of the GPU resources (in %) relative to the total GPU capacity.
+ */
+public record GuestGpuStats(
+ long activeTime,
+ long idleTime,
+ long stealTime,
+ long lostTime,
+ double capacity,
+ double usage,
+ double demand,
+ double utilization) {}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java
new file mode 100644
index 00000000..e42d7704
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/java/org/opendc/compute/simulator/telemetry/HostGpuStats.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.telemetry;
+
+/**
+ * Statistics about the GPUs of a host.
+ *
+ * @param activeTime The cumulative time (in seconds) that the GPUs of the host were actively running.
+ * @param idleTime The cumulative time (in seconds) the GPUs of the host were idle.
+ * @param stealTime The cumulative GPU time (in seconds) that virtual machines were ready to run, but were not able to.
+ * @param lostTime The cumulative GPU time (in seconds) that was lost due to interference between virtual machines.
+ * @param capacity The available GPU capacity of the host (in MHz).
+ * @param demand Amount of GPU resources (in MHz) the guests would use if there were no GPU contention or GPU
+ * limits.
+ * @param usage Amount of GPU resources (in MHz) actually used by the host.
+ * @param utilization The utilization of the GPU resources (in %) relative to the total GPU capacity.
+ */
+public record HostGpuStats(
+ long activeTime,
+ long idleTime,
+ long stealTime,
+ long lostTime,
+ double capacity,
+ double demand,
+ double usage,
+ double utilization) {}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt
index d23794ab..effe3d5b 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/host/SimHost.kt
@@ -22,19 +22,22 @@
package org.opendc.compute.simulator.host
+import org.opendc.common.ResourceType
import org.opendc.compute.api.Flavor
import org.opendc.compute.api.TaskState
import org.opendc.compute.simulator.internal.Guest
import org.opendc.compute.simulator.internal.GuestListener
import org.opendc.compute.simulator.service.ServiceTask
import org.opendc.compute.simulator.telemetry.GuestCpuStats
+import org.opendc.compute.simulator.telemetry.GuestGpuStats
import org.opendc.compute.simulator.telemetry.GuestSystemStats
import org.opendc.compute.simulator.telemetry.HostCpuStats
+import org.opendc.compute.simulator.telemetry.HostGpuStats
import org.opendc.compute.simulator.telemetry.HostSystemStats
-import org.opendc.simulator.compute.cpu.CpuPowerModel
import org.opendc.simulator.compute.machine.SimMachine
import org.opendc.simulator.compute.models.MachineModel
import org.opendc.simulator.compute.models.MemoryUnit
+import org.opendc.simulator.compute.power.PowerModel
import org.opendc.simulator.engine.engine.FlowEngine
import org.opendc.simulator.engine.graph.FlowDistributor
import java.time.Duration
@@ -57,7 +60,8 @@ public class SimHost(
private val clock: InstantSource,
private val engine: FlowEngine,
private val machineModel: MachineModel,
- private val cpuPowerModel: CpuPowerModel,
+ private val cpuPowerModel: PowerModel,
+ private val gpuPowerModel: PowerModel?,
private val embodiedCarbon: Double,
private val expectedLifetime: Double,
private val powerDistributor: FlowDistributor,
@@ -81,11 +85,22 @@ public class SimHost(
field = value
}
+ private val gpuHostModels: List<GpuHostModel>? =
+ machineModel.gpuModels?.map { gpumodel ->
+ return@map GpuHostModel(
+ gpumodel.totalCoreCapacity,
+ gpumodel.coreCount,
+ gpumodel.memorySize,
+ gpumodel.memoryBandwidth,
+ )
+ }
+
private val model: HostModel =
HostModel(
machineModel.cpuModel.totalCapacity,
machineModel.cpuModel.coreCount,
machineModel.memory.size,
+ gpuHostModels,
)
private var simMachine: SimMachine? = null
@@ -136,6 +151,7 @@ public class SimHost(
this.machineModel,
this.powerDistributor,
this.cpuPowerModel,
+ this.gpuPowerModel,
) { cause ->
hostState = if (cause != null) HostState.ERROR else HostState.DOWN
}
@@ -207,7 +223,7 @@ public class SimHost(
public fun canFit(task: ServiceTask): Boolean {
val sufficientMemory = model.memoryCapacity >= task.flavor.memorySize
- val enoughCpus = model.coreCount >= task.flavor.coreCount
+ val enoughCpus = model.coreCount >= task.flavor.cpuCoreCount
val canFit = simMachine!!.canFit(task.flavor.toMachineModel())
return sufficientMemory && enoughCpus && canFit
@@ -324,14 +340,14 @@ public class SimHost(
val counters = simMachine!!.performanceCounters
return HostCpuStats(
- counters.cpuActiveTime,
- counters.cpuIdleTime,
- counters.cpuStealTime,
- counters.cpuLostTime,
- counters.cpuCapacity,
- counters.cpuDemand,
- counters.cpuSupply,
- counters.cpuSupply / cpuLimit,
+ counters.activeTime,
+ counters.idleTime,
+ counters.stealTime,
+ counters.lostTime,
+ counters.capacity,
+ counters.demand,
+ counters.supply,
+ counters.supply / cpuLimit,
)
}
@@ -340,6 +356,33 @@ public class SimHost(
return guest.getCpuStats()
}
+ public fun getGpuStats(): List<HostGpuStats> {
+ val gpuStats = mutableListOf<HostGpuStats>()
+ for (gpu in simMachine!!.gpus) {
+ gpu.updateCounters(this.clock.millis())
+ val counters = simMachine!!.getGpuPerformanceCounters(gpu.id)
+
+ gpuStats.add(
+ HostGpuStats(
+ counters.activeTime,
+ counters.idleTime,
+ counters.stealTime,
+ counters.lostTime,
+ counters.capacity,
+ counters.demand,
+ counters.supply,
+ counters.supply / gpu.getCapacity(ResourceType.GPU),
+ ),
+ )
+ }
+ return gpuStats
+ }
+
+ public fun getGpuStats(task: ServiceTask): List<GuestGpuStats> {
+ val guest = requireNotNull(taskToGuestMap[task]) { "Unknown task ${task.name} at host $name" }
+ return guest.getGpuStats()
+ }
+
override fun hashCode(): Int = name.hashCode()
override fun equals(other: Any?): Boolean {
@@ -352,7 +395,13 @@ public class SimHost(
* Convert flavor to machine model.
*/
private fun Flavor.toMachineModel(): MachineModel {
- return MachineModel(simMachine!!.machineModel.cpuModel, MemoryUnit("Generic", "Generic", 3200.0, memorySize))
+ return MachineModel(
+ simMachine!!.machineModel.cpuModel,
+ MemoryUnit("Generic", "Generic", 3200.0, memorySize),
+ simMachine!!.machineModel.gpuModels,
+ simMachine!!.machineModel.cpuDistributionStrategy,
+ simMachine!!.machineModel.gpuDistributionStrategy,
+ )
}
/**
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
index fe8cbf2f..a980f6cb 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
@@ -27,6 +27,7 @@ import org.opendc.compute.api.TaskState
import org.opendc.compute.simulator.host.SimHost
import org.opendc.compute.simulator.service.ServiceTask
import org.opendc.compute.simulator.telemetry.GuestCpuStats
+import org.opendc.compute.simulator.telemetry.GuestGpuStats
import org.opendc.compute.simulator.telemetry.GuestSystemStats
import org.opendc.simulator.compute.machine.SimMachine
import org.opendc.simulator.compute.workload.ChainWorkload
@@ -64,6 +65,7 @@ public class Guest(
private var lastReport = clock.millis()
private var bootTime: Instant? = null
private val cpuLimit = simMachine.cpu.cpuModel.totalCapacity
+ private val gpuLimit = simMachine.gpus?.firstOrNull()?.gpuModel?.totalCoreCapacity ?: 0.0
/**
* Start the guest.
@@ -242,20 +244,43 @@ public class Guest(
*/
public fun getCpuStats(): GuestCpuStats {
virtualMachine!!.updateCounters(this.clock.millis())
- val counters = virtualMachine!!.performanceCounters
+ val counters = virtualMachine!!.cpuPerformanceCounters
return GuestCpuStats(
- counters.cpuActiveTime / 1000L,
- counters.cpuIdleTime / 1000L,
- counters.cpuStealTime / 1000L,
- counters.cpuLostTime / 1000L,
- counters.cpuCapacity,
- counters.cpuSupply,
- counters.cpuDemand,
- counters.cpuSupply / cpuLimit,
+ counters.activeTime / 1000L,
+ counters.idleTime / 1000L,
+ counters.stealTime / 1000L,
+ counters.lostTime / 1000L,
+ counters.capacity,
+ counters.supply,
+ counters.demand,
+ counters.supply / cpuLimit,
)
}
+ public fun getGpuStats(): List<GuestGpuStats> {
+ virtualMachine!!.updateCounters(this.clock.millis())
+ val counters = virtualMachine!!.gpuPerformanceCounters
+
+ val gpuStats = mutableListOf<GuestGpuStats>()
+ for (gpuCounter in counters) {
+ gpuStats.add(
+ GuestGpuStats(
+ gpuCounter.activeTime / 1000L,
+ gpuCounter.idleTime / 1000L,
+ gpuCounter.stealTime / 1000L,
+ gpuCounter.lostTime / 1000L,
+ gpuCounter.capacity,
+ gpuCounter.supply,
+ gpuCounter.demand,
+ // Assuming similar scaling as CPU
+ gpuCounter.supply / gpuLimit,
+ ),
+ )
+ }
+ return gpuStats
+ }
+
/**
* Helper function to track the uptime and downtime of the guest.
*/
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt
index 675ce3a9..791ab692 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/provisioner/HostsProvisioningStep.kt
@@ -127,6 +127,7 @@ public class HostsProvisioningStep internal constructor(
engine,
hostSpec.model,
hostSpec.cpuPowerModel,
+ hostSpec.gpuPowerModel,
hostSpec.embodiedCarbon,
hostSpec.expectedLifetime,
hostDistributor,
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt
index e70cec58..0376a492 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/ComputeSchedulers.kt
@@ -27,11 +27,13 @@ package org.opendc.compute.simulator.scheduler
import org.opendc.compute.simulator.scheduler.filters.ComputeFilter
import org.opendc.compute.simulator.scheduler.filters.RamFilter
import org.opendc.compute.simulator.scheduler.filters.VCpuFilter
+import org.opendc.compute.simulator.scheduler.filters.VGpuFilter
import org.opendc.compute.simulator.scheduler.timeshift.TimeshiftScheduler
import org.opendc.compute.simulator.scheduler.weights.CoreRamWeigher
import org.opendc.compute.simulator.scheduler.weights.InstanceCountWeigher
import org.opendc.compute.simulator.scheduler.weights.RamWeigher
import org.opendc.compute.simulator.scheduler.weights.VCpuWeigher
+import org.opendc.compute.simulator.scheduler.weights.VGpuWeigher
import java.time.InstantSource
import java.util.SplittableRandom
import java.util.random.RandomGenerator
@@ -48,6 +50,8 @@ public enum class ComputeSchedulerEnum {
Random,
TaskNumMemorizing,
Timeshift,
+ ProvisionedCpuGpuCores,
+ ProvisionedCpuGpuCoresInv,
}
public fun createPrefabComputeScheduler(
@@ -68,6 +72,7 @@ public fun createPrefabComputeScheduler(
): ComputeScheduler {
val cpuAllocationRatio = 1.0
val ramAllocationRatio = 1.5
+ val gpuAllocationRatio = 1.0
return when (name) {
ComputeSchedulerEnum.Mem ->
FilterScheduler(
@@ -128,5 +133,31 @@ public fun createPrefabComputeScheduler(
clock = clock,
random = SplittableRandom(seeder.nextLong()),
)
+ ComputeSchedulerEnum.ProvisionedCpuGpuCores ->
+ FilterScheduler(
+ filters =
+ listOf(
+ ComputeFilter(),
+ VCpuFilter(cpuAllocationRatio),
+ VGpuFilter(gpuAllocationRatio),
+ RamFilter(ramAllocationRatio),
+ ),
+ weighers = listOf(VCpuWeigher(cpuAllocationRatio, multiplier = 1.0), VGpuWeigher(gpuAllocationRatio, multiplier = 1.0)),
+ )
+ ComputeSchedulerEnum.ProvisionedCpuGpuCoresInv ->
+ FilterScheduler(
+ filters =
+ listOf(
+ ComputeFilter(),
+ VCpuFilter(cpuAllocationRatio),
+ VGpuFilter(gpuAllocationRatio),
+ RamFilter(ramAllocationRatio),
+ ),
+ weighers =
+ listOf(
+ VCpuWeigher(cpuAllocationRatio, multiplier = -1.0),
+ VGpuWeigher(gpuAllocationRatio, multiplier = -1.0),
+ ),
+ )
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt
index 4e63baf4..7fa7a051 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuCapacityFilter.kt
@@ -40,7 +40,7 @@ public class VCpuCapacityFilter : HostFilter {
return (
requiredCapacity == null ||
(availableCapacity / host.host.getModel().coreCount)
- >= (requiredCapacity / task.flavor.coreCount)
+ >= (requiredCapacity / task.flavor.cpuCoreCount)
)
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt
index c179a7bf..89739658 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VCpuFilter.kt
@@ -35,7 +35,7 @@ public class VCpuFilter(private val allocationRatio: Double) : HostFilter {
host: HostView,
task: ServiceTask,
): Boolean {
- val requested = task.flavor.coreCount
+ val requested = task.flavor.cpuCoreCount
val totalCores = host.host.getModel().coreCount
val limit = totalCores * allocationRatio
@@ -44,7 +44,7 @@ public class VCpuFilter(private val allocationRatio: Double) : HostFilter {
return false
}
- val availableCores = limit - host.provisionedCores
+ val availableCores = limit - host.provisionedCpuCores
return availableCores >= requested
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt
new file mode 100644
index 00000000..6dc27327
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuCapacityFilter.kt
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.filters
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+import kotlin.collections.maxOfOrNull
+
+/**
+ * A [HostFilter] that filters hosts based on the "gpu-capacity" requirement of a [ServiceTask] and the GPU
+ * capacity offered by the host.
+ *
+ * The host passes when the ratio of its GPU capacity to its GPU core count is at least the required capacity
+ * divided by the number of GPU cores requested by the task. Tasks without a "gpu-capacity" entry (of type
+ * [Double]) in their flavor metadata are accepted by every host.
+ */
+public class VGpuCapacityFilter : HostFilter {
+    /**
+     * @param host The host under consideration.
+     * @param task The task to place.
+     * @return `true` if the task has no GPU capacity requirement or the host satisfies it.
+     */
+    override fun test(
+        host: HostView,
+        task: ServiceTask,
+    ): Boolean {
+        // No requirement declared: nothing to check.
+        val requiredCapacity = task.flavor.meta["gpu-capacity"] as? Double ?: return true
+
+        val gpuModels = host.host.getModel().gpuHostModels()
+        // NOTE(review): the largest capacity and the largest core count are picked independently and may
+        // belong to different GPUs once hosts carry more than one GPU model — confirm this is intended.
+        val availableCapacity = (gpuModels.maxOfOrNull { it.gpuCoreCapacity() } ?: 0).toDouble()
+        // Hosts without any GPU model fall back to capacity 0 and core count -1.
+        val availableCores = (gpuModels.maxOfOrNull { it -> it.gpuCoreCount } ?: -1).toDouble()
+        val availableRatio = availableCapacity / availableCores
+
+        return availableRatio >= (requiredCapacity / task.flavor.gpuCoreCount)
+    }
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt
new file mode 100644
index 00000000..9f564776
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/filters/VGpuFilter.kt
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.filters
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+
+/**
+ * A [HostFilter] that admits a host only if it can still provide the GPU cores requested by a [ServiceTask].
+ *
+ * The GPU core count of the host is the largest `gpuCoreCount` among its GPU models (0 when it has none).
+ *
+ * @param allocationRatio Factor applied to the host's GPU core count to obtain the total number of GPU cores
+ * that may be provisioned on it.
+ */
+public class VGpuFilter(private val allocationRatio: Double) : HostFilter {
+    override fun test(
+        host: HostView,
+        task: ServiceTask,
+    ): Boolean {
+        val wanted = task.flavor.gpuCoreCount
+        val gpuCoresOnHost = host.host.getModel().gpuHostModels().maxOfOrNull { it.gpuCoreCount() } ?: 0
+        val provisionLimit = gpuCoresOnHost * allocationRatio
+
+        // A single task may never ask for more GPU cores than the host physically has; overcommitting is
+        // only possible across tasks. Beyond that, the request must fit in what is left under the limit.
+        return wanted <= gpuCoresOnHost && provisionLimit - host.provisionedGpuCores >= wanted
+    }
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt
index 4f52e11a..d9b094fb 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuCapacityWeigher.kt
@@ -35,7 +35,7 @@ public class VCpuCapacityWeigher(override val multiplier: Double = 1.0) : HostWe
): Double {
val model = host.host.getModel()
val requiredCapacity = task.flavor.meta["cpu-capacity"] as? Double ?: 0.0
- return model.cpuCapacity - requiredCapacity / task.flavor.coreCount
+ return model.cpuCapacity - requiredCapacity / task.flavor.cpuCoreCount
}
override fun toString(): String = "VCpuWeigher"
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt
index 3f9a7f03..d882c237 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VCpuWeigher.kt
@@ -39,7 +39,7 @@ public class VCpuWeigher(private val allocationRatio: Double, override val multi
host: HostView,
task: ServiceTask,
): Double {
- return allocationRatio - host.provisionedCores
+ return allocationRatio - host.provisionedCpuCores
}
override fun toString(): String = "VCpuWeigher"
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt
new file mode 100644
index 00000000..35f2c7b9
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuCapacityWeigher.kt
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.weights
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+
+/**
+ * A [HostWeigher] that weighs hosts by the difference between the GPU capacity offered by the host and the GPU
+ * capacity a [ServiceTask] requires per requested GPU core.
+ *
+ * The required capacity is read from the "gpu-capacity" entry of the flavor metadata and defaults to 0.0 when
+ * the entry is absent or not a [Double].
+ */
+public class VGpuCapacityWeigher(override val multiplier: Double = 1.0) : HostWeigher {
+    /**
+     * @param host The host to weigh.
+     * @param task The task to place.
+     * @return The largest `gpuCoreCapacity` among the host's GPU models (0.0 if it has none) minus the
+     * required capacity per requested GPU core.
+     */
+    override fun getWeight(
+        host: HostView,
+        task: ServiceTask,
+    ): Double {
+        val model = host.host.getModel()
+        val requiredCapacity = task.flavor.meta["gpu-capacity"] as? Double ?: 0.0
+        val availableCapacity = model.gpuHostModels.maxOfOrNull { it.gpuCoreCapacity } ?: 0.0
+
+        // Tasks that request no GPU cores would otherwise divide by zero and yield NaN (0.0 / 0) or
+        // -Infinity as weight; treat them as requiring no per-core capacity instead.
+        val requestedCores = task.flavor.gpuCoreCount
+        val requiredPerCore = if (requestedCores > 0) requiredCapacity / requestedCores else 0.0
+
+        return availableCapacity - requiredPerCore
+    }
+
+    // NOTE(review): this is the same string VGpuWeigher returns — confirm whether a distinct name is wanted.
+    override fun toString(): String = "VGpuWeigher"
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt
new file mode 100644
index 00000000..7397bf10
--- /dev/null
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/scheduler/weights/VGpuWeigher.kt
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.simulator.scheduler.weights
+
+import org.opendc.compute.simulator.service.HostView
+import org.opendc.compute.simulator.service.ServiceTask
+
+/**
+ * A [HostWeigher] whose weight decreases with the number of GPU cores already provisioned on a host.
+ *
+ * @param allocationRatio Value from which the host's provisioned GPU core count is subtracted to obtain the
+ * weight; must be greater than zero.
+ */
+public class VGpuWeigher(
+    private val allocationRatio: Double,
+    override val multiplier: Double = 1.0,
+) : HostWeigher {
+    init {
+        // Reject non-positive (and NaN) ratios at construction time.
+        require(allocationRatio > 0.0) { "Allocation ratio must be greater than zero" }
+    }
+
+    /** Returns [allocationRatio] minus the GPU cores currently provisioned on [host]; [task] is not consulted. */
+    override fun getWeight(
+        host: HostView,
+        task: ServiceTask,
+    ): Double = allocationRatio - host.provisionedGpuCores
+
+    override fun toString(): String = "VGpuWeigher"
+}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt
index 00f7854d..affaab58 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltHostExportColumns.kt
@@ -144,6 +144,48 @@ public object DfltHostExportColumns {
field = Types.required(INT64).named("cpu_time_lost"),
) { it.cpuLostTime }
+ // TODO: support multiple GPUs
+
+    // Host GPU columns. Each column is optional and exports only the first element of the corresponding
+    // per-GPU list of the reader; the value is null when that list is empty.
+
+    // NOTE(review): the visible part of HostTableReaderImpl.record() fills gpuLimits from the GPU stats but
+    // not gpuCapacities — confirm that this column can ever be non-null.
+    public val GPU_CAPACITY: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_capacity")) { reader ->
+            reader.gpuCapacities.firstOrNull()
+        }
+
+    public val GPU_USAGE: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_usage")) { reader ->
+            reader.gpuUsages.firstOrNull()
+        }
+
+    public val GPU_DEMAND: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_demand")) { reader ->
+            reader.gpuDemands.firstOrNull()
+        }
+
+    public val GPU_UTILIZATION: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_utilization")) { reader ->
+            reader.gpuUtilizations.firstOrNull()
+        }
+
+    public val GPU_TIME_ACTIVE: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_active")) { reader ->
+            reader.gpuActiveTimes.firstOrNull()
+        }
+
+    public val GPU_TIME_IDLE: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_idle")) { reader ->
+            reader.gpuIdleTimes.firstOrNull()
+        }
+
+    public val GPU_TIME_STEAL: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_steal")) { reader ->
+            reader.gpuStealTimes.firstOrNull()
+        }
+
+    public val GPU_TIME_LOST: ExportColumn<HostTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_lost")) { reader ->
+            reader.gpuLostTimes.firstOrNull()
+        }
+
public val POWER_DRAW: ExportColumn<HostTableReader> =
ExportColumn(
field = Types.required(FLOAT).named("power_draw"),
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt
index f533eb1f..ad7a1d52 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/DfltTaskExportColumns.kt
@@ -132,6 +132,43 @@ public object DfltTaskExportColumns {
field = Types.required(INT64).named("cpu_time_lost"),
) { it.cpuLostTime }
+ // TODO: support multiple GPUs
+
+    // Task GPU columns. Each column is optional and exports only the first element of the corresponding
+    // per-GPU array of the reader; the value is null when that array is null or empty.
+
+    public val GPU_CAPACITY: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_capacity")) { reader ->
+            reader.gpuLimits?.firstOrNull()
+        }
+
+    public val GPU_USAGE: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_usage")) { reader ->
+            reader.gpuUsages?.firstOrNull()
+        }
+
+    public val GPU_DEMAND: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(FLOAT).named("gpu_demand")) { reader ->
+            reader.gpuDemands?.firstOrNull()
+        }
+
+    public val GPU_TIME_ACTIVE: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_active")) { reader ->
+            reader.gpuActiveTimes?.firstOrNull()
+        }
+
+    public val GPU_TIME_IDLE: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_idle")) { reader ->
+            reader.gpuIdleTimes?.firstOrNull()
+        }
+
+    public val GPU_TIME_STEAL: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_steal")) { reader ->
+            reader.gpuStealTimes?.firstOrNull()
+        }
+
+    public val GPU_TIME_LOST: ExportColumn<TaskTableReader> =
+        ExportColumn(field = Types.optional(INT64).named("gpu_time_lost")) { reader ->
+            reader.gpuLostTimes?.firstOrNull()
+        }
+
public val UP_TIME: ExportColumn<TaskTableReader> =
ExportColumn(
field = Types.required(INT64).named("uptime"),
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt
index a626c41b..4fb930e1 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/parquet/ParquetComputeMonitor.kt
@@ -44,6 +44,7 @@ public class ParquetComputeMonitor(
private val batteryExporter: Exporter<BatteryTableReader>?,
private val serviceExporter: Exporter<ServiceTableReader>?,
) : ComputeMonitor, AutoCloseable {
+ // FIXME: Include GPU
override fun record(reader: HostTableReader) {
hostExporter?.write(reader)
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt
index ff0115df..fbffd508 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReader.kt
@@ -113,6 +113,51 @@ public interface HostTableReader : Exportable {
public val cpuLostTime: Long
/**
+ * The capacity of the GPUs in the host (in MHz).
+ */
+ public val gpuCapacities: ArrayList<Double>
+
+ /**
+ * The capacity of the GPUs in the host (in MHz). Entries are ordered by GPU ID.
+ */
+ public val gpuLimits: ArrayList<Double>
+
+ /**
+ * The usage per GPU in the host (in MHz). Entries are ordered by GPU ID.
+ */
+ public val gpuUsages: ArrayList<Double>
+
+ /**
+ * The demand per GPU of the guests (in MHz). Entries are ordered by GPU ID.
+ */
+ public val gpuDemands: ArrayList<Double>
+
+ /**
+ * The utilization of each GPU in the host. Entries are ordered by GPU ID.
+ */
+ public val gpuUtilizations: ArrayList<Double>
+
+ /**
+ * The duration (in ms) that the respective GPU was active in the host. Entries are ordered by GPU ID.
+ */
+ public val gpuActiveTimes: ArrayList<Long>
+
+ /**
+ * The duration (in ms) that the respective GPU was idle in the host. Entries are ordered by GPU ID.
+ */
+ public val gpuIdleTimes: ArrayList<Long>
+
+ /**
+ * The duration (in ms) that a vGPU wanted to run, but no capacity was available. Entries are ordered by GPU ID.
+ */
+ public val gpuStealTimes: ArrayList<Long>
+
+ /**
+ * The duration (in ms) of GPU time that was lost due to interference. Entries are ordered by GPU ID.
+ */
+ public val gpuLostTimes: ArrayList<Long>
+
+ /**
* The current power draw of the host in W.
*/
public val powerDraw: Double
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt
index 6e1dac48..cb25358a 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/host/HostTableReaderImpl.kt
@@ -49,6 +49,7 @@ public class HostTableReaderImpl(
_tasksActive = table.tasksActive
_guestsError = table.guestsError
_guestsInvalid = table.guestsInvalid
+ // CPU stats
_cpuCapacity = table.cpuCapacity
_cpuDemand = table.cpuDemand
_cpuUsage = table.cpuUsage
@@ -57,6 +58,18 @@ public class HostTableReaderImpl(
_cpuIdleTime = table.cpuIdleTime
_cpuStealTime = table.cpuStealTime
_cpuLostTime = table.cpuLostTime
+ // GPU stats
+ _gpuCapacities = table.gpuCapacities
+ _gpuLimits = table.gpuLimits
+ _gpuDemands = table.gpuDemands
+ _gpuUsages = table.gpuUsages
+ _gpuUtilizations = table.gpuUtilizations
+ _gpuActiveTimes = table.gpuActiveTimes
+ _gpuIdleTimes = table.gpuIdleTimes
+ _gpuStealTimes = table.gpuStealTimes
+ _gpuLostTimes = table.gpuLostTimes
+
+ // energy & carbon stats
_powerDraw = table.powerDraw
_energyUsage = table.energyUsage
_embodiedCarbon = table.embodiedCarbon
@@ -135,6 +148,65 @@ public class HostTableReaderImpl(
private var _cpuLostTime = 0L
private var previousCpuLostTime = 0L
+    // Per-GPU statistics of the host; every list is replaced as a whole when new values are recorded.
+    override val gpuCapacities: ArrayList<Double>
+        get() = _gpuCapacities
+    private var _gpuCapacities: ArrayList<Double> = ArrayList()
+
+    override val gpuLimits: ArrayList<Double>
+        get() = _gpuLimits
+    private var _gpuLimits: ArrayList<Double> = ArrayList()
+
+    override val gpuUsages: ArrayList<Double>
+        get() = _gpuUsages
+    private var _gpuUsages: ArrayList<Double> = ArrayList()
+
+    override val gpuDemands: ArrayList<Double>
+        get() = _gpuDemands
+    private var _gpuDemands: ArrayList<Double> = ArrayList()
+
+    override val gpuUtilizations: ArrayList<Double>
+        get() = _gpuUtilizations
+    private var _gpuUtilizations: ArrayList<Double> = ArrayList()
+
+    // The time counters below are exposed as the difference between the current values and the values
+    // stored in the matching `previous…` list.
+    override val gpuActiveTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuActiveTimes, previousGpuActiveTimes)
+    private var _gpuActiveTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuActiveTimes: ArrayList<Long> = ArrayList()
+
+    override val gpuIdleTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuIdleTimes, previousGpuIdleTimes)
+    private var _gpuIdleTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuIdleTimes: ArrayList<Long> = ArrayList()
+
+    override val gpuStealTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuStealTimes, previousGpuStealTimes)
+    private var _gpuStealTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuStealTimes: ArrayList<Long> = ArrayList()
+
+    override val gpuLostTimes: ArrayList<Long>
+        get() = gpuTimeDeltas(_gpuLostTimes, previousGpuLostTimes)
+    private var _gpuLostTimes: ArrayList<Long> = ArrayList()
+    private var previousGpuLostTimes: ArrayList<Long> = ArrayList()
+
+    /**
+     * Returns, for every index of [current], the current value minus the value at the same index of
+     * [previous]; an index missing from [previous] counts as 0.
+     *
+     * The result is built directly into an [ArrayList] so no downcast of a generic [List] is needed.
+     */
+    private fun gpuTimeDeltas(
+        current: List<Long>,
+        previous: List<Long>,
+    ): ArrayList<Long> =
+        current.mapIndexedTo(ArrayList<Long>(current.size)) { index, value ->
+            value - (previous.getOrNull(index) ?: 0L)
+        }
+
+
override val powerDraw: Double
get() = _powerDraw
private var _powerDraw = 0.0
@@ -168,6 +240,7 @@ public class HostTableReaderImpl(
override fun record(now: Instant) {
val hostCpuStats = host.getCpuStats()
val hostSysStats = host.getSystemStats()
+ val hostGpuStats = host.getGpuStats()
_timestamp = now
_timestampAbsolute = now + startTime
@@ -184,6 +257,16 @@ public class HostTableReaderImpl(
_cpuIdleTime = hostCpuStats.idleTime
_cpuStealTime = hostCpuStats.stealTime
_cpuLostTime = hostCpuStats.lostTime
+ // GPU stats
+ _gpuLimits = hostGpuStats.map { it.capacity } as ArrayList<Double>
+ _gpuDemands = hostGpuStats.map { it.demand } as ArrayList<Double>
+ _gpuUsages = hostGpuStats.map { it.usage } as ArrayList<Double>
+ _gpuUtilizations = hostGpuStats.map { it.utilization } as ArrayList<Double>
+ _gpuActiveTimes = hostGpuStats.map { it.activeTime } as ArrayList<Long>
+ _gpuIdleTimes = hostGpuStats.map { it.idleTime } as ArrayList<Long>
+ _gpuStealTimes = hostGpuStats.map { it.stealTime } as ArrayList<Long>
+ _gpuLostTimes = hostGpuStats.map { it.lostTime } as ArrayList<Long>
+ // energy & carbon stats
_powerDraw = hostSysStats.powerDraw
_energyUsage = hostSysStats.energyUsage
_embodiedCarbon = hostSysStats.embodiedCarbon
@@ -202,6 +285,10 @@ public class HostTableReaderImpl(
previousCpuIdleTime = _cpuIdleTime
previousCpuStealTime = _cpuStealTime
previousCpuLostTime = _cpuLostTime
+ previousGpuActiveTimes = _gpuActiveTimes
+ previousGpuIdleTimes = _gpuIdleTimes
+ previousGpuStealTimes = _gpuStealTimes
+ previousGpuLostTimes = _gpuLostTimes
previousEnergyUsage = _energyUsage
previousUptime = _uptime
previousDowntime = _downtime
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt
index b0745dd6..f71587c7 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReader.kt
@@ -32,6 +32,7 @@ import java.time.Instant
* An interface that is used to read a row of a task trace entry.
*/
public interface TaskTableReader : Exportable {
+ // TODO: find better way for more resources
public fun copy(): TaskTableReader
public fun setValues(table: TaskTableReader)
@@ -130,6 +131,41 @@ public interface TaskTableReader : Exportable {
public val cpuLostTime: Long
/**
+ * The capacity of the GPUs of the host on which the task is running (in MHz).
+ */
+ public val gpuLimits: DoubleArray?
+
+ /**
+ * The amount of GPU capacity allocated to the task (in MHz).
+ */
+ public val gpuUsages: DoubleArray?
+
+ /**
+ * The GPU capacity demanded by this task (in MHz).
+ */
+ public val gpuDemands: DoubleArray?
+
+ /**
+ * The duration (in seconds) that a GPU was active in the task.
+ * NOTE(review): HostTableReader documents the corresponding GPU counters in ms — confirm the unit here.
+ */
+ public val gpuActiveTimes: LongArray?
+
+ /**
+ * The duration (in seconds) that a GPU was idle in the task.
+ */
+ public val gpuIdleTimes: LongArray?
+
+ /**
+ * The duration (in seconds) that a vGPU wanted to run, but no capacity was available.
+ */
+ public val gpuStealTimes: LongArray?
+
+ /**
+ * The duration (in seconds) of GPU time that was lost due to interference.
+ */
+ public val gpuLostTimes: LongArray?
+
+ /**
* The state of the task
*/
public val taskState: TaskState?
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt
index d63202a9..6128c9a2 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/telemetry/table/task/TaskTableReaderImpl.kt
@@ -63,6 +63,15 @@ public class TaskTableReaderImpl(
_cpuIdleTime = table.cpuIdleTime
_cpuStealTime = table.cpuStealTime
_cpuLostTime = table.cpuLostTime
+ // GPU stats
+ _gpuLimits = table.gpuLimits
+ _gpuDemands = table.gpuDemands
+ _gpuUsages = table.gpuUsages
+ _gpuActiveTimes = table.gpuActiveTimes
+ _gpuIdleTimes = table.gpuIdleTimes
+ _gpuStealTimes = table.gpuStealTimes
+ _gpuLostTimes = table.gpuLostTimes
+
_uptime = table.uptime
_downtime = table.downtime
_numFailures = table.numFailures
@@ -84,7 +93,7 @@ public class TaskTableReaderImpl(
task.name,
"vm",
"x86",
- task.flavor.coreCount,
+ task.flavor.cpuCoreCount,
task.flavor.memorySize,
)
@@ -168,6 +177,74 @@ public class TaskTableReaderImpl(
private var _cpuLostTime = 0L
private var previousCpuLostTime = 0L
+    // Per-GPU statistics of the task. The getters never return null: a missing backing array is reported
+    // as an empty array.
+    override val gpuLimits: DoubleArray?
+        get() = _gpuLimits ?: DoubleArray(0)
+    private var _gpuLimits: DoubleArray? = null
+
+    override val gpuUsages: DoubleArray?
+        get() = _gpuUsages ?: DoubleArray(0)
+    private var _gpuUsages: DoubleArray? = null
+
+    override val gpuDemands: DoubleArray?
+        get() = _gpuDemands ?: DoubleArray(0)
+    private var _gpuDemands: DoubleArray? = null
+
+    // The time counters are exposed as deltas against the matching `previous…` array, see [gpuTimeDelta].
+    override val gpuActiveTimes: LongArray?
+        get() = gpuTimeDelta(_gpuActiveTimes, previousGpuActiveTimes)
+    private var _gpuActiveTimes: LongArray? = null
+    private var previousGpuActiveTimes: LongArray? = null
+
+    override val gpuIdleTimes: LongArray?
+        get() = gpuTimeDelta(_gpuIdleTimes, previousGpuIdleTimes)
+    private var _gpuIdleTimes: LongArray? = null
+    private var previousGpuIdleTimes: LongArray? = null
+
+    override val gpuStealTimes: LongArray?
+        get() = gpuTimeDelta(_gpuStealTimes, previousGpuStealTimes)
+    private var _gpuStealTimes: LongArray? = null
+    private var previousGpuStealTimes: LongArray? = null
+
+    override val gpuLostTimes: LongArray?
+        get() = gpuTimeDelta(_gpuLostTimes, previousGpuLostTimes)
+    private var _gpuLostTimes: LongArray? = null
+    private var previousGpuLostTimes: LongArray? = null
+
+    /**
+     * Computes the element-wise difference `current - previous` of two per-GPU counters.
+     *
+     * @return an empty array when [current] is null; [current] itself when [previous] is null or has a
+     * different size; otherwise a new array holding the per-index differences.
+     */
+    private fun gpuTimeDelta(
+        current: LongArray?,
+        previous: LongArray?,
+    ): LongArray {
+        if (current == null) {
+            return LongArray(0)
+        }
+        // Without a comparable previous sample the raw counters are reported unchanged.
+        if (previous == null || previous.size != current.size) {
+            return current
+        }
+        return LongArray(current.size) { i -> current[i] - previous[i] }
+    }
+
+
override val taskState: TaskState?
get() = _taskState
private var _taskState: TaskState? = null
@@ -192,6 +269,7 @@ public class TaskTableReaderImpl(
val cpuStats = simHost?.getCpuStats(task)
val sysStats = simHost?.getSystemStats(task)
+ val gpuStats = simHost?.getGpuStats(task)
_hostName = task.hostName
@@ -214,6 +292,26 @@ public class TaskTableReaderImpl(
_scheduleTime = task.scheduledAt
_finishTime = task.finishedAt
+ if (gpuStats != null && gpuStats.isNotEmpty()) {
+ val size = gpuStats.size
+ _gpuLimits = DoubleArray(size) { i -> gpuStats[i].capacity }
+ _gpuDemands = DoubleArray(size) { i -> gpuStats[i].demand }
+ _gpuUsages = DoubleArray(size) { i -> gpuStats[i].usage }
+ _gpuActiveTimes = LongArray(size) { i -> gpuStats[i].activeTime }
+ _gpuIdleTimes = LongArray(size) { i -> gpuStats[i].idleTime }
+ _gpuStealTimes = LongArray(size) { i -> gpuStats[i].stealTime }
+ _gpuLostTimes = LongArray(size) { i -> gpuStats[i].lostTime }
+ } else {
+ _gpuIdleTimes = null
+ _gpuStealTimes = null
+ _gpuLostTimes = null
+ _gpuIdleTimes = null
+ _gpuLimits = null
+ _gpuUsages = null
+ _gpuDemands = null
+ _gpuActiveTimes = null
+ }
+
_taskState = task.state
}
@@ -227,6 +325,10 @@ public class TaskTableReaderImpl(
previousCpuIdleTime = _cpuIdleTime
previousCpuStealTime = _cpuStealTime
previousCpuLostTime = _cpuLostTime
+ previousGpuActiveTimes = _gpuActiveTimes
+ previousGpuIdleTimes = _gpuIdleTimes
+ previousGpuStealTimes = _gpuStealTimes
+ previousGpuLostTimes = _gpuLostTimes
simHost = null
_cpuLimit = 0.0