author     Niels Thiele <noleu66@posteo.net>        2025-06-22 12:31:21 +0200
committer  GitHub <noreply@github.com>              2025-06-22 12:31:21 +0200
commit     0203254b709614fa732c114aa25916f61b8b3275 (patch)
tree       63232140a8e60e16e1668a51eb58954d8609fbdc /opendc-compute/opendc-compute-topology/src
parent     8f846655347195bf6f22a4a102aa06f0ab127da1 (diff)
Implemented Single GPU Support & outline of host-level allocation policies (#342)
* renamed performance counter to distinguish different resource types
* added GPU, modelled similar to CPU
* added GPUs to machine model
* list of GPUs instead of single instance
* renamed memory speed to bandwidth
* enabled parsing of GPU resources
* split powermodel into cpu and GPU powermodel
* added gpu parsing tests
* added idea of host level scheduling
* added tests for multi gpu parsing
* renamed powermodel to cpupowermodel
* clarified naming of cpu and gpu components
* added resource type to flow supplier and edge
* added resourcetype
* added GPU components and resource type to fragments
* added GPU to workload and updated resource usage retrieval
* implemented first version of multi resource
* added name to workload
* renamed performance counters
* removed commented-out code
* removed deprecated comments
* included demand and supply in calculations
* resolving rebase mismatches
* moved resource type from flowedge class to common package
* added available resources to machines
* cleaner separation of whether a workload is started by a simmachine or a vm
* Replaced exception with dedicated enum
* Only looping over resources that are actually used
* using hashmaps instead of arrays to handle resourcetype, for readability
* fixed condition
* tracking finished workloads per resource type
* removed resource type from flowedge
* made supply and demand distribution resource specific
* added power model for GPU
* removed unused test setup
* removed deprecated comments
* removed unused parameter
* added ID for GPU
* added GPUs and GPU performance counters (naively)
* implemented capturing of GPU statistics
* added reminders for future implementations
* renamed properties for better identification
* added capturing of GPU statistics
* implemented first tests for GPUs
* unified access to performance counters
* added interface for general compute resource handling
* implemented multi-resource support in simmachine
* added individual edge to VM per resource
* extended compute resource interface
* implemented multi-resource support in PSU
* implemented generic retrieval of computeresources
* implemented multi-resource support in vm
* made method use more resource-specific
* implemented simple GPU tests
* rolled back frequency and demand use
* made naming independent of used resource
* using the workload's resources instead of the VM's to determine available resources
* implemented determination of used resources in workload
* removed logging statements
* implemented reading from workload
* fixed naming for host-level allocation
* fixed next deadline calculation
* fixed forwarding supply
* reduced memory footprint
* made GPU powermodel nullable
* made GPU powermodel configurable in topology
* implemented tests for basic gpu scheduler
* added gpu properties
* implemented weights, filter and simple cpu-gpu scheduler
* spotless apply
* spotless apply pt. 2
* fixed capitalization
* spotless kotlin run
* implemented column export
* todo update
* removed code comments
* Merged PerformanceCounter classes into one & removed interface
* removed GPU-specific powermodel
* Rebase master: kept both versions of TopologyFactories
* renamed CpuPowermodel to the resource-independent Powermodel and moved it from the cpu package to the power package
* implemented default of getResourceType & removed overrides where possible
* split getResourceType into Consumer and Supplier
* added power as resource type
* reduced supply demand from arrayList to single value
* combining GPUs into one large GPU, until full multi-gpu support
* merged distribution policy enum with corresponding factory
* added comment
* post-rebase fixes
* aligned naming
* Added GPU metrics to task output
* Updates power resource type to uppercase: standardizes the `ResourceType.Power` enum to `ResourceType.POWER` for consistency with other resource types and improved readability
* Removes deprecated test assertions: removes commented-out assertions in GPU tests; these are no longer needed and clutter the test code
* Renames MaxMinFairnessStrategy to MaxMinFairnessPolicy for clarity and consistency with naming conventions; the factory and distributor now use the updated name (a standalone sketch of max-min fairness follows this message)
* applies spotless
* nulls GPUs as the field is not used
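The MaxMinFairnessPolicy referenced above distributes a limited supply across competing consumers. As a point of reference only, the standalone Kotlin sketch below shows the classic max-min fairness (water-filling) allocation the policy is named after; the function name, signature, and numbers are illustrative and not OpenDC's implementation.

// Illustrative sketch of max-min fairness: satisfy the smallest demands first,
// then split what remains equally among the consumers that are still unsatisfied.
// Not OpenDC code; names and signature are invented for this example.
fun maxMinFairness(demands: List<Double>, capacity: Double): List<Double> {
    val allocations = DoubleArray(demands.size)
    var remaining = capacity
    // Visit consumers in order of increasing demand.
    val order = demands.indices.sortedBy { demands[it] }
    for ((i, idx) in order.withIndex()) {
        val fairShare = remaining / (demands.size - i)
        val grant = minOf(demands[idx], fairShare)
        allocations[idx] = grant
        remaining -= grant
    }
    return allocations.toList()
}

// Example: maxMinFairness(listOf(2.0, 8.0, 10.0), 12.0) yields [2.0, 5.0, 5.0].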
Diffstat (limited to 'opendc-compute/opendc-compute-topology/src')
-rw-r--r--  opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt    57
-rw-r--r--  opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt         5
-rw-r--r--  opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt   17
3 files changed, 66 insertions, 13 deletions
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt
index b6c945d2..b52608a9 100644
--- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt
+++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt
@@ -31,10 +31,13 @@ import org.opendc.compute.topology.specs.HostJSONSpec
import org.opendc.compute.topology.specs.HostSpec
import org.opendc.compute.topology.specs.PowerSourceSpec
import org.opendc.compute.topology.specs.TopologySpec
-import org.opendc.simulator.compute.cpu.getPowerModel
import org.opendc.simulator.compute.models.CpuModel
+import org.opendc.simulator.compute.models.GpuModel
import org.opendc.simulator.compute.models.MachineModel
import org.opendc.simulator.compute.models.MemoryUnit
+import org.opendc.simulator.compute.power.getPowerModel
+import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory
+import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory.DistributionPolicyType
import java.io.File
import java.io.InputStream
@@ -166,29 +169,63 @@ private fun HostJSONSpec.toHostSpec(clusterName: String): HostSpec {
}
val unknownMemoryUnit = MemoryUnit(memory.vendor, memory.modelName, memory.memorySpeed.toMHz(), memory.memorySize.toMiB().toLong())
+ val gpuUnits =
+ List(gpu?.count ?: 0) {
+ GpuModel(
+ globalCoreId++,
+ gpu!!.coreCount,
+ gpu.coreSpeed.toMHz(),
+ gpu.memoryBandwidth.toKibps(),
+ gpu.memorySize.toMiB().toLong(),
+ gpu.vendor,
+ gpu.modelName,
+ gpu.architecture,
+ )
+ }
+
val machineModel =
MachineModel(
units,
unknownMemoryUnit,
+ gpuUnits,
+ // TODO: Pass through
+ DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness),
+ DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness),
)
- val powerModel =
+ val cpuPowerModel =
getPowerModel(
- powerModel.modelType,
- powerModel.power.toWatts(),
- powerModel.maxPower.toWatts(),
- powerModel.idlePower.toWatts(),
- powerModel.calibrationFactor,
- powerModel.asymUtil,
- powerModel.dvfs,
+ cpuPowerModel.modelType,
+ cpuPowerModel.power.toWatts(),
+ cpuPowerModel.maxPower.toWatts(),
+ cpuPowerModel.idlePower.toWatts(),
+ cpuPowerModel.calibrationFactor,
+ cpuPowerModel.asymUtil,
+ cpuPowerModel.dvfs,
)
+ val gpuPowerModel =
+ if (gpuUnits.isEmpty()) {
+ null
+ } else {
+ getPowerModel(
+ gpuPowerModel.modelType,
+ gpuPowerModel.power.toWatts(),
+ gpuPowerModel.maxPower.toWatts(),
+ gpuPowerModel.idlePower.toWatts(),
+ gpuPowerModel.calibrationFactor,
+ gpuPowerModel.asymUtil,
+ gpuPowerModel.dvfs,
+ )
+ }
+
val hostSpec =
HostSpec(
createUniqueName(this.name, hostNames),
clusterName,
machineModel,
- powerModel,
+ cpuPowerModel,
+ gpuPowerModel,
)
return hostSpec
}
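The machine model above hard-codes MaxMinFairness for both distribution policies (note the "TODO: Pass through"). A hypothetical way to pass the policy through from the topology spec is sketched below; the policyName parameter, the JSON field it would come from, and the valueOf-based lookup are assumptions, not part of this commit.

// Hypothetical pass-through for the TODO above. Assumes DistributionPolicyType is an
// enum and that the topology spec would carry an optional policy name per host.
// Falls back to the MaxMinFairness default used in this commit.
private fun resolveDistributionPolicy(policyName: String?) =
    DistributionPolicyFactory.getDistributionStrategy(
        if (policyName != null) DistributionPolicyType.valueOf(policyName) else DistributionPolicyType.MaxMinFairness,
    )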
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt
index e4ec89e1..30a75896 100644
--- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt
+++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt
@@ -22,8 +22,8 @@
package org.opendc.compute.topology.specs
-import org.opendc.simulator.compute.cpu.CpuPowerModel
import org.opendc.simulator.compute.models.MachineModel
+import org.opendc.simulator.compute.power.PowerModel
/**
* Description of a physical host that will be simulated by OpenDC and host the virtual machines.
@@ -36,7 +36,8 @@ public data class HostSpec(
val name: String,
val clusterName: String,
val model: MachineModel,
- val cpuPowerModel: CpuPowerModel,
+ val cpuPowerModel: PowerModel,
+ val gpuPowerModel: PowerModel?,
val embodiedCarbon: Double = 1000.0,
val expectedLifetime: Double = 5.0,
)
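A minimal sketch of how the new nullable gpuPowerModel might be consumed downstream, treating a host without GPUs as drawing no GPU power. The computePower(utilization) call mirrors how OpenDC's CPU power models are used elsewhere, but both it and the helper name are assumptions here.

// Assumption: PowerModel exposes computePower(utilization) like the CPU power models.
// A missing GPU power model (gpuPowerModel == null) is read as "no GPU power draw".
fun gpuPowerDraw(spec: HostSpec, utilization: Double): Double =
    spec.gpuPowerModel?.computePower(utilization) ?: 0.0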
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt
index 8cbf818b..62c3906a 100644
--- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt
+++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt
@@ -24,6 +24,7 @@ package org.opendc.compute.topology.specs
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
+import org.opendc.common.units.DataRate
import org.opendc.common.units.DataSize
import org.opendc.common.units.Frequency
import org.opendc.common.units.Power
@@ -76,7 +77,9 @@ public data class HostJSONSpec(
val cpu: CPUJSONSpec,
val count: Int = 1,
val memory: MemoryJSONSpec,
- val powerModel: PowerModelSpec = PowerModelSpec.DFLT,
+ val gpu: GPUJSONSpec? = null,
+ val cpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT,
+ val gpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT,
)
/**
@@ -118,6 +121,18 @@ public data class MemoryJSONSpec(
)
@Serializable
+public data class GPUJSONSpec(
+ val count: Int = 1,
+ val coreCount: Int,
+ val coreSpeed: Frequency,
+ val memorySize: DataSize = DataSize.ofMiB(-1),
+ val memoryBandwidth: DataRate = DataRate.ofKibps(-1),
+ val vendor: String = "unknown",
+ val modelName: String = "unknown",
+ val architecture: String = "unknown",
+)
+
+@Serializable
public data class PowerModelSpec(
val modelType: String,
val power: Power = Power.ofWatts(400),
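For illustration, the new GPUJSONSpec added above can also be built directly in Kotlin rather than parsed from topology JSON; only coreCount and coreSpeed lack defaults. Frequency.ofMHz is assumed by analogy with Power.ofWatts, DataSize.ofMiB, and DataRate.ofKibps used in the defaults, and the hardware figures are invented for the example.

// Illustrative only: constructs the new GPU spec in code. Frequency.ofMHz is an
// assumed factory (by analogy with the unit factories in the spec defaults);
// the concrete numbers are made up.
import org.opendc.common.units.DataRate
import org.opendc.common.units.DataSize
import org.opendc.common.units.Frequency
import org.opendc.compute.topology.specs.GPUJSONSpec

val exampleGpu =
    GPUJSONSpec(
        count = 1,
        coreCount = 2048,
        coreSpeed = Frequency.ofMHz(1500.0),
        memorySize = DataSize.ofMiB(8 * 1024),
        memoryBandwidth = DataRate.ofKibps(500_000_000),
        vendor = "example-vendor",
        modelName = "example-gpu",
        architecture = "example-arch",
    )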