diff options
| author | Niels Thiele <noleu66@posteo.net> | 2025-06-22 12:31:21 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-06-22 12:31:21 +0200 |
| commit | 0203254b709614fa732c114aa25916f61b8b3275 (patch) | |
| tree | 63232140a8e60e16e1668a51eb58954d8609fbdc /opendc-compute/opendc-compute-topology/src | |
| parent | 8f846655347195bf6f22a4a102aa06f0ab127da1 (diff) | |
Implemented Single GPU Support & outline of host-level allocation policies (#342)
* renamed performance counter to distinguish different resource types
* added GPU, modelled similar to CPU
* added GPUs to machine model
* list of GPUs instead of single instance
* renamed memory speed to bandwidth
* enabled parsing of GPU resources
* split powermodel into cpu and GPU powermodel
* added gpu parsing tests
* added idea of host level scheduling
* added tests for multi gpu parsing
* renamed powermodel to cpupowermodel
* clarified naming of cpu and gpu components
* added resource type to flow supplier and edge
* added resourcetype
* added GPU components and resource type to fragments
* added GPU to workload and updated resource usage retrieval
* implemented first version of multi resource
* added name to workload
* renamed performance counters
* removed commented out code
* removed deprecated comments
* included demand and supply into calculations
* resolving rebase mismatches
* moved resource type from flowedge class to common package
* added available resources to machines
* cleaner separation if workload is started of simmachine or vm
* Replaced exception with dedicated enum
* Only looping over resources that are actually used
* using hashmaps to handle resourcetype instead of arrays for readability
* fixed condition
* tracking finished workloads per resource type
* removed resource type from flowedge
* made supply and demand distribution resource specific
* added power model for GPU
* removed unused test setup
* removed deprecated comments
* removed unused parameter
* added ID for GPU
* added GPUs and GPU performance counters (naively)
* implemented capturing of GPU statistics
* added reminders for future implementations
* renamed properties for better identification
* added capturing GPU statistics
* implemented first tests for GPUs
* unified access to performance counters
* added interface for general compute resource handling
* implemented multi resource support in simmachine
* added individual edge to VM per resource
* extended compute resource interface
* implemented multi-resource support in PSU
* implemented generic retrieval of computeresources
* implemented multi-resource support in vm
* made method use more resource specific
* implemented simple GPU tests
* rolled back frequency and demand use
* made naming independent of used resource
* using workloads resources instead of VMs to determine available resource
* implemented determination of used resources in workload
* removed logging statements
* implemented reading from workload
* fixed naming for host-level allocation
* fixed next deadline calculation
* fixed forwarding supply
* reduced memory footprint
* made GPU powermodel nullable
* made Gpu powermodel configurable in topology
* implemented tests for basic gpu scheduler
* added gpu properties
* implemented weights, filter and simple cpu-gpu scheduler
* spotless apply
* spotless apply pt. 2
* fixed capitalization
* spotless kotlin run
* implemented column export
* todo update
* removed code comments
* Merged PerformanceCounter classes into one & removed interface
* removed GPU specific powermodel
* Rebase master: kept both versions of TopologyFactories
* renamed CpuPowermodel to resource independent Powermodel
Moved it from Cpu package to power package
* implemented default of getResourceType & removed overrides if possible
* split getResourceType into Consumer and Supplier
* added power as resource type
* reduced supply demand from arrayList to single value
* combining GPUs into one large GPU, until full multi-gpu support
* merged distribution policy enum with corresponding factory
* added comment
* post-rebase fixes
* aligned naming
* Added GPU metrics to task output
* Updates power resource type to uppercase.
Standardizes the `ResourceType.Power` enum to `ResourceType.POWER`
for consistency with other resource types and improved readability.
* Removes deprecated test assertions
Removes commented-out assertions in GPU tests.
These assertions are no longer needed and clutter the test code.
* Renames MaxMinFairnessStrategy to Policy
Renames MaxMinFairnessStrategy to MaxMinFairnessPolicy for
clarity and consistency with naming conventions. This change
affects the factory and distributor to use the updated name.
* applies spotless
* nulls GPUs as it is not used
Diffstat (limited to 'opendc-compute/opendc-compute-topology/src')
3 files changed, 66 insertions, 13 deletions
diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt index b6c945d2..b52608a9 100644 --- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt +++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt @@ -31,10 +31,13 @@ import org.opendc.compute.topology.specs.HostJSONSpec import org.opendc.compute.topology.specs.HostSpec import org.opendc.compute.topology.specs.PowerSourceSpec import org.opendc.compute.topology.specs.TopologySpec -import org.opendc.simulator.compute.cpu.getPowerModel import org.opendc.simulator.compute.models.CpuModel +import org.opendc.simulator.compute.models.GpuModel import org.opendc.simulator.compute.models.MachineModel import org.opendc.simulator.compute.models.MemoryUnit +import org.opendc.simulator.compute.power.getPowerModel +import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory +import org.opendc.simulator.engine.graph.distributionPolicies.DistributionPolicyFactory.DistributionPolicyType import java.io.File import java.io.InputStream @@ -166,29 +169,63 @@ private fun HostJSONSpec.toHostSpec(clusterName: String): HostSpec { } val unknownMemoryUnit = MemoryUnit(memory.vendor, memory.modelName, memory.memorySpeed.toMHz(), memory.memorySize.toMiB().toLong()) + val gpuUnits = + List(gpu?.count ?: 0) { + GpuModel( + globalCoreId++, + gpu!!.coreCount, + gpu.coreSpeed.toMHz(), + gpu.memoryBandwidth.toKibps(), + gpu.memorySize.toMiB().toLong(), + gpu.vendor, + gpu.modelName, + gpu.architecture, + ) + } + val machineModel = MachineModel( units, unknownMemoryUnit, + gpuUnits, + // TODO: Pass through + DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness), + 
DistributionPolicyFactory.getDistributionStrategy(DistributionPolicyType.MaxMinFairness), ) - val powerModel = + val cpuPowerModel = getPowerModel( - powerModel.modelType, - powerModel.power.toWatts(), - powerModel.maxPower.toWatts(), - powerModel.idlePower.toWatts(), - powerModel.calibrationFactor, - powerModel.asymUtil, - powerModel.dvfs, + cpuPowerModel.modelType, + cpuPowerModel.power.toWatts(), + cpuPowerModel.maxPower.toWatts(), + cpuPowerModel.idlePower.toWatts(), + cpuPowerModel.calibrationFactor, + cpuPowerModel.asymUtil, + cpuPowerModel.dvfs, ) + val gpuPowerModel = + if (gpuUnits.isEmpty()) { + null + } else { + getPowerModel( + gpuPowerModel.modelType, + gpuPowerModel.power.toWatts(), + gpuPowerModel.maxPower.toWatts(), + gpuPowerModel.idlePower.toWatts(), + gpuPowerModel.calibrationFactor, + gpuPowerModel.asymUtil, + gpuPowerModel.dvfs, + ) + } + val hostSpec = HostSpec( createUniqueName(this.name, hostNames), clusterName, machineModel, - powerModel, + cpuPowerModel, + gpuPowerModel, ) return hostSpec } diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt index e4ec89e1..30a75896 100644 --- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt +++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/HostSpec.kt @@ -22,8 +22,8 @@ package org.opendc.compute.topology.specs -import org.opendc.simulator.compute.cpu.CpuPowerModel import org.opendc.simulator.compute.models.MachineModel +import org.opendc.simulator.compute.power.PowerModel /** * Description of a physical host that will be simulated by OpenDC and host the virtual machines. 
@@ -36,7 +36,8 @@ public data class HostSpec( val name: String, val clusterName: String, val model: MachineModel, - val cpuPowerModel: CpuPowerModel, + val cpuPowerModel: PowerModel, + val gpuPowerModel: PowerModel?, val embodiedCarbon: Double = 1000.0, val expectedLifetime: Double = 5.0, ) diff --git a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt index 8cbf818b..62c3906a 100644 --- a/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt +++ b/opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt @@ -24,6 +24,7 @@ package org.opendc.compute.topology.specs import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +import org.opendc.common.units.DataRate import org.opendc.common.units.DataSize import org.opendc.common.units.Frequency import org.opendc.common.units.Power @@ -76,7 +77,9 @@ public data class HostJSONSpec( val cpu: CPUJSONSpec, val count: Int = 1, val memory: MemoryJSONSpec, - val powerModel: PowerModelSpec = PowerModelSpec.DFLT, + val gpu: GPUJSONSpec? = null, + val cpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT, + val gpuPowerModel: PowerModelSpec = PowerModelSpec.DFLT, ) /** @@ -118,6 +121,18 @@ public data class MemoryJSONSpec( ) @Serializable +public data class GPUJSONSpec( + val count: Int = 1, + val coreCount: Int, + val coreSpeed: Frequency, + val memorySize: DataSize = DataSize.ofMiB(-1), + val memoryBandwidth: DataRate = DataRate.ofKibps(-1), + val vendor: String = "unknown", + val modelName: String = "unknown", + val architecture: String = "unknown", +) + +@Serializable public data class PowerModelSpec( val modelType: String, val power: Power = Power.ofWatts(400), |
