From 0203254b709614fa732c114aa25916f61b8b3275 Mon Sep 17 00:00:00 2001
From: Niels Thiele <noleu66@posteo.net>
Date: Sun, 22 Jun 2025 12:31:21 +0200
Subject: Implemented Single GPU Support & outline of host-level allocation
 policies (#342)

* renamed performance counter to distinguish different resource types

* added GPU, modelled similar to CPU

* added GPUs to machine model

* list of GPUs instead of single instance

* renamed memory speed to bandwidth

* enabled parsing of GPU resources

* split powermodel into cpu and GPU powermodel

* added gpu parsing tests

* added idea of host level scheduling

* added tests for multi gpu parsing

* renamed powermodel to cpupowermodel

* clarified naming of cpu and gpu components

* added resource type to flow suplier and edge

* added resourcetype

* added GPU components and resource type to fragments

* added GPU to workload and updated resource usage retrieval

* implemented first version of multi resource

* added name to workload

* renamed perfomance counters

* removed commented out code

* removed deprecated comments

* included demand and supply into calculations

* resolving rebase mismatches

* moved resource type from flowedge class to common package

* added available resources to machinees

* cleaner separation if workload is started of simmachine or vm

* Replaced exception with dedicated enum

* Only looping over resources that are actually used

* using hashmaps to handle resourcetype instead of arrays for readability

* fixed condition

* tracking finished workloads per resource type

* removed resource type from flowedge

* made supply and demand distribution resource specific

* added power model for GPU

* removed unused test setup

* removed depracated comments

* removed unused parameter

* added ID for GPU

* added GPUs and GPU performance counters (naively)

* implemented capturing of GPU statistics

* added reminders for future implementations

* renamed properties for better identification

* added capturing GPU statistics

* implemented first tests for GPUs

* unified access to performance counters

* added interface for general compute resource handling

* implemented multi resource support in simmachine

* added individual edge to VM per resource

* extended compute resource interface

* implemented multi-resource support in PSU

* implemented generic retrieval of computeresources

* implemented mult-resource suppport in vm

* made method use more resource specific

* implemented simple GPU tests

* rolled back frquency and demand use

* made naming independent of used resource

* using workloads resources instead of VMs to determine available resource

* implemented determination of used resources in workload

* removed logging statements

* implemented reading from workload

* fixed naming for host-level allocation

* fixed next deadline calculation

* fixed forwarding supply

* reduced memory footprint

* made GPU powermodel nullable

* maded Gpu powermodel configurable in topology

* implemented tests for basic gpu scheduler

* added gpu properties

* implemented weights, filter and simple cpu-gpu scheduler

* spotless apply

* spotless apply pt. 2

* fixed capitalization

* spotless kotlin run

* implemented coloumn export

* todo update

* removed code comments

* Merged PerformanceCounter classes into one & removed interface

* removed GPU  specific powermodel

* Rebase master: kept both versions of TopologyFactories

* renamed CpuPowermodel to resource independent Powermodel

Moved it from Cpu package to power package

* implementated default of getResourceType & removed overrides if possible

* split getResourceType into Consumer and Supplier

* added power as resource type

* reduced supply demand from arrayList to single value

* combining GPUs into one large GPU, until full multi-gpu support

* merged distribution policy enum with corresponding factory

* added comment

* post-rebase fixes

* aligned naming

* Added GPU metrics to task output

* Updates power resource type to uppercase.

Standardizes the `ResourceType.Power` enum to `ResourceType.POWER`
for consistency with other resource types and improved readability.

* Removes deprecated test assertions

Removes commented-out assertions in GPU tests.

These assertions are no longer needed and clutter the test code.

* Renames MaxMinFairnessStrategy to Policy

Renames MaxMinFairnessStrategy to MaxMinFairnessPolicy for
clarity and consistency with naming conventions. This change
affects the factory and distributor to use the updated name.

* applies spotless

* nulls GPUs as it is not used
---
 .../src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt          | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'opendc-web')

diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt
index 406c9772..309763f1 100644
--- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt
+++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt
@@ -36,10 +36,10 @@ import org.opendc.compute.topology.specs.HostSpec
 import org.opendc.compute.topology.specs.PowerSourceSpec
 import org.opendc.compute.workload.ComputeWorkloadLoader
 import org.opendc.experiments.base.runner.replay
-import org.opendc.simulator.compute.cpu.CpuPowerModels
 import org.opendc.simulator.compute.models.CpuModel
 import org.opendc.simulator.compute.models.MachineModel
 import org.opendc.simulator.compute.models.MemoryUnit
+import org.opendc.simulator.compute.power.PowerModels
 import org.opendc.simulator.kotlin.runSimulation
 import org.opendc.web.proto.runner.Job
 import org.opendc.web.proto.runner.Scenario
@@ -353,14 +353,15 @@ public class OpenDCRunner(
                 }
 
             val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW }
-            val powerModel = CpuPowerModels.linear(2 * energyConsumptionW, energyConsumptionW * 0.5)
+            val cpuPowerModel = PowerModels.linear(2 * energyConsumptionW, energyConsumptionW * 0.5)
 
             val spec =
                 HostSpec(
                     "node-$clusterId-$position",
                     clusterId,
                     MachineModel(processors, memoryUnits[0]),
-                    powerModel,
+                    cpuPowerModel,
+                    null,
                 )
 
             res += spec
-- 
cgit v1.2.3