From a735f1768677fc996da77b239819c55dcd623f5e Mon Sep 17 00:00:00 2001 From: Niels Thiele Date: Mon, 15 Sep 2025 15:34:38 +0200 Subject: Implements fixes to run m100 traces with GPUs (#362) * Updated output format to reduce size * using sum of gpu capacities instead of single max * passing provisioned GPU cores to host view * fix supply update trigger * fixing floating point error, leading to negative demand * fixing double mismatch, due to floating-point imprecision * adding additional check if demand can be satisfied in the simple way * adds workload invalidation if remaining duration for all resources is 0 * invalidating flow distributors after demand update * spotless apply * updating tests * exporting power consumption of compute resources directly from gpu instead of PSU * using big decimal to avoid floating-point imprecision * rolls back to pass-through version of PSU, before GPU implementation * places flowdistributor between PSU and compute resources * adds check to avoid null exception if supply is pushed without demand * fixing task id type * Adds memorizing GPU scheduler * adds boundary for negative remaining work * implemented tests for GPU scheduler filter * Revert "Updated output format to reduce size" This reverts commit 7171de8e0512a863df4962f64560ac7bad1fb48d. 
* spotless apply --------- Co-authored-by: DanteNiewenhuis --- .../experiments/base/DistributionPoliciesTest.kt | 40 +++++++++++----------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'opendc-experiments/opendc-experiments-base/src/test/kotlin') diff --git a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/DistributionPoliciesTest.kt b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/DistributionPoliciesTest.kt index 256c067d..730f9fd0 100644 --- a/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/DistributionPoliciesTest.kt +++ b/opendc-experiments/opendc-experiments-base/src/test/kotlin/org/opendc/experiments/base/DistributionPoliciesTest.kt @@ -537,16 +537,16 @@ class DistributionPoliciesTest { // Best effort should distribute proportionally based on demand while using round-robin assertAll( // Task 0 - { assertEquals(3000.0, monitor.taskGpuDemands[0]?.get(1), "Task 0 GPU demand should be 3000.0") }, - { assertEquals(3000.0, monitor.taskGpuSupplied[0]?.get(1), "Task 0 GPU supply should be 1000.0") }, + { assertEquals(3000.0, monitor.taskGpuDemands[0]?.get(0), "Task 0 GPU demand should be 3000.0") }, + { assertEquals(3000.0, monitor.taskGpuSupplied[0]?.get(0), "Task 0 GPU supply should be 1000.0") }, // Task 1 - { assertEquals(2500.0, monitor.taskGpuDemands[1]?.get(1), "Task 1 GPU demand should be 2500.0") }, - { assertEquals(1000.0, monitor.taskGpuSupplied[1]?.get(1), "Task 0 GPU supply should be 1000.0") }, + { assertEquals(2500.0, monitor.taskGpuDemands[1]?.get(0), "Task 1 GPU demand should be 2500.0") }, + { assertEquals(1000.0, monitor.taskGpuSupplied[1]?.get(0), "Task 1 GPU supply should be 1000.0") }, // Host - { assertEquals(2750.0, monitor.hostGpuDemands["DualGpuHost"]?.get(1)?.get(0), "GPU 0 demand at host should be 2000.0") }, - { assertEquals(2000.0, monitor.hostGpuSupplied["DualGpuHost"]?.get(1)?.get(0), "GPU 0 supplied at 
host should be 2000.0") }, - { assertEquals(2750.0, monitor.hostGpuDemands["DualGpuHost"]?.get(1)?.get(1), "GPU 1 demand at host should be 2000.0") }, - { assertEquals(2000.0, monitor.hostGpuSupplied["DualGpuHost"]?.get(1)?.get(1), "GPU 1 supplied at host should be 2000.0") }, + { assertEquals(2750.0, monitor.hostGpuDemands["DualGpuHost"]?.get(0)?.get(0), "GPU 0 demand at host should be 2000.0") }, + { assertEquals(2000.0, monitor.hostGpuSupplied["DualGpuHost"]?.get(0)?.get(0), "GPU 0 supplied at host should be 2000.0") }, + { assertEquals(2750.0, monitor.hostGpuDemands["DualGpuHost"]?.get(0)?.get(1), "GPU 1 demand at host should be 2000.0") }, + { assertEquals(2000.0, monitor.hostGpuSupplied["DualGpuHost"]?.get(0)?.get(1), "GPU 1 supplied at host should be 2000.0") }, ) } @@ -677,21 +677,21 @@ class DistributionPoliciesTest { // Best effort should distribute fairly among all tasks in a round-robin manner assertAll( // Task Demands at start - { assertEquals(2000.0, monitor.taskGpuDemands[0]?.get(1), "Task 0 demand should be 2000.0") }, - { assertEquals(2000.0, monitor.taskGpuDemands[1]?.get(1), "Task 1 demand should be 2000.0") }, - { assertEquals(2000.0, monitor.taskGpuDemands[2]?.get(1), "Task 2 demand should be 2000.0") }, + { assertEquals(2000.0, monitor.taskGpuDemands[0]?.get(0), "Task 0 demand should be 2000.0") }, + { assertEquals(2000.0, monitor.taskGpuDemands[1]?.get(0), "Task 1 demand should be 2000.0") }, + { assertEquals(2000.0, monitor.taskGpuDemands[2]?.get(0), "Task 2 demand should be 2000.0") }, // Task supplies at start - { assertEquals(2000.0, monitor.taskGpuSupplied[0]?.get(1), "Task 0 supply at the start should be 2000.0") }, - { assertEquals(0.0, monitor.taskGpuSupplied[1]?.get(1), "Task 1 supply at the start should be 2000.0") }, - { assertEquals(2000.0, monitor.taskGpuSupplied[2]?.get(1), "Task 2 supply at the start should be 0.0") }, + { assertEquals(2000.0, monitor.taskGpuSupplied[0]?.get(0), "Task 0 supply at the start should be 2000.0") 
}, + { assertEquals(2000.0, monitor.taskGpuSupplied[1]?.get(0), "Task 1 supply at the start should be 0.0") }, + { assertEquals(0.0, monitor.taskGpuSupplied[2]?.get(0), "Task 2 supply at the start should be 2000.0") }, // Task supplies second step - { assertEquals(0.0, monitor.taskGpuSupplied[0]?.get(2), "Task 0 supply at the second step should be 2000.0") }, - { assertEquals(2000.0, monitor.taskGpuSupplied[1]?.get(2), "Task 1 supply at the second step should be 0.0") }, - { assertEquals(2000.0, monitor.taskGpuSupplied[2]?.get(2), "Task 2 supply at the second step should be 2000.0") }, + { assertEquals(0.0, monitor.taskGpuSupplied[0]?.get(1), "Task 0 supply at the second step should be 2000.0") }, + { assertEquals(2000.0, monitor.taskGpuSupplied[1]?.get(1), "Task 1 supply at the second step should be 0.0") }, + { assertEquals(2000.0, monitor.taskGpuSupplied[2]?.get(1), "Task 2 supply at the second step should be 2000.0") }, // Task supplies third step - { assertEquals(2000.0, monitor.taskGpuSupplied[0]?.get(3), "Task 0 supply at the third step should be 2000.0") }, - { assertEquals(2000.0, monitor.taskGpuSupplied[1]?.get(3), "Task 1 supply at the third step should be 2000.0") }, - { assertEquals(0.0, monitor.taskGpuSupplied[2]?.get(3), "Task 2 supply at the third step should be 0.0") }, + { assertEquals(2000.0, monitor.taskGpuSupplied[0]?.get(2), "Task 0 supply at the third step should be 2000.0") }, + { assertEquals(0.0, monitor.taskGpuSupplied[1]?.get(2), "Task 1 supply at the third step should be 0.0") }, + { assertEquals(2000.0, monitor.taskGpuSupplied[2]?.get(2), "Task 2 supply at the third step should be 2000.0") }, // Host // At start { assertEquals(3000.0, monitor.hostGpuDemands["DualGpuHost"]?.get(1)?.get(0), "GPU 0 demand at host should be 2000.0") }, -- cgit v1.2.3