From 07743e75891e8b3ebcefe4771f92af8003ef0b1f Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 30 Sep 2022 20:27:43 +0200 Subject: refactor(web/runner): Use experiment base for web runner This change updates the OpenDC web runner to use the new `opendc-experiments-base` module for setting up the experimental environment and simulate the workload. --- opendc-web/opendc-web-runner/build.gradle.kts | 1 + .../kotlin/org/opendc/web/runner/OpenDCRunner.kt | 119 ++++++++++++--------- 2 files changed, 69 insertions(+), 51 deletions(-) (limited to 'opendc-web') diff --git a/opendc-web/opendc-web-runner/build.gradle.kts b/opendc-web/opendc-web-runner/build.gradle.kts index a5723994..6a10267a 100644 --- a/opendc-web/opendc-web-runner/build.gradle.kts +++ b/opendc-web/opendc-web-runner/build.gradle.kts @@ -51,6 +51,7 @@ dependencies { api(projects.opendcWeb.opendcWebClient) implementation(projects.opendcCompute.opendcComputeSimulator) implementation(projects.opendcCompute.opendcComputeWorkload) + implementation(projects.opendcExperiments.opendcExperimentsCompute) implementation(projects.opendcSimulator.opendcSimulatorCore) implementation(projects.opendcTrace.opendcTraceApi) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt index 9a1319b6..cf887f54 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt @@ -23,11 +23,12 @@ package org.opendc.web.runner import mu.KotlinLogging +import org.opendc.compute.service.ComputeService import org.opendc.compute.workload.* -import org.opendc.compute.workload.telemetry.ComputeMetricReader import org.opendc.compute.workload.topology.HostSpec import org.opendc.compute.workload.topology.Topology -import org.opendc.compute.workload.topology.apply +import org.opendc.experiments.compute.* +import org.opendc.experiments.provisioner.Provisioner import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.model.ProcessingNode @@ -74,7 +75,8 @@ public class OpenDCRunner( /** * The [ForkJoinPool] that is used to execute the simulation jobs. */ - private val pool = ForkJoinPool(parallelism, RunnerThreadFactory(Thread.currentThread().contextClassLoader), null, false) + private val pool = + ForkJoinPool(parallelism, RunnerThreadFactory(Thread.currentThread().contextClassLoader), null, false) /** * A [ScheduledExecutorService] to manage the heartbeat of simulation jobs as well as tracking the deadline of @@ -129,11 +131,22 @@ public class OpenDCRunner( val id = job.id val scenario = job.scenario - val heartbeat = scheduler.scheduleWithFixedDelay({ manager.heartbeat(id) }, 0, heartbeatInterval.toMillis(), TimeUnit.MILLISECONDS) + val heartbeat = scheduler.scheduleWithFixedDelay( + { manager.heartbeat(id) }, + 0, + heartbeatInterval.toMillis(), + TimeUnit.MILLISECONDS + ) try { val topology = convertTopology(scenario.topology) - val jobs = (0 until scenario.portfolio.targets.repeats).map { repeat -> SimulationTask(scenario, repeat, topology) } + val jobs = (0 until scenario.portfolio.targets.repeats).map { repeat -> + SimulationTask( + scenario, + repeat, + topology + ) + } val results = invokeAll(jobs).map { it.rawResult } logger.info { "Finished simulation for job $id" } @@ -195,60 +208,64 @@ public class OpenDCRunner( // Schedule task that interrupts the simulation if it runs for too long. val currentThread = Thread.currentThread() - val interruptTask = scheduler.schedule({ currentThread.interrupt() }, jobTimeout.toMillis(), TimeUnit.MILLISECONDS) + val interruptTask = + scheduler.schedule({ currentThread.interrupt() }, jobTimeout.toMillis(), TimeUnit.MILLISECONDS) try { - runBlockingSimulation { - val workloadName = scenario.workload.trace.id - val workloadFraction = scenario.workload.samplingFraction - - val seeder = Random(repeat.toLong()) - - val phenomena = scenario.phenomena - val computeScheduler = createComputeScheduler(scenario.schedulerName, seeder) - val workload = trace(workloadName).sampleByLoad(workloadFraction) - val vms = workload.resolve(workloadLoader, seeder) - - val failureModel = - if (phenomena.failures) - grid5000(Duration.ofDays(7)) - else - null - - val simulator = ComputeServiceHelper( - coroutineContext, - clock, - computeScheduler, - seed = 0L, - ) - val reader = ComputeMetricReader(this, clock, simulator.service, monitor) - - try { - // Instantiate the topology onto the simulator - simulator.apply(topology) - // Run workload trace - simulator.run(vms, failureModel = failureModel, interference = phenomena.interference) - - val serviceMetrics = simulator.service.getSchedulerStats() - logger.debug { - "Scheduler " + - "Success=${serviceMetrics.attemptsSuccess} " + - "Failure=${serviceMetrics.attemptsFailure} " + - "Error=${serviceMetrics.attemptsError} " + - "Pending=${serviceMetrics.serversPending} " + - "Active=${serviceMetrics.serversActive}" - } - } finally { - simulator.close() - reader.close() - } - } + runSimulation(monitor) } finally { interruptTask.cancel(false) } return monitor.collectResults() } + + /** + * Run a single simulation of the scenario. + */ + private fun runSimulation(monitor: WebComputeMonitor) = runBlockingSimulation { + val serviceDomain = "compute.opendc.org" + val seed = repeat.toLong() + + val scenario = scenario + + Provisioner(coroutineContext, clock, seed).use { provisioner -> + provisioner.runSteps( + setupComputeService( + serviceDomain, + { createComputeScheduler(scenario.schedulerName, Random(it.seeder.nextLong())) } + ), + registerComputeMonitor(serviceDomain, monitor), + setupHosts(serviceDomain, topology.resolve()) + ) + + val service = provisioner.registry.resolve(serviceDomain, ComputeService::class.java)!! + + val workload = + trace(scenario.workload.trace.id).sampleByLoad(scenario.workload.samplingFraction) + val vms = workload.resolve(workloadLoader, Random(seed)) + + val phenomena = scenario.phenomena + val failureModel = + if (phenomena.failures) + grid5000(Duration.ofDays(7)) + else + null + + // Run workload trace + service.replay(clock, vms, seed, failureModel = failureModel, interference = phenomena.interference) + + val serviceMetrics = service.getSchedulerStats() + logger.debug { + "Scheduler " + + "Success=${serviceMetrics.attemptsSuccess} " + + "Failure=${serviceMetrics.attemptsFailure} " + + "Error=${serviceMetrics.attemptsError} " + + "Pending=${serviceMetrics.serversPending} " + + "Active=${serviceMetrics.serversActive}" + } + } + } } /** -- cgit v1.2.3 From 448b4cafe3c757812138a8ca7580975191ac2f9c Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 30 Sep 2022 20:57:16 +0200 Subject: refactor(exp/compute): Integrate compute workload classes This change integrates the classes from the old `opendc-compute-workload` module into the `opendc-experiments-compute` module. This new module contains helper classes for setting up experiments with the OpenDC compute service. --- opendc-web/opendc-web-runner/build.gradle.kts | 2 -- .../src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt | 5 ++--- .../main/kotlin/org/opendc/web/runner/internal/WebComputeMonitor.kt | 6 +++--- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'opendc-web') diff --git a/opendc-web/opendc-web-runner/build.gradle.kts b/opendc-web/opendc-web-runner/build.gradle.kts index 6a10267a..2679a97f 100644 --- a/opendc-web/opendc-web-runner/build.gradle.kts +++ b/opendc-web/opendc-web-runner/build.gradle.kts @@ -49,8 +49,6 @@ val cliJar by tasks.creating(Jar::class) { dependencies { api(projects.opendcWeb.opendcWebClient) - implementation(projects.opendcCompute.opendcComputeSimulator) - implementation(projects.opendcCompute.opendcComputeWorkload) implementation(projects.opendcExperiments.opendcExperimentsCompute) implementation(projects.opendcSimulator.opendcSimulatorCore) implementation(projects.opendcTrace.opendcTraceApi) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt index cf887f54..bf0bb8f2 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt @@ -24,10 +24,9 @@ package org.opendc.web.runner import mu.KotlinLogging import org.opendc.compute.service.ComputeService -import org.opendc.compute.workload.* -import org.opendc.compute.workload.topology.HostSpec -import org.opendc.compute.workload.topology.Topology import org.opendc.experiments.compute.* +import org.opendc.experiments.compute.topology.HostSpec +import org.opendc.experiments.compute.topology.Topology import org.opendc.experiments.provisioner.Provisioner import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/WebComputeMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/WebComputeMonitor.kt index 4c3d1cfa..76377c08 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/WebComputeMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/WebComputeMonitor.kt @@ -22,9 +22,9 @@ package org.opendc.web.runner.internal -import org.opendc.compute.workload.telemetry.ComputeMonitor -import org.opendc.compute.workload.telemetry.table.HostTableReader -import org.opendc.compute.workload.telemetry.table.ServiceTableReader +import org.opendc.experiments.compute.telemetry.ComputeMonitor +import org.opendc.experiments.compute.telemetry.table.HostTableReader +import org.opendc.experiments.compute.telemetry.table.ServiceTableReader import kotlin.math.max import kotlin.math.roundToLong -- cgit v1.2.3 From bd476d11ab24fe745bb54e97a11133706bb96cb1 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 3 Oct 2022 17:20:34 +0200 Subject: refactor(exp/compute): Remove Topology interface This change removes the Topology interface from the `opendc-experiments-compute` module, which was meant for provisioning the experimental topology. Howerver, with the stateless `HostSpec` class, it is not needed to resolve the topology everytime. --- .../kotlin/org/opendc/web/runner/OpenDCRunner.kt | 102 ++++++++++----------- 1 file changed, 48 insertions(+), 54 deletions(-) (limited to 'opendc-web') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt index bf0bb8f2..74f7c8c1 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt @@ -26,7 +26,6 @@ import mu.KotlinLogging import org.opendc.compute.service.ComputeService import org.opendc.experiments.compute.* import org.opendc.experiments.compute.topology.HostSpec -import org.opendc.experiments.compute.topology.Topology import org.opendc.experiments.provisioner.Provisioner import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit @@ -37,6 +36,7 @@ import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.core.runBlockingSimulation import org.opendc.web.proto.runner.Job import org.opendc.web.proto.runner.Scenario +import org.opendc.web.proto.runner.Topology import org.opendc.web.runner.internal.WebComputeMonitor import java.io.File import java.time.Duration @@ -200,7 +200,7 @@ public class OpenDCRunner( private inner class SimulationTask( private val scenario: Scenario, private val repeat: Int, - private val topology: Topology, + private val topology: List, ) : RecursiveTask() { override fun compute(): WebComputeMonitor.Results { val monitor = WebComputeMonitor() @@ -235,7 +235,7 @@ public class OpenDCRunner( { createComputeScheduler(scenario.schedulerName, Random(it.seeder.nextLong())) } ), registerComputeMonitor(serviceDomain, monitor), - setupHosts(serviceDomain, topology.resolve()) + setupHosts(serviceDomain, topology) ) val service = provisioner.registry.resolve(serviceDomain, ComputeService::class.java)!! @@ -270,62 +270,56 @@ public class OpenDCRunner( /** * Convert the specified [topology] into an [Topology] understood by OpenDC. */ - private fun convertTopology(topology: org.opendc.web.proto.runner.Topology): Topology { - return object : Topology { - - override fun resolve(): List { - val res = mutableListOf() - val random = Random(0) - - val machines = topology.rooms.asSequence() - .flatMap { room -> - room.tiles.flatMap { tile -> - val rack = tile.rack - rack?.machines?.map { machine -> rack to machine } ?: emptyList() - } - } - for ((rack, machine) in machines) { - val clusterId = rack.id - val position = machine.position - - val processors = machine.cpus.flatMap { cpu -> - val cores = cpu.numberOfCores - val speed = cpu.clockRateMhz - // TODO Remove hard coding of vendor - val node = ProcessingNode("Intel", "amd64", cpu.name, cores) - List(cores) { coreId -> - ProcessingUnit(node, coreId, speed) - } - } - val memoryUnits = machine.memory.map { memory -> - MemoryUnit( - "Samsung", - memory.name, - memory.speedMbPerS, - memory.sizeMb.toLong() - ) - } - - val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW } - val powerModel = LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) - val powerDriver = SimplePowerDriver(powerModel) - - val spec = HostSpec( - UUID(random.nextLong(), random.nextLong()), - "node-$clusterId-$position", - mapOf("cluster" to clusterId), - MachineModel(processors, memoryUnits), - powerDriver - ) - - res += spec + private fun convertTopology(topology: Topology): List { + val res = mutableListOf() + val random = Random(0) + + val machines = topology.rooms.asSequence() + .flatMap { room -> + room.tiles.flatMap { tile -> + val rack = tile.rack + rack?.machines?.map { machine -> rack to machine } ?: emptyList() } + } - return res + for ((rack, machine) in machines) { + val clusterId = rack.id + val position = machine.position + + val processors = machine.cpus.flatMap { cpu -> + val cores = cpu.numberOfCores + val speed = cpu.clockRateMhz + // TODO Remove hard coding of vendor + val node = ProcessingNode("Intel", "amd64", cpu.name, cores) + List(cores) { coreId -> + ProcessingUnit(node, coreId, speed) + } + } + val memoryUnits = machine.memory.map { memory -> + MemoryUnit( + "Samsung", + memory.name, + memory.speedMbPerS, + memory.sizeMb.toLong() + ) } - override fun toString(): String = "WebRunnerTopologyFactory" + val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW } + val powerModel = LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) + val powerDriver = SimplePowerDriver(powerModel) + + val spec = HostSpec( + UUID(random.nextLong(), random.nextLong()), + "node-$clusterId-$position", + mapOf("cluster" to clusterId), + MachineModel(processors, memoryUnits), + powerDriver + ) + + res += spec } + + return res } /** -- cgit v1.2.3