From cc9310efad6177909ff2f7415384d7c393383106 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 5 May 2021 23:14:56 +0200 Subject: chore: Address deprecations due to Kotlin 1.5 This change addresses the deprecations that were caused by the migration to Kotlin 1.5. --- .../src/main/kotlin/org/opendc/runner/web/TopologyParser.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt index 2dd63340..e96a681d 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt @@ -73,7 +73,7 @@ public class TopologyParser(private val collection: MongoCollection) { ) } - val energyConsumptionW = machine.getList("cpus", Document::class.java).sumBy { it.getInteger("energyConsumptionW") }.toDouble() + val energyConsumptionW = machine.getList("cpus", Document::class.java).sumOf { it.getInteger("energyConsumptionW") }.toDouble() nodes.add( MachineDef( -- cgit v1.2.3 From b29f90e5ad5bcac29cde86e56c06e0b65a52cedc Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 21 Jun 2021 20:57:06 +0200 Subject: simulator: Re-organize compute simulator module This change re-organizes the classes of the compute simulator module to make a clearer distinction between the hardware, firmware and software interfaces in this module. --- .../src/main/kotlin/org/opendc/runner/web/TopologyParser.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt index e96a681d..2135ee1d 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt @@ -32,7 +32,7 @@ import org.bson.Document import org.bson.types.ObjectId import org.opendc.format.environment.EnvironmentReader import org.opendc.format.environment.MachineDef -import org.opendc.simulator.compute.SimMachineModel +import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit @@ -80,7 +80,7 @@ public class TopologyParser(private val collection: MongoCollection) { UUID(random.nextLong(), random.nextLong()), "node-$clusterId-$position", mapOf("cluster" to clusterId), - SimMachineModel(processors, memoryUnits), + MachineModel(processors, memoryUnits), LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) ) ) -- cgit v1.2.3 From be34a55c2c2fe94a6883c6b97d2abe4c43288e8a Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 23 Jun 2021 16:54:31 +0200 Subject: format: Remove performance interference from trace readers This change updates the trace reader implementation to remove their dependency on the performance interference model. In a future commit, we will instead pass the performance interference model via the host/hypervisor. --- .../src/main/kotlin/org/opendc/runner/web/Main.kt | 28 +++++----------------- 1 file changed, 6 insertions(+), 22 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt index 09f7de35..f5d0b65e 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt @@ -47,10 +47,9 @@ import org.opendc.compute.service.scheduler.filters.ComputeFilter import org.opendc.compute.service.scheduler.weights.* import org.opendc.experiments.capelin.* import org.opendc.experiments.capelin.model.Workload -import org.opendc.experiments.capelin.trace.Sc20ParquetTraceReader -import org.opendc.experiments.capelin.trace.Sc20RawParquetTraceReader +import org.opendc.experiments.capelin.trace.ParquetTraceReader +import org.opendc.experiments.capelin.trace.RawParquetTraceReader import org.opendc.format.environment.EnvironmentReader -import org.opendc.format.trace.sc20.Sc20PerformanceInterferenceReader import org.opendc.simulator.core.runBlockingSimulation import org.opendc.telemetry.sdk.toOtelClock import java.io.File @@ -167,19 +166,7 @@ public class RunnerCli : CliktCommand(name = "runner") { tracePath, scenario.getEmbedded(listOf("trace", "traceId"), String::class.java) ) - val traceReader = Sc20RawParquetTraceReader(traceDir) - val performanceInterferenceReader = let { - val path = File(traceDir, "performance-interference-model.json") - val operational = scenario.get("operational", Document::class.java) - val enabled = operational.getBoolean("performanceInterferenceEnabled") - - if (!enabled || !path.exists()) { - return@let null - } - - path.inputStream().use { Sc20PerformanceInterferenceReader(it) } - } - + val traceReader = RawParquetTraceReader(traceDir) val targets = portfolio.get("targets", Document::class.java) val topologyId = scenario.getEmbedded(listOf("topology", "topologyId"), ObjectId::class.java) val environment = topologyParser.read(topologyId) @@ -187,7 +174,7 @@ public class RunnerCli : CliktCommand(name = "runner") { val results = (0 until targets.getInteger("repeatsPerScenario")).map { logger.info { "Starting repeat $it" } withTimeout(runTimeout * 1000) { - runRepeat(scenario, it, environment, traceReader, performanceInterferenceReader) + runRepeat(scenario, it, environment, traceReader) } } @@ -203,8 +190,7 @@ public class RunnerCli : CliktCommand(name = "runner") { scenario: Document, repeat: Int, environment: EnvironmentReader, - traceReader: Sc20RawParquetTraceReader, - performanceInterferenceReader: Sc20PerformanceInterferenceReader? + traceReader: RawParquetTraceReader, ): WebExperimentMonitor.Result { val monitor = WebExperimentMonitor() @@ -267,10 +253,8 @@ public class RunnerCli : CliktCommand(name = "runner") { else -> throw IllegalArgumentException("Unknown policy $policyName") } - val performanceInterferenceModel = performanceInterferenceReader?.construct(seeder) ?: emptyMap() - val trace = Sc20ParquetTraceReader( + val trace = ParquetTraceReader( listOf(traceReader), - performanceInterferenceModel, Workload(workloadName, workloadFraction), seed ) -- cgit v1.2.3 From e56967a29ac2b2d26cc085b1f3e27096dad6a170 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 24 Jun 2021 12:54:52 +0200 Subject: simulator: Re-implement performance interference model This change updates reimplements the performance interference model to work on top of the universal resource model in `opendc-simulator-resources`. This enables us to model interference and performance variability of other resources such as disk or network in the future. --- .../src/main/kotlin/org/opendc/runner/web/Main.kt | 29 +++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt index f5d0b65e..d0b97d90 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt @@ -48,12 +48,15 @@ import org.opendc.compute.service.scheduler.weights.* import org.opendc.experiments.capelin.* import org.opendc.experiments.capelin.model.Workload import org.opendc.experiments.capelin.trace.ParquetTraceReader +import org.opendc.experiments.capelin.trace.PerformanceInterferenceReader import org.opendc.experiments.capelin.trace.RawParquetTraceReader import org.opendc.format.environment.EnvironmentReader +import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel import org.opendc.simulator.core.runBlockingSimulation import org.opendc.telemetry.sdk.toOtelClock import java.io.File import kotlin.random.Random +import kotlin.random.asJavaRandom private val logger = KotlinLogging.logger {} @@ -61,7 +64,7 @@ private val logger = KotlinLogging.logger {} * Represents the CLI command for starting the OpenDC web runner. */ @OptIn(ExperimentalCoroutinesApi::class) -public class RunnerCli : CliktCommand(name = "runner") { +class RunnerCli : CliktCommand(name = "runner") { /** * The name of the database to use. */ @@ -167,14 +170,27 @@ public class RunnerCli : CliktCommand(name = "runner") { scenario.getEmbedded(listOf("trace", "traceId"), String::class.java) ) val traceReader = RawParquetTraceReader(traceDir) + val interferenceGroups = let { + val path = File(traceDir, "performance-interference-model.json") + val operational = scenario.get("operational", Document::class.java) + val enabled = operational.getBoolean("performanceInterferenceEnabled") + + if (!enabled || !path.exists()) { + return@let null + } + + PerformanceInterferenceReader(path.inputStream()).use { reader -> reader.read() } + } + val targets = portfolio.get("targets", Document::class.java) val topologyId = scenario.getEmbedded(listOf("topology", "topologyId"), ObjectId::class.java) val environment = topologyParser.read(topologyId) - val results = (0 until targets.getInteger("repeatsPerScenario")).map { - logger.info { "Starting repeat $it" } + val results = (0 until targets.getInteger("repeatsPerScenario")).map { repeat -> + logger.info { "Starting repeat $repeat" } withTimeout(runTimeout * 1000) { - runRepeat(scenario, it, environment, traceReader) + val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong()).asJavaRandom()) } + runRepeat(scenario, repeat, environment, traceReader, interferenceModel) } } @@ -191,6 +207,7 @@ public class RunnerCli : CliktCommand(name = "runner") { repeat: Int, environment: EnvironmentReader, traceReader: RawParquetTraceReader, + interferenceModel: VmInterferenceModel? ): WebExperimentMonitor.Result { val monitor = WebExperimentMonitor() @@ -260,7 +277,7 @@ public class RunnerCli : CliktCommand(name = "runner") { ) val failureFrequency = if (operational.getBoolean("failuresEnabled", false)) 24.0 * 7 else 0.0 - withComputeService(clock, meterProvider, environment, allocationPolicy) { scheduler -> + withComputeService(clock, meterProvider, environment, allocationPolicy, interferenceModel) { scheduler -> val failureDomain = if (failureFrequency > 0) { logger.debug { "ENABLING failures" } createFailureDomain( @@ -361,4 +378,4 @@ public class RunnerCli : CliktCommand(name = "runner") { /** * Main entry point of the runner. */ -public fun main(args: Array): Unit = RunnerCli().main(args) +fun main(args: Array): Unit = RunnerCli().main(args) -- cgit v1.2.3 From 6752b6d50faab447b3edc13bddf14f53401392f1 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 2 Jul 2021 17:52:12 +0200 Subject: runner: Use public API for scheduling simulation jobs This change updates the web runner to not require direct database access for scheduling simulation jobs. Instead, the runner polls the public REST API for available jobs and reports its results through there. --- .../src/main/kotlin/org/opendc/runner/web/Main.kt | 381 ------------------- .../org/opendc/runner/web/ScenarioManager.kt | 115 ------ .../kotlin/org/opendc/runner/web/TopologyParser.kt | 126 ------- .../org/opendc/runner/web/WebExperimentMonitor.kt | 189 ---------- .../main/kotlin/org/opendc/web/client/ApiClient.kt | 179 +++++++++ .../main/kotlin/org/opendc/web/client/ApiResult.kt | 43 +++ .../org/opendc/web/client/AuthConfiguration.kt | 32 ++ .../main/kotlin/org/opendc/web/client/model/Job.kt | 38 ++ .../kotlin/org/opendc/web/client/model/Machine.kt | 42 +++ .../org/opendc/web/client/model/MemoryUnit.kt | 37 ++ .../web/client/model/OperationalPhenomena.kt | 32 ++ .../org/opendc/web/client/model/Portfolio.kt | 38 ++ .../opendc/web/client/model/PortfolioTargets.kt | 28 ++ .../org/opendc/web/client/model/ProcessingUnit.kt | 37 ++ .../kotlin/org/opendc/web/client/model/Rack.kt | 39 ++ .../kotlin/org/opendc/web/client/model/Room.kt | 37 ++ .../kotlin/org/opendc/web/client/model/RoomTile.kt | 38 ++ .../kotlin/org/opendc/web/client/model/Scenario.kt | 39 ++ .../opendc/web/client/model/ScenarioTopology.kt | 28 ++ .../org/opendc/web/client/model/ScenarioTrace.kt | 28 ++ .../org/opendc/web/client/model/SimulationState.kt | 30 ++ .../kotlin/org/opendc/web/client/model/Topology.kt | 38 ++ .../src/main/kotlin/org/opendc/web/runner/Main.kt | 404 +++++++++++++++++++++ .../org/opendc/web/runner/ScenarioManager.kt | 86 +++++ .../org/opendc/web/runner/WebExperimentMonitor.kt | 189 ++++++++++ .../src/main/resources/log4j2.xml | 2 +- .../kotlin/org/opendc/web/client/ApiClientTest.kt | 264 ++++++++++++++ 27 files changed, 1727 insertions(+), 812 deletions(-) delete mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt delete mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/ScenarioManager.kt delete mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt delete mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/WebExperimentMonitor.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiClient.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiResult.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/AuthConfiguration.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Job.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/MemoryUnit.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/OperationalPhenomena.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Portfolio.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/PortfolioTargets.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ProcessingUnit.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Rack.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Scenario.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTopology.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTrace.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/SimulationState.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Topology.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt create mode 100644 opendc-web/opendc-web-runner/src/test/kotlin/org/opendc/web/client/ApiClientTest.kt (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt deleted file mode 100644 index d0b97d90..00000000 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/Main.kt +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.runner.web - -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.file -import com.github.ajalt.clikt.parameters.types.int -import com.github.ajalt.clikt.parameters.types.long -import com.mongodb.MongoClientSettings -import com.mongodb.MongoCredential -import com.mongodb.ServerAddress -import com.mongodb.client.MongoClients -import com.mongodb.client.MongoDatabase -import com.mongodb.client.model.Filters -import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.sdk.metrics.SdkMeterProvider -import io.opentelemetry.sdk.metrics.export.MetricProducer -import kotlinx.coroutines.* -import kotlinx.coroutines.channels.Channel -import mu.KotlinLogging -import org.bson.Document -import org.bson.types.ObjectId -import org.opendc.compute.service.scheduler.FilterScheduler -import org.opendc.compute.service.scheduler.filters.ComputeCapabilitiesFilter -import org.opendc.compute.service.scheduler.filters.ComputeFilter -import org.opendc.compute.service.scheduler.weights.* -import org.opendc.experiments.capelin.* -import org.opendc.experiments.capelin.model.Workload -import org.opendc.experiments.capelin.trace.ParquetTraceReader -import org.opendc.experiments.capelin.trace.PerformanceInterferenceReader -import org.opendc.experiments.capelin.trace.RawParquetTraceReader -import org.opendc.format.environment.EnvironmentReader -import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel -import org.opendc.simulator.core.runBlockingSimulation -import org.opendc.telemetry.sdk.toOtelClock -import java.io.File -import kotlin.random.Random -import kotlin.random.asJavaRandom - -private val logger = KotlinLogging.logger {} - -/** - * Represents the CLI command for starting the OpenDC web runner. - */ -@OptIn(ExperimentalCoroutinesApi::class) -class RunnerCli : CliktCommand(name = "runner") { - /** - * The name of the database to use. - */ - private val mongoDb by option( - "--mongo-db", - help = "name of the database to use", - envvar = "OPENDC_DB" - ) - .default("opendc") - - /** - * The database host to connect to. - */ - private val mongoHost by option( - "--mongo-host", - help = "database host to connect to", - envvar = "OPENDC_DB_HOST" - ) - .default("localhost") - - /** - * The database port to connect to. - */ - private val mongoPort by option( - "--mongo-port", - help = "database port to connect to", - envvar = "OPENDC_DB_PORT" - ) - .int() - .default(27017) - - /** - * The database user to connect with. - */ - private val mongoUser by option( - "--mongo-user", - help = "database user to connect with", - envvar = "OPENDC_DB_USER" - ) - .default("opendc") - - /** - * The database password to connect with. - */ - private val mongoPassword by option( - "--mongo-password", - help = "database password to connect with", - envvar = "OPENDC_DB_PASSWORD" - ) - .convert { it.toCharArray() } - .required() - - /** - * The path to the traces directory. - */ - private val tracePath by option( - "--traces", - help = "path to the directory containing the traces", - envvar = "OPENDC_TRACES" - ) - .file(canBeFile = false) - .defaultLazy { File("traces/") } - - /** - * The maximum duration of a single experiment run. - */ - private val runTimeout by option( - "--run-timeout", - help = "maximum duration of experiment in seconds", - envvar = "OPENDC_RUN_TIMEOUT" - ) - .long() - .default(60 * 3) // Experiment may run for a maximum of three minutes - - /** - * Connect to the user-specified database. - */ - private fun createDatabase(): MongoDatabase { - val credential = MongoCredential.createScramSha1Credential( - mongoUser, - mongoDb, - mongoPassword - ) - - val settings = MongoClientSettings.builder() - .credential(credential) - .applyToClusterSettings { it.hosts(listOf(ServerAddress(mongoHost, mongoPort))) } - .build() - val client = MongoClients.create(settings) - return client.getDatabase(mongoDb) - } - - /** - * Run a single scenario. - */ - private suspend fun runScenario(portfolio: Document, scenario: Document, topologyParser: TopologyParser): List { - val id = scenario.getObjectId("_id") - - logger.info { "Constructing performance interference model" } - - val traceDir = File( - tracePath, - scenario.getEmbedded(listOf("trace", "traceId"), String::class.java) - ) - val traceReader = RawParquetTraceReader(traceDir) - val interferenceGroups = let { - val path = File(traceDir, "performance-interference-model.json") - val operational = scenario.get("operational", Document::class.java) - val enabled = operational.getBoolean("performanceInterferenceEnabled") - - if (!enabled || !path.exists()) { - return@let null - } - - PerformanceInterferenceReader(path.inputStream()).use { reader -> reader.read() } - } - - val targets = portfolio.get("targets", Document::class.java) - val topologyId = scenario.getEmbedded(listOf("topology", "topologyId"), ObjectId::class.java) - val environment = topologyParser.read(topologyId) - - val results = (0 until targets.getInteger("repeatsPerScenario")).map { repeat -> - logger.info { "Starting repeat $repeat" } - withTimeout(runTimeout * 1000) { - val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong()).asJavaRandom()) } - runRepeat(scenario, repeat, environment, traceReader, interferenceModel) - } - } - - logger.info { "Finished simulation for scenario $id" } - - return results - } - - /** - * Run a single repeat. - */ - private suspend fun runRepeat( - scenario: Document, - repeat: Int, - environment: EnvironmentReader, - traceReader: RawParquetTraceReader, - interferenceModel: VmInterferenceModel? - ): WebExperimentMonitor.Result { - val monitor = WebExperimentMonitor() - - try { - runBlockingSimulation { - val seed = repeat - val traceDocument = scenario.get("trace", Document::class.java) - val workloadName = traceDocument.getString("traceId") - val workloadFraction = traceDocument.get("loadSamplingFraction", Number::class.java).toDouble() - - val seeder = Random(seed) - - val chan = Channel(Channel.CONFLATED) - - val meterProvider: MeterProvider = SdkMeterProvider - .builder() - .setClock(clock.toOtelClock()) - .build() - val metricProducer = meterProvider as MetricProducer - - val operational = scenario.get("operational", Document::class.java) - val allocationPolicy = - when (val policyName = operational.getString("schedulerName")) { - "mem" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(MemoryWeigher() to -1.0) - ) - "mem-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(MemoryWeigher() to 1.0) - ) - "core-mem" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(CoreMemoryWeigher() to -1.0) - ) - "core-mem-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(CoreMemoryWeigher() to 1.0) - ) - "active-servers" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(ProvisionedCoresWeigher() to -1.0) - ) - "active-servers-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(InstanceCountWeigher() to 1.0) - ) - "provisioned-cores" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(ProvisionedCoresWeigher() to -1.0) - ) - "provisioned-cores-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(ProvisionedCoresWeigher() to 1.0) - ) - "random" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(RandomWeigher(java.util.Random(seeder.nextLong())) to 1.0) - ) - else -> throw IllegalArgumentException("Unknown policy $policyName") - } - - val trace = ParquetTraceReader( - listOf(traceReader), - Workload(workloadName, workloadFraction), - seed - ) - val failureFrequency = if (operational.getBoolean("failuresEnabled", false)) 24.0 * 7 else 0.0 - - withComputeService(clock, meterProvider, environment, allocationPolicy, interferenceModel) { scheduler -> - val failureDomain = if (failureFrequency > 0) { - logger.debug { "ENABLING failures" } - createFailureDomain( - this, - clock, - seeder.nextInt(), - failureFrequency, - scheduler, - chan - ) - } else { - null - } - - withMonitor(monitor, clock, meterProvider as MetricProducer, scheduler) { - processTrace( - clock, - trace, - scheduler, - chan, - monitor - ) - } - - failureDomain?.cancel() - } - - val monitorResults = collectMetrics(metricProducer) - logger.debug { "Finish SUBMIT=${monitorResults.submittedVms} FAIL=${monitorResults.unscheduledVms} QUEUE=${monitorResults.queuedVms} RUNNING=${monitorResults.runningVms}" } - } - } catch (cause: Throwable) { - logger.warn(cause) { "Experiment failed" } - } - - return monitor.getResult() - } - - private val POLL_INTERVAL = 5000L // ms = 5 s - private val HEARTBEAT_INTERVAL = 60000L // ms = 1 min - - override fun run(): Unit = runBlocking(Dispatchers.Default) { - logger.info { "Starting OpenDC web runner" } - logger.info { "Connecting to MongoDB instance" } - val database = createDatabase() - val manager = ScenarioManager(database.getCollection("scenarios")) - val portfolios = database.getCollection("portfolios") - val topologies = database.getCollection("topologies") - val topologyParser = TopologyParser(topologies) - - logger.info { "Watching for queued scenarios" } - - while (true) { - val scenario = manager.findNext() - - if (scenario == null) { - delay(POLL_INTERVAL) - continue - } - - val id = scenario.getObjectId("_id") - - logger.info { "Found queued scenario $id: attempting to claim" } - - if (!manager.claim(id)) { - logger.info { "Failed to claim scenario" } - continue - } - - coroutineScope { - // Launch heartbeat process - val heartbeat = launch { - while (true) { - delay(HEARTBEAT_INTERVAL) - manager.heartbeat(id) - } - } - - try { - val portfolio = portfolios.find(Filters.eq("_id", scenario.getObjectId("portfolioId"))).first()!! - val results = runScenario(portfolio, scenario, topologyParser) - - logger.info { "Writing results to database" } - - manager.finish(id, results) - - logger.info { "Successfully finished scenario $id" } - } catch (e: Exception) { - logger.error(e) { "Scenario failed to finish" } - manager.fail(id) - } finally { - heartbeat.cancel() - } - } - } - } -} - -/** - * Main entry point of the runner. - */ -fun main(args: Array): Unit = RunnerCli().main(args) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/ScenarioManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/ScenarioManager.kt deleted file mode 100644 index a3907051..00000000 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/ScenarioManager.kt +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.runner.web - -import com.mongodb.client.MongoCollection -import com.mongodb.client.model.Filters -import com.mongodb.client.model.Updates -import org.bson.Document -import org.bson.types.ObjectId -import java.time.Instant - -/** - * Manages the queue of scenarios that need to be processed. - */ -public class ScenarioManager(private val collection: MongoCollection) { - /** - * Find the next scenario that the simulator needs to process. - */ - public fun findNext(): Document? { - return collection - .find(Filters.eq("simulation.state", "QUEUED")) - .first() - } - - /** - * Claim the scenario in the database with the specified id. - */ - public fun claim(id: ObjectId): Boolean { - val res = collection.findOneAndUpdate( - Filters.and( - Filters.eq("_id", id), - Filters.eq("simulation.state", "QUEUED") - ), - Updates.combine( - Updates.set("simulation.state", "RUNNING"), - Updates.set("simulation.heartbeat", Instant.now()) - ) - ) - return res != null - } - - /** - * Update the heartbeat of the specified scenario. - */ - public fun heartbeat(id: ObjectId) { - collection.findOneAndUpdate( - Filters.and( - Filters.eq("_id", id), - Filters.eq("simulation.state", "RUNNING") - ), - Updates.set("simulation.heartbeat", Instant.now()) - ) - } - - /** - * Mark the scenario as failed. - */ - public fun fail(id: ObjectId) { - collection.findOneAndUpdate( - Filters.eq("_id", id), - Updates.combine( - Updates.set("simulation.state", "FAILED"), - Updates.set("simulation.heartbeat", Instant.now()) - ) - ) - } - - /** - * Persist the specified results. - */ - public fun finish(id: ObjectId, results: List) { - collection.findOneAndUpdate( - Filters.eq("_id", id), - Updates.combine( - Updates.set("simulation.state", "FINISHED"), - Updates.unset("simulation.time"), - Updates.set("results.total_requested_burst", results.map { it.totalRequestedBurst }), - Updates.set("results.total_granted_burst", results.map { it.totalGrantedBurst }), - Updates.set("results.total_overcommitted_burst", results.map { it.totalOvercommittedBurst }), - Updates.set("results.total_interfered_burst", results.map { it.totalInterferedBurst }), - Updates.set("results.mean_cpu_usage", results.map { it.meanCpuUsage }), - Updates.set("results.mean_cpu_demand", results.map { it.meanCpuDemand }), - Updates.set("results.mean_num_deployed_images", results.map { it.meanNumDeployedImages }), - Updates.set("results.max_num_deployed_images", results.map { it.maxNumDeployedImages }), - Updates.set("results.total_power_draw", results.map { it.totalPowerDraw }), - Updates.set("results.total_failure_slices", results.map { it.totalFailureSlices }), - Updates.set("results.total_failure_vm_slices", results.map { it.totalFailureVmSlices }), - Updates.set("results.total_vms_submitted", results.map { it.totalVmsSubmitted }), - Updates.set("results.total_vms_queued", results.map { it.totalVmsQueued }), - Updates.set("results.total_vms_finished", results.map { it.totalVmsFinished }), - Updates.set("results.total_vms_failed", results.map { it.totalVmsFailed }) - ) - ) - } -} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt deleted file mode 100644 index 2135ee1d..00000000 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/TopologyParser.kt +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.runner.web - -import com.mongodb.client.AggregateIterable -import com.mongodb.client.MongoCollection -import com.mongodb.client.model.Aggregates -import com.mongodb.client.model.Field -import com.mongodb.client.model.Filters -import com.mongodb.client.model.Projections -import org.bson.Document -import org.bson.types.ObjectId -import org.opendc.format.environment.EnvironmentReader -import org.opendc.format.environment.MachineDef -import org.opendc.simulator.compute.model.MachineModel -import org.opendc.simulator.compute.model.MemoryUnit -import org.opendc.simulator.compute.model.ProcessingNode -import org.opendc.simulator.compute.model.ProcessingUnit -import org.opendc.simulator.compute.power.LinearPowerModel -import java.util.* - -/** - * A helper class that converts the MongoDB topology into an OpenDC environment. - */ -public class TopologyParser(private val collection: MongoCollection) { - - /** - * Parse the topology from the specified [id]. - */ - public fun read(id: ObjectId): EnvironmentReader { - val nodes = mutableListOf() - val random = Random(0) - - for (machine in fetchMachines(id)) { - val clusterId = machine.get("rack_id").toString() - val position = machine.getInteger("position") - - val processors = machine.getList("cpus", Document::class.java).flatMap { cpu -> - val cores = cpu.getInteger("numberOfCores") - val speed = cpu.get("clockRateMhz", Number::class.java).toDouble() - // TODO Remove hardcoding of vendor - val node = ProcessingNode("Intel", "amd64", cpu.getString("name"), cores) - List(cores) { coreId -> - ProcessingUnit(node, coreId, speed) - } - } - val memoryUnits = machine.getList("memories", Document::class.java).map { memory -> - MemoryUnit( - "Samsung", - memory.getString("name"), - memory.get("speedMbPerS", Number::class.java).toDouble(), - memory.get("sizeMb", Number::class.java).toLong() - ) - } - - val energyConsumptionW = machine.getList("cpus", Document::class.java).sumOf { it.getInteger("energyConsumptionW") }.toDouble() - - nodes.add( - MachineDef( - UUID(random.nextLong(), random.nextLong()), - "node-$clusterId-$position", - mapOf("cluster" to clusterId), - MachineModel(processors, memoryUnits), - LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) - ) - ) - } - - return object : EnvironmentReader { - override fun read(): List = nodes - override fun close() {} - } - } - - /** - * Fetch the metadata of the topology. - */ - private fun fetchName(id: ObjectId): String { - return collection.aggregate( - listOf( - Aggregates.match(Filters.eq("_id", id)), - Aggregates.project(Projections.include("name")) - ) - ) - .first()!! - .getString("name") - } - - /** - * Fetch a topology from the database with the specified [id]. - */ - private fun fetchMachines(id: ObjectId): AggregateIterable { - return collection.aggregate( - listOf( - Aggregates.match(Filters.eq("_id", id)), - Aggregates.project(Projections.fields(Document("racks", "\$rooms.tiles.rack"))), - Aggregates.unwind("\$racks"), - Aggregates.unwind("\$racks"), - Aggregates.replaceRoot("\$racks"), - Aggregates.addFields(Field("machines.rack_id", "\$_id")), - Aggregates.unwind("\$machines"), - Aggregates.replaceRoot("\$machines") - ) - ) - } -} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/WebExperimentMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/WebExperimentMonitor.kt deleted file mode 100644 index c913f82f..00000000 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/runner/web/WebExperimentMonitor.kt +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.runner.web - -import mu.KotlinLogging -import org.opendc.compute.api.Server -import org.opendc.compute.api.ServerState -import org.opendc.compute.service.driver.Host -import org.opendc.compute.service.driver.HostState -import org.opendc.experiments.capelin.monitor.ExperimentMonitor -import org.opendc.experiments.capelin.telemetry.HostEvent -import kotlin.math.max - -/** - * An [ExperimentMonitor] that tracks the aggregate metrics for each repeat. - */ -public class WebExperimentMonitor : ExperimentMonitor { - private val logger = KotlinLogging.logger {} - - override fun reportVmStateChange(time: Long, server: Server, newState: ServerState) {} - - override fun reportHostStateChange(time: Long, host: Host, newState: HostState) { - logger.debug { "Host ${host.uid} changed state $newState [$time]" } - } - - override fun reportHostSlice( - time: Long, - requestedBurst: Long, - grantedBurst: Long, - overcommissionedBurst: Long, - interferedBurst: Long, - cpuUsage: Double, - cpuDemand: Double, - powerDraw: Double, - numberOfDeployedImages: Int, - host: Host, - ) { - processHostEvent( - HostEvent( - time, - 5 * 60 * 1000L, - host, - numberOfDeployedImages, - requestedBurst, - grantedBurst, - overcommissionedBurst, - interferedBurst, - cpuUsage, - cpuDemand, - powerDraw, - host.model.cpuCount - ) - ) - } - - private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() - private val hostMetrics: MutableMap = mutableMapOf() - - private fun processHostEvent(event: HostEvent) { - val slices = event.duration / SLICE_LENGTH - - hostAggregateMetrics = AggregateHostMetrics( - hostAggregateMetrics.totalRequestedBurst + event.requestedBurst, - hostAggregateMetrics.totalGrantedBurst + event.grantedBurst, - hostAggregateMetrics.totalOvercommittedBurst + event.overcommissionedBurst, - hostAggregateMetrics.totalInterferedBurst + event.interferedBurst, - hostAggregateMetrics.totalPowerDraw + (event.duration * event.powerDraw) / 3600, - hostAggregateMetrics.totalFailureSlices + if (event.host.state != HostState.UP) slices else 0, - hostAggregateMetrics.totalFailureVmSlices + if (event.host.state != HostState.UP) event.vmCount * slices else 0 - ) - - hostMetrics.compute(event.host) { _, prev -> - HostMetrics( - (event.cpuUsage.takeIf { event.host.state == HostState.UP } ?: 0.0) + (prev?.cpuUsage ?: 0.0), - (event.cpuDemand.takeIf { event.host.state == HostState.UP } ?: 0.0) + (prev?.cpuDemand ?: 0.0), - event.vmCount + (prev?.vmCount ?: 0), - 1 + (prev?.count ?: 0) - ) - } - } - - private val SLICE_LENGTH: Long = 5 * 60 * 1000 - - public data class AggregateHostMetrics( - val totalRequestedBurst: Long = 0, - val totalGrantedBurst: Long = 0, - val totalOvercommittedBurst: Long = 0, - val totalInterferedBurst: Long = 0, - val totalPowerDraw: Double = 0.0, - val totalFailureSlices: Long = 0, - val totalFailureVmSlices: Long = 0, - ) - - public data class HostMetrics( - val cpuUsage: Double, - val cpuDemand: Double, - val vmCount: Long, - val count: Long - ) - - private var provisionerMetrics: AggregateProvisionerMetrics = AggregateProvisionerMetrics() - - override fun reportProvisionerMetrics( - time: Long, - totalHostCount: Int, - availableHostCount: Int, - totalVmCount: Int, - activeVmCount: Int, - inactiveVmCount: Int, - waitingVmCount: Int, - failedVmCount: Int - ) { - provisionerMetrics = AggregateProvisionerMetrics( - max(totalVmCount, provisionerMetrics.vmTotalCount), - max(waitingVmCount, provisionerMetrics.vmWaitingCount), - max(activeVmCount, provisionerMetrics.vmActiveCount), - max(inactiveVmCount, provisionerMetrics.vmInactiveCount), - max(failedVmCount, provisionerMetrics.vmFailedCount), - ) - } - - public data class AggregateProvisionerMetrics( - val vmTotalCount: Int = 0, - val vmWaitingCount: Int = 0, - val vmActiveCount: Int = 0, - val vmInactiveCount: Int = 0, - val vmFailedCount: Int = 0 - ) - - override fun close() {} - - public fun getResult(): Result { - return Result( - hostAggregateMetrics.totalRequestedBurst, - hostAggregateMetrics.totalGrantedBurst, - hostAggregateMetrics.totalOvercommittedBurst, - hostAggregateMetrics.totalInterferedBurst, - hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), - hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), - hostMetrics.map { it.value.vmCount.toDouble() / it.value.count }.average(), - hostMetrics.map { it.value.vmCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, - hostAggregateMetrics.totalPowerDraw, - hostAggregateMetrics.totalFailureSlices, - hostAggregateMetrics.totalFailureVmSlices, - provisionerMetrics.vmTotalCount, - provisionerMetrics.vmWaitingCount, - provisionerMetrics.vmInactiveCount, - provisionerMetrics.vmFailedCount, - ) - } - - public data class Result( - public val totalRequestedBurst: Long, - public val totalGrantedBurst: Long, - public val totalOvercommittedBurst: Long, - public val totalInterferedBurst: Long, - public val meanCpuUsage: Double, - public val meanCpuDemand: Double, - public val meanNumDeployedImages: Double, - public val maxNumDeployedImages: Double, - public val totalPowerDraw: Double, - public val totalFailureSlices: Long, - public val totalFailureVmSlices: Long, - public val totalVmsSubmitted: Int, - public val totalVmsQueued: Int, - public val totalVmsFinished: Int, - public val totalVmsFailed: Int - ) -} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiClient.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiClient.kt new file mode 100644 index 00000000..9f2656c4 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiClient.kt @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client + +import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import io.ktor.client.* +import io.ktor.client.features.auth.* +import io.ktor.client.features.auth.providers.* +import io.ktor.client.features.json.* +import io.ktor.client.request.* +import io.ktor.client.statement.* +import io.ktor.http.* +import org.opendc.web.client.model.* +import java.net.URI + +/** + * Client implementation for the OpenDC REST API (version 2). + * + * @param baseUrl The base url of the API. + * @param auth The authentication configuration for the client. + * @param client The HTTP client to use. + */ +public class ApiClient( + private val baseUrl: URI, + private val auth: AuthConfiguration, + private val audience: String = "https://api.opendc.org/v2/", + client: HttpClient = HttpClient {} +) : AutoCloseable { + /** + * The Ktor [HttpClient] that is used to communicate with the REST API. + */ + private val client = client.config { + install(JsonFeature) { + serializer = JacksonSerializer { + registerModule(JavaTimeModule()) + } + } + install(Auth) { + bearer { + loadTokens { requestToken() } + refreshTokens { requestToken() } + } + } + expectSuccess = false + } + + /** + * Retrieve the topology with the specified [id]. + */ + public suspend fun getPortfolio(id: String): Portfolio? { + val url = URLBuilder(Url(baseUrl)) + .path("portfolios", id) + .build() + return when (val result = client.get>(url)) { + is ApiResult.Success -> result.data + else -> null + } + } + + /** + * Retrieve the scenario with the specified [id]. + */ + public suspend fun getScenario(id: String): Scenario? { + val url = URLBuilder(Url(baseUrl)) + .path("scenarios", id) + .build() + return when (val result = client.get>(url)) { + is ApiResult.Success -> result.data + else -> null + } + } + + /** + * Retrieve the topology with the specified [id]. + */ + public suspend fun getTopology(id: String): Topology? { + val url = URLBuilder(Url(baseUrl)) + .path("topologies", id) + .build() + return when (val result = client.get>(url)) { + is ApiResult.Success -> result.data + else -> null + } + } + + /** + * Retrieve the available jobs. + */ + public suspend fun getJobs(): List { + val url = URLBuilder(Url(baseUrl)) + .path("jobs") + .build() + return when (val result = client.get>>(url)) { + is ApiResult.Success -> result.data + else -> emptyList() + } + } + + /** + * Update the specified job. + * + * @param id The identifier of the job. + * @param state The new state of the job. + * @param results The results of the job. + */ + public suspend fun updateJob(id: String, state: SimulationState, results: Map = emptyMap()): Boolean { + val url = URLBuilder(Url(baseUrl)) + .path("jobs", id) + .build() + + data class Request( + val state: SimulationState, + val results: Map + ) + + val res = client.post { + url(url) + contentType(ContentType.Application.Json) + body = Request(state, results) + } + return res.status.isSuccess() + } + + /** + * Request the auth token for the API. + */ + private suspend fun requestToken(): BearerTokens { + data class Request( + val audience: String, + @JsonProperty("grant_type") + val grantType: String, + @JsonProperty("client_id") + val clientId: String, + @JsonProperty("client_secret") + val clientSecret: String + ) + + data class Response( + @JsonProperty("access_token") + val accessToken: String, + @JsonProperty("token_type") + val tokenType: String, + val scope: String = "", + @JsonProperty("expires_in") + val expiresIn: Long + ) + + val result = client.post { + url(Url("https://${auth.domain}/oauth/token")) + contentType(ContentType.Application.Json) + body = Request(audience, "client_credentials", auth.clientId, auth.clientSecret) + } + + return BearerTokens(result.accessToken, "") + } + + override fun close() = client.close() +} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiResult.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiResult.kt new file mode 100644 index 00000000..a3df01c5 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/ApiResult.kt @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client + +import com.fasterxml.jackson.annotation.JsonSubTypes +import com.fasterxml.jackson.annotation.JsonTypeInfo + +/** + * Generic response model for the OpenDC API. + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.DEDUCTION) +@JsonSubTypes(JsonSubTypes.Type(ApiResult.Success::class), JsonSubTypes.Type(ApiResult.Failure::class)) +public sealed class ApiResult { + /** + * A response indicating everything is okay. + */ + public data class Success(val data: T) : ApiResult() + + /** + * A response indicating a failure. + */ + public data class Failure(val message: String, val errors: List = emptyList()) : ApiResult() +} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/AuthConfiguration.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/AuthConfiguration.kt new file mode 100644 index 00000000..5dbf2f59 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/AuthConfiguration.kt @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client + +/** + * The authentication configuration for the API client. + */ +public data class AuthConfiguration( + val domain: String, + val clientId: String, + val clientSecret: String +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Job.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Job.kt new file mode 100644 index 00000000..eeb65e49 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Job.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonProperty +import java.time.LocalDateTime + +/** + * A description of a simulation job. + */ +public data class Job( + @JsonProperty("_id") + val id: String, + val scenarioId: String, + val state: SimulationState, + val heartbeat: LocalDateTime, + val results: Map +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt new file mode 100644 index 00000000..c6757c5c --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A machine in a rack. + */ +@JsonIgnoreProperties("id_legacy") +public data class Machine( + @JsonProperty("_id") + val id: String, + val position: Int, + val cpus: List = emptyList(), + val gpus: List = emptyList(), + @JsonProperty("memories") + val memory: List = emptyList(), + @JsonProperty("storages") + val storage: List = emptyList() +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/MemoryUnit.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/MemoryUnit.kt new file mode 100644 index 00000000..11e794e8 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/MemoryUnit.kt @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A memory unit in a system. + */ +public data class MemoryUnit( + @JsonProperty("_id") + val id: String, + val name: String, + val speedMbPerS: Double, + val sizeMb: Double, + val energyConsumptionW: Double +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/OperationalPhenomena.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/OperationalPhenomena.kt new file mode 100644 index 00000000..ef5b4902 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/OperationalPhenomena.kt @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +/** + * Object describing the enabled operational phenomena for a scenario. + */ +public data class OperationalPhenomena( + val failuresEnabled: Boolean, + val performanceInterferenceEnabled: Boolean, + val schedulerName: String +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Portfolio.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Portfolio.kt new file mode 100644 index 00000000..6904920b --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Portfolio.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A portfolio in OpenDC. + */ +public data class Portfolio( + @JsonProperty("_id") + val id: String, + val projectId: String, + val name: String, + @JsonProperty("scenarioIds") + val scenarios: Set, + val targets: PortfolioTargets +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/PortfolioTargets.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/PortfolioTargets.kt new file mode 100644 index 00000000..07c11c19 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/PortfolioTargets.kt @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +/** + * The targets of a portfolio. + */ +public data class PortfolioTargets(val enabledMetrics: Set, val repeatsPerScenario: Int) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ProcessingUnit.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ProcessingUnit.kt new file mode 100644 index 00000000..449b5c43 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ProcessingUnit.kt @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A CPU model. + */ +public data class ProcessingUnit( + @JsonProperty("_id") + val id: String, + val name: String, + val clockRateMhz: Double, + val numberOfCores: Int, + val energyConsumptionW: Double +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Rack.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Rack.kt new file mode 100644 index 00000000..a0464388 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Rack.kt @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A rack in a datacenter. + */ +@JsonIgnoreProperties("id_legacy") +public class Rack( + @JsonProperty("_id") + val id: String, + val name: String, + val capacity: Int, + val powerCapacityW: Double, + val machines: List +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt new file mode 100644 index 00000000..e961d6db --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A room in a datacenter. + */ +@JsonIgnoreProperties("id_legacy") +public data class Room( + @JsonProperty("_id") + val id: String, + val name: String, + val tiles: Set +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt new file mode 100644 index 00000000..3bee3204 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A room tile. + */ +@JsonIgnoreProperties("id_legacy") +public data class RoomTile( + @JsonProperty("_id") + val id: String, + val positionX: Double, + val positionY: Double, + val rack: Rack? = null +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Scenario.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Scenario.kt new file mode 100644 index 00000000..851ff980 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Scenario.kt @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * A simulation scenario. + */ +public data class Scenario( + @JsonProperty("_id") + val id: String, + val portfolioId: String, + val name: String, + val trace: ScenarioTrace, + val topology: ScenarioTopology, + @JsonProperty("operational") + val operationalPhenomena: OperationalPhenomena +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTopology.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTopology.kt new file mode 100644 index 00000000..2b90f7ef --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTopology.kt @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +/** + * The topology details for a scenario. + */ +public data class ScenarioTopology(val topologyId: String) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTrace.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTrace.kt new file mode 100644 index 00000000..adff6d97 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/ScenarioTrace.kt @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +/** + * The trace details of a scenario. + */ +public data class ScenarioTrace(val traceId: String, val loadSamplingFraction: Double) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/SimulationState.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/SimulationState.kt new file mode 100644 index 00000000..2eadd747 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/SimulationState.kt @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +/** + * The state of a simulation job. + */ +public enum class SimulationState { + QUEUED, CLAIMED, RUNNING, FINISHED, FAILED +} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Topology.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Topology.kt new file mode 100644 index 00000000..b59aba42 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Topology.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client.model + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties +import com.fasterxml.jackson.annotation.JsonProperty + +/** + * Model for an OpenDC topology. + */ +@JsonIgnoreProperties("id_legacy", "datacenter_id_legacy", "datetimeLastUpdated", "datetimeLastEdited") +public data class Topology( + @JsonProperty("_id") + val id: String, + val projectId: String, + val name: String, + val rooms: Set, +) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt new file mode 100644 index 00000000..5b5ef802 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.runner + +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.options.* +import com.github.ajalt.clikt.parameters.types.file +import com.github.ajalt.clikt.parameters.types.long +import io.opentelemetry.api.metrics.MeterProvider +import io.opentelemetry.sdk.metrics.SdkMeterProvider +import io.opentelemetry.sdk.metrics.export.MetricProducer +import kotlinx.coroutines.* +import kotlinx.coroutines.channels.Channel +import mu.KotlinLogging +import org.opendc.compute.service.scheduler.FilterScheduler +import org.opendc.compute.service.scheduler.filters.ComputeCapabilitiesFilter +import org.opendc.compute.service.scheduler.filters.ComputeFilter +import org.opendc.compute.service.scheduler.weights.* +import org.opendc.experiments.capelin.* +import org.opendc.experiments.capelin.model.Workload +import org.opendc.experiments.capelin.trace.ParquetTraceReader +import org.opendc.experiments.capelin.trace.PerformanceInterferenceReader +import org.opendc.experiments.capelin.trace.RawParquetTraceReader +import org.opendc.format.environment.EnvironmentReader +import org.opendc.format.environment.MachineDef +import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel +import org.opendc.simulator.compute.model.MachineModel +import org.opendc.simulator.compute.model.MemoryUnit +import org.opendc.simulator.compute.model.ProcessingNode +import org.opendc.simulator.compute.model.ProcessingUnit +import org.opendc.simulator.compute.power.LinearPowerModel +import org.opendc.simulator.core.runBlockingSimulation +import org.opendc.telemetry.sdk.toOtelClock +import org.opendc.web.client.ApiClient +import org.opendc.web.client.AuthConfiguration +import org.opendc.web.client.model.Scenario +import org.opendc.web.client.model.Topology +import java.io.File +import java.net.URI +import java.util.* +import kotlin.random.Random +import kotlin.random.asJavaRandom +import org.opendc.web.client.model.Portfolio as ClientPortfolio + +private val logger = KotlinLogging.logger {} + +/** + * Represents the CLI command for starting the OpenDC web runner. + */ +class RunnerCli : CliktCommand(name = "runner") { + /** + * The URL to the OpenDC API. + */ + private val apiUrl by option( + "--api-url", + help = "url to the OpenDC API", + envvar = "OPENDC_API_URL" + ) + .convert { URI(it) } + .default(URI("https://api.opendc.org/v2")) + + /** + * The auth domain to use. + */ + private val authDomain by option( + "--auth-domain", + help = "auth domain of the OpenDC API", + envvar = "AUTH0_DOMAIN" + ) + .required() + + /** + * The auth client ID to use. + */ + private val authClientId by option( + "--auth-id", + help = "auth client id of the OpenDC API", + envvar = "AUTH0_CLIENT_ID" + ) + .required() + + /** + * The auth client secret to use. + */ + private val authClientSecret by option( + "--auth-secret", + help = "auth client secret of the OpenDC API", + envvar = "AUTH0_CLIENT_SECRET" + ) + .required() + + /** + * The path to the traces directory. + */ + private val tracePath by option( + "--traces", + help = "path to the directory containing the traces", + envvar = "OPENDC_TRACES" + ) + .file(canBeFile = false) + .defaultLazy { File("traces/") } + + /** + * The maximum duration of a single experiment run. + */ + private val runTimeout by option( + "--run-timeout", + help = "maximum duration of experiment in seconds", + envvar = "OPENDC_RUN_TIMEOUT" + ) + .long() + .default(60L * 3) // Experiment may run for a maximum of three minutes + + /** + * Run a single scenario. + */ + private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, environment: EnvironmentReader): List { + val id = scenario.id + + logger.info { "Constructing performance interference model" } + + val traceDir = File( + tracePath, + scenario.trace.traceId + ) + val traceReader = RawParquetTraceReader(traceDir) + val interferenceGroups = let { + val path = File(traceDir, "performance-interference-model.json") + val operational = scenario.operationalPhenomena + val enabled = operational.performanceInterferenceEnabled + + if (!enabled || !path.exists()) { + return@let null + } + + PerformanceInterferenceReader(path.inputStream()).use { reader -> reader.read() } + } + + val targets = portfolio.targets + val results = (0 until targets.repeatsPerScenario).map { repeat -> + logger.info { "Starting repeat $repeat" } + withTimeout(runTimeout * 1000) { + val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong()).asJavaRandom()) } + runRepeat(scenario, repeat, environment, traceReader, interferenceModel) + } + } + + logger.info { "Finished simulation for scenario $id" } + + return results + } + + /** + * Run a single repeat. + */ + private suspend fun runRepeat( + scenario: Scenario, + repeat: Int, + environment: EnvironmentReader, + traceReader: RawParquetTraceReader, + interferenceModel: VmInterferenceModel? + ): WebExperimentMonitor.Result { + val monitor = WebExperimentMonitor() + + try { + runBlockingSimulation { + val seed = repeat + val workloadName = scenario.trace.traceId + val workloadFraction = scenario.trace.loadSamplingFraction + + val seeder = Random(seed) + + val chan = Channel(Channel.CONFLATED) + + val meterProvider: MeterProvider = SdkMeterProvider + .builder() + .setClock(clock.toOtelClock()) + .build() + val metricProducer = meterProvider as MetricProducer + + val operational = scenario.operationalPhenomena + val allocationPolicy = + when (val policyName = operational.schedulerName) { + "mem" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(MemoryWeigher() to -1.0) + ) + "mem-inv" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(MemoryWeigher() to 1.0) + ) + "core-mem" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(CoreMemoryWeigher() to -1.0) + ) + "core-mem-inv" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(CoreMemoryWeigher() to 1.0) + ) + "active-servers" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(ProvisionedCoresWeigher() to -1.0) + ) + "active-servers-inv" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(InstanceCountWeigher() to 1.0) + ) + "provisioned-cores" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(ProvisionedCoresWeigher() to -1.0) + ) + "provisioned-cores-inv" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(ProvisionedCoresWeigher() to 1.0) + ) + "random" -> FilterScheduler( + filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), + weighers = listOf(RandomWeigher(java.util.Random(seeder.nextLong())) to 1.0) + ) + else -> throw IllegalArgumentException("Unknown policy $policyName") + } + + val trace = ParquetTraceReader( + listOf(traceReader), + Workload(workloadName, workloadFraction), + seed + ) + val failureFrequency = if (operational.failuresEnabled) 24.0 * 7 else 0.0 + + withComputeService(clock, meterProvider, environment, allocationPolicy, interferenceModel) { scheduler -> + val failureDomain = if (failureFrequency > 0) { + logger.debug { "ENABLING failures" } + createFailureDomain( + this, + clock, + seeder.nextInt(), + failureFrequency, + scheduler, + chan + ) + } else { + null + } + + withMonitor(monitor, clock, meterProvider as MetricProducer, scheduler) { + processTrace( + clock, + trace, + scheduler, + chan, + monitor + ) + } + + failureDomain?.cancel() + } + + val monitorResults = collectMetrics(metricProducer) + logger.debug { "Finish SUBMIT=${monitorResults.submittedVms} FAIL=${monitorResults.unscheduledVms} QUEUE=${monitorResults.queuedVms} RUNNING=${monitorResults.runningVms}" } + } + } catch (cause: Throwable) { + logger.warn(cause) { "Experiment failed" } + } + + return monitor.getResult() + } + + private val POLL_INTERVAL = 30000L // ms = 30 s + private val HEARTBEAT_INTERVAL = 60000L // ms = 1 min + + override fun run(): Unit = runBlocking(Dispatchers.Default) { + logger.info { "Starting OpenDC web runner" } + + val client = ApiClient(baseUrl = apiUrl, AuthConfiguration(authDomain, authClientId, authClientSecret)) + val manager = ScenarioManager(client) + + logger.info { "Watching for queued scenarios" } + + while (true) { + val scenario = manager.findNext() + + if (scenario == null) { + delay(POLL_INTERVAL) + continue + } + + val id = scenario.id + + logger.info { "Found queued scenario $id: attempting to claim" } + + if (!manager.claim(id)) { + logger.info { "Failed to claim scenario" } + continue + } + + coroutineScope { + // Launch heartbeat process + val heartbeat = launch { + while (true) { + manager.heartbeat(id) + delay(HEARTBEAT_INTERVAL) + } + } + + try { + val scenarioModel = client.getScenario(id)!! + val portfolio = client.getPortfolio(scenarioModel.portfolioId)!! + val environment = convert(client.getTopology(scenarioModel.topology.topologyId)!!) + val results = runScenario(portfolio, scenarioModel, environment) + + logger.info { "Writing results to database" } + + manager.finish(id, results) + + logger.info { "Successfully finished scenario $id" } + } catch (e: Exception) { + logger.error(e) { "Scenario failed to finish" } + manager.fail(id) + } finally { + heartbeat.cancel() + } + } + } + } + + /** + * Convert the specified [topology] into an [EnvironmentReader] understood by Capelin. + */ + private fun convert(topology: Topology): EnvironmentReader { + val nodes = mutableListOf() + val random = Random(0) + + val machines = topology.rooms.asSequence() + .flatMap { room -> + room.tiles.flatMap { tile -> + tile.rack?.machines?.map { machine -> tile.rack to machine } ?: emptyList() + } + } + for ((rack, machine) in machines) { + val clusterId = rack.id + val position = machine.position + + val processors = machine.cpus.flatMap { cpu -> + val cores = cpu.numberOfCores + val speed = cpu.clockRateMhz + // TODO Remove hard coding of vendor + val node = ProcessingNode("Intel", "amd64", cpu.name, cores) + List(cores) { coreId -> + ProcessingUnit(node, coreId, speed) + } + } + val memoryUnits = machine.memory.map { memory -> + MemoryUnit( + "Samsung", + memory.name, + memory.speedMbPerS, + memory.sizeMb.toLong() + ) + } + + val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW } + + nodes.add( + MachineDef( + UUID(random.nextLong(), random.nextLong()), + "node-$clusterId-$position", + mapOf("cluster" to clusterId), + MachineModel(processors, memoryUnits), + LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) + ) + ) + } + + return object : EnvironmentReader { + override fun read(): List = nodes + override fun close() {} + } + } +} + +/** + * Main entry point of the runner. + */ +fun main(args: Array): Unit = RunnerCli().main(args) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt new file mode 100644 index 00000000..4044cec9 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.runner + +import org.opendc.web.client.ApiClient +import org.opendc.web.client.model.Job +import org.opendc.web.client.model.SimulationState + +/** + * Manages the queue of scenarios that need to be processed. + */ +public class ScenarioManager(private val client: ApiClient) { + /** + * Find the next job that the simulator needs to process. + */ + public suspend fun findNext(): Job? { + return client.getJobs().firstOrNull() + } + + /** + * Claim the simulation job with the specified id. + */ + public suspend fun claim(id: String): Boolean { + return client.updateJob(id, SimulationState.CLAIMED) + } + + /** + * Update the heartbeat of the specified scenario. + */ + public suspend fun heartbeat(id: String) { + client.updateJob(id, SimulationState.RUNNING) + } + + /** + * Mark the scenario as failed. + */ + public suspend fun fail(id: String) { + client.updateJob(id, SimulationState.FAILED) + } + + /** + * Persist the specified results. + */ + public suspend fun finish(id: String, results: List) { + client.updateJob( + id, SimulationState.FINISHED, + mapOf( + "total_requested_burst" to results.map { it.totalRequestedBurst }, + "total_granted_burst" to results.map { it.totalGrantedBurst }, + "total_overcommitted_burst" to results.map { it.totalOvercommittedBurst }, + "total_interfered_burst" to results.map { it.totalInterferedBurst }, + "mean_cpu_usage" to results.map { it.meanCpuUsage }, + "mean_cpu_demand" to results.map { it.meanCpuDemand }, + "mean_num_deployed_images" to results.map { it.meanNumDeployedImages }, + "max_num_deployed_images" to results.map { it.maxNumDeployedImages }, + "total_power_draw" to results.map { it.totalPowerDraw }, + "total_failure_slices" to results.map { it.totalFailureSlices }, + "total_failure_vm_slices" to results.map { it.totalFailureVmSlices }, + "total_vms_submitted" to results.map { it.totalVmsSubmitted }, + "total_vms_queued" to results.map { it.totalVmsQueued }, + "total_vms_finished" to results.map { it.totalVmsFinished }, + "total_vms_failed" to results.map { it.totalVmsFailed } + ) + ) + } +} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt new file mode 100644 index 00000000..d4445810 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.runner + +import mu.KotlinLogging +import org.opendc.compute.api.Server +import org.opendc.compute.api.ServerState +import org.opendc.compute.service.driver.Host +import org.opendc.compute.service.driver.HostState +import org.opendc.experiments.capelin.monitor.ExperimentMonitor +import org.opendc.experiments.capelin.telemetry.HostEvent +import kotlin.math.max + +/** + * An [ExperimentMonitor] that tracks the aggregate metrics for each repeat. + */ +public class WebExperimentMonitor : ExperimentMonitor { + private val logger = KotlinLogging.logger {} + + override fun reportVmStateChange(time: Long, server: Server, newState: ServerState) {} + + override fun reportHostStateChange(time: Long, host: Host, newState: HostState) { + logger.debug { "Host ${host.uid} changed state $newState [$time]" } + } + + override fun reportHostSlice( + time: Long, + requestedBurst: Long, + grantedBurst: Long, + overcommissionedBurst: Long, + interferedBurst: Long, + cpuUsage: Double, + cpuDemand: Double, + powerDraw: Double, + numberOfDeployedImages: Int, + host: Host, + ) { + processHostEvent( + HostEvent( + time, + 5 * 60 * 1000L, + host, + numberOfDeployedImages, + requestedBurst, + grantedBurst, + overcommissionedBurst, + interferedBurst, + cpuUsage, + cpuDemand, + powerDraw, + host.model.cpuCount + ) + ) + } + + private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() + private val hostMetrics: MutableMap = mutableMapOf() + + private fun processHostEvent(event: HostEvent) { + val slices = event.duration / SLICE_LENGTH + + hostAggregateMetrics = AggregateHostMetrics( + hostAggregateMetrics.totalRequestedBurst + event.requestedBurst, + hostAggregateMetrics.totalGrantedBurst + event.grantedBurst, + hostAggregateMetrics.totalOvercommittedBurst + event.overcommissionedBurst, + hostAggregateMetrics.totalInterferedBurst + event.interferedBurst, + hostAggregateMetrics.totalPowerDraw + (event.duration * event.powerDraw) / 3600, + hostAggregateMetrics.totalFailureSlices + if (event.host.state != HostState.UP) slices else 0, + hostAggregateMetrics.totalFailureVmSlices + if (event.host.state != HostState.UP) event.vmCount * slices else 0 + ) + + hostMetrics.compute(event.host) { _, prev -> + HostMetrics( + (event.cpuUsage.takeIf { event.host.state == HostState.UP } ?: 0.0) + (prev?.cpuUsage ?: 0.0), + (event.cpuDemand.takeIf { event.host.state == HostState.UP } ?: 0.0) + (prev?.cpuDemand ?: 0.0), + event.vmCount + (prev?.vmCount ?: 0), + 1 + (prev?.count ?: 0) + ) + } + } + + private val SLICE_LENGTH: Long = 5 * 60 * 1000 + + public data class AggregateHostMetrics( + val totalRequestedBurst: Long = 0, + val totalGrantedBurst: Long = 0, + val totalOvercommittedBurst: Long = 0, + val totalInterferedBurst: Long = 0, + val totalPowerDraw: Double = 0.0, + val totalFailureSlices: Long = 0, + val totalFailureVmSlices: Long = 0, + ) + + public data class HostMetrics( + val cpuUsage: Double, + val cpuDemand: Double, + val vmCount: Long, + val count: Long + ) + + private var provisionerMetrics: AggregateProvisionerMetrics = AggregateProvisionerMetrics() + + override fun reportProvisionerMetrics( + time: Long, + totalHostCount: Int, + availableHostCount: Int, + totalVmCount: Int, + activeVmCount: Int, + inactiveVmCount: Int, + waitingVmCount: Int, + failedVmCount: Int + ) { + provisionerMetrics = AggregateProvisionerMetrics( + max(totalVmCount, provisionerMetrics.vmTotalCount), + max(waitingVmCount, provisionerMetrics.vmWaitingCount), + max(activeVmCount, provisionerMetrics.vmActiveCount), + max(inactiveVmCount, provisionerMetrics.vmInactiveCount), + max(failedVmCount, provisionerMetrics.vmFailedCount), + ) + } + + public data class AggregateProvisionerMetrics( + val vmTotalCount: Int = 0, + val vmWaitingCount: Int = 0, + val vmActiveCount: Int = 0, + val vmInactiveCount: Int = 0, + val vmFailedCount: Int = 0 + ) + + override fun close() {} + + public fun getResult(): Result { + return Result( + hostAggregateMetrics.totalRequestedBurst, + hostAggregateMetrics.totalGrantedBurst, + hostAggregateMetrics.totalOvercommittedBurst, + hostAggregateMetrics.totalInterferedBurst, + hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), + hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), + hostMetrics.map { it.value.vmCount.toDouble() / it.value.count }.average(), + hostMetrics.map { it.value.vmCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, + hostAggregateMetrics.totalPowerDraw, + hostAggregateMetrics.totalFailureSlices, + hostAggregateMetrics.totalFailureVmSlices, + provisionerMetrics.vmTotalCount, + provisionerMetrics.vmWaitingCount, + provisionerMetrics.vmInactiveCount, + provisionerMetrics.vmFailedCount, + ) + } + + public data class Result( + public val totalRequestedBurst: Long, + public val totalGrantedBurst: Long, + public val totalOvercommittedBurst: Long, + public val totalInterferedBurst: Long, + public val meanCpuUsage: Double, + public val meanCpuDemand: Double, + public val meanNumDeployedImages: Double, + public val maxNumDeployedImages: Double, + public val totalPowerDraw: Double, + public val totalFailureSlices: Long, + public val totalFailureVmSlices: Long, + public val totalVmsSubmitted: Int, + public val totalVmsQueued: Int, + public val totalVmsFinished: Int, + public val totalVmsFailed: Int + ) +} diff --git a/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml b/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml index 503bc5dc..ad99cc00 100644 --- a/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml +++ b/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml @@ -36,7 +36,7 @@ - + diff --git a/opendc-web/opendc-web-runner/src/test/kotlin/org/opendc/web/client/ApiClientTest.kt b/opendc-web/opendc-web-runner/src/test/kotlin/org/opendc/web/client/ApiClientTest.kt new file mode 100644 index 00000000..3a0730a6 --- /dev/null +++ b/opendc-web/opendc-web-runner/src/test/kotlin/org/opendc/web/client/ApiClientTest.kt @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.client + +import io.ktor.client.* +import io.ktor.client.engine.mock.* +import io.ktor.http.* +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Test +import java.net.URI + +/** + * Test suite for the [ApiClient] class. + */ +class ApiClientTest { + /** + * The Ktor [HttpClient] instance. + */ + private val ktor = HttpClient(MockEngine) { + engine { + addHandler { request -> + when (request.url.fullPath) { + "/oauth/token" -> { + val responseHeaders = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "access_token": "eyJz93a...k4laUWw", + "token_type": "Bearer", + "expires_in": 86400 + } + """.trimIndent(), + headers = responseHeaders + ) + } + "/portfolios/5fda5daa97dca438e7cb0a4c" -> { + val responseHeaders = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "data": { + "_id": "string", + "projectId": "string", + "name": "string", + "scenarioIds": [ + "string" + ], + "targets": { + "enabledMetrics": [ + "string" + ], + "repeatsPerScenario": 0 + } + } + } + """.trimIndent(), + headers = responseHeaders + ) + } + "/portfolios/x" -> { + val responseHeaders = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "message": "Not Found" + } + """.trimIndent(), + headers = responseHeaders, status = HttpStatusCode.NotFound + ) + } + "/scenarios/5fda5db297dca438e7cb0a4d" -> { + val responseHeaders = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "data": { + "_id": "string", + "portfolioId": "string", + "name": "string", + "trace": { + "traceId": "string", + "loadSamplingFraction": 0 + }, + "topology": { + "topologyId": "string" + }, + "operational": { + "failuresEnabled": true, + "performanceInterferenceEnabled": true, + "schedulerName": "string" + } + } + } + """.trimIndent(), + headers = responseHeaders + ) + } + "/scenarios/x" -> { + val responseHeaders = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "message": "Not Found" + } + """.trimIndent(), + headers = responseHeaders, status = HttpStatusCode.NotFound + ) + } + "/topologies/5f9825a6cf6e4c24e380b86f" -> { + val responseHeaders = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "data": { + "_id": "string", + "projectId": "string", + "name": "string", + "rooms": [ + { + "_id": "string", + "name": "string", + "tiles": [ + { + "_id": "string", + "positionX": 0, + "positionY": 0, + "rack": { + "_id": "string", + "name": "string", + "capacity": 0, + "powerCapacityW": 0, + "machines": [ + { + "_id": "string", + "position": 0, + "cpus": [ + { + "_id": "string", + "name": "string", + "clockRateMhz": 0, + "numberOfCores": 0 + } + ], + "gpus": [ + { + "_id": "string", + "name": "string", + "clockRateMhz": 0, + "numberOfCores": 0 + } + ], + "memories": [ + { + "_id": "string", + "name": "string", + "speedMbPerS": 0, + "sizeMb": 0 + } + ], + "storages": [ + { + "_id": "string", + "name": "string", + "speedMbPerS": 0, + "sizeMb": 0 + } + ] + } + ] + } + } + ] + } + ] + } + } + """.trimIndent(), + headers = responseHeaders + ) + } + "/topologies/x" -> { + val responseHeaders = + headersOf("Content-Type" to listOf(ContentType.Application.Json.toString())) + respond( + """ + { + "message": "Not Found" + } + """.trimIndent(), + headers = responseHeaders, status = HttpStatusCode.NotFound + ) + } + else -> error("Unhandled ${request.url}") + } + } + } + } + + private val auth = AuthConfiguration("auth.opendc.org", "a", "b") + + @Test + fun testPortfolioExists(): Unit = runBlocking { + val client = ApiClient(URI("http://localhost:8081"), auth, client = ktor) + val portfolio = client.getPortfolio("5fda5daa97dca438e7cb0a4c") + assertNotNull(portfolio) + } + + @Test + fun testPortfolioDoesNotExists(): Unit = runBlocking { + val client = ApiClient(URI("http://localhost:8081"), auth, client = ktor) + val portfolio = client.getPortfolio("x") + assertNull(portfolio) + } + + @Test + fun testScenarioExists(): Unit = runBlocking { + val client = ApiClient(URI("http://localhost:8081"), auth, client = ktor) + val scenario = client.getScenario("5fda5db297dca438e7cb0a4d") + assertNotNull(scenario) + } + + @Test + fun testScenarioDoesNotExists(): Unit = runBlocking { + val client = ApiClient(URI("http://localhost:8081"), auth, client = ktor) + val scenario = client.getScenario("x") + assertNull(scenario) + } + + @Test + fun testTopologyExists(): Unit = runBlocking { + val client = ApiClient(URI("http://localhost:8081"), auth, client = ktor) + val topology = client.getTopology("5f9825a6cf6e4c24e380b86f") + assertNotNull(topology) + } + + @Test + fun testTopologyDoesNotExists(): Unit = runBlocking { + val client = ApiClient(URI("http://localhost:8081"), auth, client = ktor) + val topology = client.getTopology("x") + assertNull(topology) + } +} -- cgit v1.2.3 From e200dbfdc076ac6263c9ac6f9dabdcc475f01d6e Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sat, 10 Jul 2021 12:41:00 +0200 Subject: fix(ui): Relax topology schema requirements This change fixes an issue where the topology generated by the frontend was not accepted by the API server. --- .../src/main/kotlin/org/opendc/web/client/model/Machine.kt | 3 ++- .../src/main/kotlin/org/opendc/web/client/model/Room.kt | 3 ++- .../src/main/kotlin/org/opendc/web/client/model/RoomTile.kt | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt index c6757c5c..86d2d46f 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Machine.kt @@ -38,5 +38,6 @@ public data class Machine( @JsonProperty("memories") val memory: List = emptyList(), @JsonProperty("storages") - val storage: List = emptyList() + val storage: List = emptyList(), + val rackId: String? = null ) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt index e961d6db..f1b8f946 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/Room.kt @@ -33,5 +33,6 @@ public data class Room( @JsonProperty("_id") val id: String, val name: String, - val tiles: Set + val tiles: Set, + val topologyId: String? = null, ) diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt index 3bee3204..0b956262 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/client/model/RoomTile.kt @@ -34,5 +34,6 @@ public data class RoomTile( val id: String, val positionX: Double, val positionY: Double, - val rack: Rack? = null + val rack: Rack? = null, + val roomId: String? = null, ) -- cgit v1.2.3 From b8f64c1d3df2c990df8941cd036222fab2def9fa Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Sun, 22 Aug 2021 13:23:53 +0200 Subject: refactor(compute): Update FilterScheduler to follow OpenStack's Nova This change updates the FilterScheduler implementation to follow more closely the scheduler implementation in OpenStack's Nova. We now normalize the weights, support many of the filters and weights in OpenStack and support overcommitting resources. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 44 +--------------------- 1 file changed, 1 insertion(+), 43 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 5b5ef802..c5f5cd03 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -32,9 +32,6 @@ import io.opentelemetry.sdk.metrics.export.MetricProducer import kotlinx.coroutines.* import kotlinx.coroutines.channels.Channel import mu.KotlinLogging -import org.opendc.compute.service.scheduler.FilterScheduler -import org.opendc.compute.service.scheduler.filters.ComputeCapabilitiesFilter -import org.opendc.compute.service.scheduler.filters.ComputeFilter import org.opendc.compute.service.scheduler.weights.* import org.opendc.experiments.capelin.* import org.opendc.experiments.capelin.model.Workload @@ -199,46 +196,7 @@ class RunnerCli : CliktCommand(name = "runner") { val metricProducer = meterProvider as MetricProducer val operational = scenario.operationalPhenomena - val allocationPolicy = - when (val policyName = operational.schedulerName) { - "mem" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(MemoryWeigher() to -1.0) - ) - "mem-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(MemoryWeigher() to 1.0) - ) - "core-mem" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(CoreMemoryWeigher() to -1.0) - ) - "core-mem-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(CoreMemoryWeigher() to 1.0) - ) - "active-servers" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(ProvisionedCoresWeigher() to -1.0) - ) - "active-servers-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(InstanceCountWeigher() to 1.0) - ) - "provisioned-cores" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(ProvisionedCoresWeigher() to -1.0) - ) - "provisioned-cores-inv" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(ProvisionedCoresWeigher() to 1.0) - ) - "random" -> FilterScheduler( - filters = listOf(ComputeFilter(), ComputeCapabilitiesFilter()), - weighers = listOf(RandomWeigher(java.util.Random(seeder.nextLong())) to 1.0) - ) - else -> throw IllegalArgumentException("Unknown policy $policyName") - } + val allocationPolicy = createComputeScheduler(operational.schedulerName, seeder) val trace = ParquetTraceReader( listOf(traceReader), -- cgit v1.2.3 From f111081627280d4e7e1d7147c56cdce708e32433 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 14:06:39 +0200 Subject: build: Upgrade to OpenTelemetry 1.5 This change upgrades the OpenTelemetry dependency to version 1.5, which contains various breaking changes in the metrics API. --- .../src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt index d4445810..140f067a 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt @@ -45,10 +45,10 @@ public class WebExperimentMonitor : ExperimentMonitor { override fun reportHostSlice( time: Long, - requestedBurst: Long, - grantedBurst: Long, - overcommissionedBurst: Long, - interferedBurst: Long, + requestedBurst: Double, + grantedBurst: Double, + overcommissionedBurst: Double, + interferedBurst: Double, cpuUsage: Double, cpuDemand: Double, powerDraw: Double, -- cgit v1.2.3 From bb6066e1cecc55a50ac29da200bf3beba1ddd80b Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 25 Aug 2021 18:16:20 +0200 Subject: fix(capelin): Eliminate unnecessary double to long conversions This change eliminates the unnecessary conversions from double to long in the Capelin metric processing code. --- .../org/opendc/web/runner/WebExperimentMonitor.kt | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt index 140f067a..82e2a334 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt @@ -43,16 +43,16 @@ public class WebExperimentMonitor : ExperimentMonitor { logger.debug { "Host ${host.uid} changed state $newState [$time]" } } - override fun reportHostSlice( + override fun reportHostData( time: Long, - requestedBurst: Double, - grantedBurst: Double, - overcommissionedBurst: Double, - interferedBurst: Double, + totalWork: Double, + grantedWork: Double, + overcommittedWork: Double, + interferedWork: Double, cpuUsage: Double, cpuDemand: Double, powerDraw: Double, - numberOfDeployedImages: Int, + instanceCount: Int, host: Host, ) { processHostEvent( @@ -60,11 +60,11 @@ public class WebExperimentMonitor : ExperimentMonitor { time, 5 * 60 * 1000L, host, - numberOfDeployedImages, - requestedBurst, - grantedBurst, - overcommissionedBurst, - interferedBurst, + instanceCount, + totalWork.toLong(), + grantedWork.toLong(), + overcommittedWork.toLong(), + interferedWork.toLong(), cpuUsage, cpuDemand, powerDraw, @@ -120,7 +120,7 @@ public class WebExperimentMonitor : ExperimentMonitor { private var provisionerMetrics: AggregateProvisionerMetrics = AggregateProvisionerMetrics() - override fun reportProvisionerMetrics( + override fun reportServiceData( time: Long, totalHostCount: Int, availableHostCount: Int, -- cgit v1.2.3 From 9fcce6ade8714f7f0a9073fe5b7ddd3f0b35c375 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 31 Aug 2021 15:44:47 +0200 Subject: refactor(format): Remove environment reader from format library This change removes the environment reader from the format library since they are highly specific for the particular experiment. In the future, we hope to have a single format to setup the entire datacenter (perhaps similar to the format used by the web runner). --- .../opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index c5f5cd03..53d50357 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -34,12 +34,12 @@ import kotlinx.coroutines.channels.Channel import mu.KotlinLogging import org.opendc.compute.service.scheduler.weights.* import org.opendc.experiments.capelin.* +import org.opendc.experiments.capelin.env.EnvironmentReader +import org.opendc.experiments.capelin.env.MachineDef import org.opendc.experiments.capelin.model.Workload import org.opendc.experiments.capelin.trace.ParquetTraceReader import org.opendc.experiments.capelin.trace.PerformanceInterferenceReader import org.opendc.experiments.capelin.trace.RawParquetTraceReader -import org.opendc.format.environment.EnvironmentReader -import org.opendc.format.environment.MachineDef import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit -- cgit v1.2.3 From befec2f1ddf3a6e6d15d9d1b9fd1ecbbc4f38960 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 26 Aug 2021 10:34:18 +0200 Subject: feat(capelin): Report up/downtime metrics in experiment monitor --- .../src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt | 2 ++ 1 file changed, 2 insertions(+) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt index 82e2a334..281c8dbb 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt @@ -53,6 +53,8 @@ public class WebExperimentMonitor : ExperimentMonitor { cpuDemand: Double, powerDraw: Double, instanceCount: Int, + uptime: Long, + downtime: Long, host: Host, ) { processHostEvent( -- cgit v1.2.3 From aaedd4f3eed83d0c3ebc829fec08a1749a2bfba4 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 27 Aug 2021 16:41:55 +0200 Subject: refactor(capelin): Move metric collection outside Capelin code This change moves the metric collection outside the Capelin codebase in a separate module so other modules can also benefit from the compute metric collection code. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 20 ++- .../org/opendc/web/runner/ScenarioManager.kt | 10 +- .../org/opendc/web/runner/WebComputeMonitor.kt | 145 ++++++++++++++++ .../org/opendc/web/runner/WebExperimentMonitor.kt | 191 --------------------- 4 files changed, 164 insertions(+), 202 deletions(-) create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt delete mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 53d50357..65527141 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -47,6 +47,8 @@ import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.power.LinearPowerModel import org.opendc.simulator.core.runBlockingSimulation +import org.opendc.telemetry.compute.collectServiceMetrics +import org.opendc.telemetry.compute.withMonitor import org.opendc.telemetry.sdk.toOtelClock import org.opendc.web.client.ApiClient import org.opendc.web.client.AuthConfiguration @@ -131,7 +133,7 @@ class RunnerCli : CliktCommand(name = "runner") { /** * Run a single scenario. */ - private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, environment: EnvironmentReader): List { + private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, environment: EnvironmentReader): List { val id = scenario.id logger.info { "Constructing performance interference model" } @@ -176,8 +178,8 @@ class RunnerCli : CliktCommand(name = "runner") { environment: EnvironmentReader, traceReader: RawParquetTraceReader, interferenceModel: VmInterferenceModel? - ): WebExperimentMonitor.Result { - val monitor = WebExperimentMonitor() + ): WebComputeMonitor.Result { + val monitor = WebComputeMonitor() try { runBlockingSimulation { @@ -220,7 +222,7 @@ class RunnerCli : CliktCommand(name = "runner") { null } - withMonitor(monitor, clock, meterProvider as MetricProducer, scheduler) { + withMonitor(scheduler, clock, meterProvider as MetricProducer, monitor) { processTrace( clock, trace, @@ -233,8 +235,14 @@ class RunnerCli : CliktCommand(name = "runner") { failureDomain?.cancel() } - val monitorResults = collectMetrics(metricProducer) - logger.debug { "Finish SUBMIT=${monitorResults.submittedVms} FAIL=${monitorResults.unscheduledVms} QUEUE=${monitorResults.queuedVms} RUNNING=${monitorResults.runningVms}" } + val monitorResults = collectServiceMetrics(clock.millis(), metricProducer) + logger.debug { + "Finish " + + "SUBMIT=${monitorResults.instanceCount} " + + "FAIL=${monitorResults.failedInstanceCount} " + + "QUEUE=${monitorResults.queuedInstanceCount} " + + "RUNNING=${monitorResults.runningInstanceCount}" + } } } catch (cause: Throwable) { logger.warn(cause) { "Experiment failed" } diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt index 4044cec9..e0e3488f 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt @@ -61,14 +61,14 @@ public class ScenarioManager(private val client: ApiClient) { /** * Persist the specified results. */ - public suspend fun finish(id: String, results: List) { + public suspend fun finish(id: String, results: List) { client.updateJob( id, SimulationState.FINISHED, mapOf( - "total_requested_burst" to results.map { it.totalRequestedBurst }, - "total_granted_burst" to results.map { it.totalGrantedBurst }, - "total_overcommitted_burst" to results.map { it.totalOvercommittedBurst }, - "total_interfered_burst" to results.map { it.totalInterferedBurst }, + "total_requested_burst" to results.map { it.totalWork }, + "total_granted_burst" to results.map { it.totalGrantedWork }, + "total_overcommitted_burst" to results.map { it.totalOvercommittedWork }, + "total_interfered_burst" to results.map { it.totalInterferedWork }, "mean_cpu_usage" to results.map { it.meanCpuUsage }, "mean_cpu_demand" to results.map { it.meanCpuDemand }, "mean_num_deployed_images" to results.map { it.meanNumDeployedImages }, diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt new file mode 100644 index 00000000..c8e58dde --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.runner + +import mu.KotlinLogging +import org.opendc.compute.service.driver.Host +import org.opendc.compute.service.driver.HostState +import org.opendc.telemetry.compute.ComputeMonitor +import org.opendc.telemetry.compute.table.HostData +import org.opendc.telemetry.compute.table.ServiceData +import kotlin.math.max + +/** + * A [ComputeMonitor] that tracks the aggregate metrics for each repeat. + */ +public class WebComputeMonitor : ComputeMonitor { + private val logger = KotlinLogging.logger {} + + override fun onStateChange(time: Long, host: Host, newState: HostState) { + logger.debug { "Host ${host.uid} changed state $newState [$time]" } + } + + override fun record(data: HostData) { + val duration = 5 * 60 * 1000L + val slices = duration / SLICE_LENGTH + + hostAggregateMetrics = AggregateHostMetrics( + hostAggregateMetrics.totalWork + data.totalWork, + hostAggregateMetrics.totalGrantedWork + data.grantedWork, + hostAggregateMetrics.totalOvercommittedWork + data.overcommittedWork, + hostAggregateMetrics.totalInterferedWork + data.overcommittedWork, + hostAggregateMetrics.totalPowerDraw + (duration * data.powerDraw) / 3600, + hostAggregateMetrics.totalFailureSlices + if (data.host.state != HostState.UP) slices else 0, + hostAggregateMetrics.totalFailureVmSlices + if (data.host.state != HostState.UP) data.instanceCount * slices else 0 + ) + + hostMetrics.compute(data.host) { _, prev -> + HostMetrics( + (data.cpuUsage.takeIf { data.host.state == HostState.UP } ?: 0.0) + (prev?.cpuUsage ?: 0.0), + (data.cpuDemand.takeIf { data.host.state == HostState.UP } ?: 0.0) + (prev?.cpuDemand ?: 0.0), + data.instanceCount + (prev?.instanceCount ?: 0), + 1 + (prev?.count ?: 0) + ) + } + } + + private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() + private val hostMetrics: MutableMap = mutableMapOf() + private val SLICE_LENGTH: Long = 5 * 60 * 1000 + + data class AggregateHostMetrics( + val totalWork: Double = 0.0, + val totalGrantedWork: Double = 0.0, + val totalOvercommittedWork: Double = 0.0, + val totalInterferedWork: Double = 0.0, + val totalPowerDraw: Double = 0.0, + val totalFailureSlices: Long = 0, + val totalFailureVmSlices: Long = 0, + ) + + data class HostMetrics( + val cpuUsage: Double, + val cpuDemand: Double, + val instanceCount: Long, + val count: Long + ) + + private var serviceMetrics: AggregateServiceMetrics = AggregateServiceMetrics() + + override fun record(data: ServiceData) { + serviceMetrics = AggregateServiceMetrics( + max(data.instanceCount, serviceMetrics.vmTotalCount), + max(data.queuedInstanceCount, serviceMetrics.vmWaitingCount), + max(data.runningInstanceCount, serviceMetrics.vmActiveCount), + max(data.finishedInstanceCount, serviceMetrics.vmInactiveCount), + max(data.failedInstanceCount, serviceMetrics.vmFailedCount), + ) + } + + public data class AggregateServiceMetrics( + val vmTotalCount: Int = 0, + val vmWaitingCount: Int = 0, + val vmActiveCount: Int = 0, + val vmInactiveCount: Int = 0, + val vmFailedCount: Int = 0 + ) + + public fun getResult(): Result { + return Result( + hostAggregateMetrics.totalWork, + hostAggregateMetrics.totalGrantedWork, + hostAggregateMetrics.totalOvercommittedWork, + hostAggregateMetrics.totalInterferedWork, + hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), + hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), + hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.average(), + hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, + hostAggregateMetrics.totalPowerDraw, + hostAggregateMetrics.totalFailureSlices, + hostAggregateMetrics.totalFailureVmSlices, + serviceMetrics.vmTotalCount, + serviceMetrics.vmWaitingCount, + serviceMetrics.vmInactiveCount, + serviceMetrics.vmFailedCount, + ) + } + + data class Result( + val totalWork: Double, + val totalGrantedWork: Double, + val totalOvercommittedWork: Double, + val totalInterferedWork: Double, + val meanCpuUsage: Double, + val meanCpuDemand: Double, + val meanNumDeployedImages: Double, + val maxNumDeployedImages: Double, + val totalPowerDraw: Double, + val totalFailureSlices: Long, + val totalFailureVmSlices: Long, + val totalVmsSubmitted: Int, + val totalVmsQueued: Int, + val totalVmsFinished: Int, + val totalVmsFailed: Int + ) +} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt deleted file mode 100644 index 281c8dbb..00000000 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebExperimentMonitor.kt +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2021 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.web.runner - -import mu.KotlinLogging -import org.opendc.compute.api.Server -import org.opendc.compute.api.ServerState -import org.opendc.compute.service.driver.Host -import org.opendc.compute.service.driver.HostState -import org.opendc.experiments.capelin.monitor.ExperimentMonitor -import org.opendc.experiments.capelin.telemetry.HostEvent -import kotlin.math.max - -/** - * An [ExperimentMonitor] that tracks the aggregate metrics for each repeat. - */ -public class WebExperimentMonitor : ExperimentMonitor { - private val logger = KotlinLogging.logger {} - - override fun reportVmStateChange(time: Long, server: Server, newState: ServerState) {} - - override fun reportHostStateChange(time: Long, host: Host, newState: HostState) { - logger.debug { "Host ${host.uid} changed state $newState [$time]" } - } - - override fun reportHostData( - time: Long, - totalWork: Double, - grantedWork: Double, - overcommittedWork: Double, - interferedWork: Double, - cpuUsage: Double, - cpuDemand: Double, - powerDraw: Double, - instanceCount: Int, - uptime: Long, - downtime: Long, - host: Host, - ) { - processHostEvent( - HostEvent( - time, - 5 * 60 * 1000L, - host, - instanceCount, - totalWork.toLong(), - grantedWork.toLong(), - overcommittedWork.toLong(), - interferedWork.toLong(), - cpuUsage, - cpuDemand, - powerDraw, - host.model.cpuCount - ) - ) - } - - private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() - private val hostMetrics: MutableMap = mutableMapOf() - - private fun processHostEvent(event: HostEvent) { - val slices = event.duration / SLICE_LENGTH - - hostAggregateMetrics = AggregateHostMetrics( - hostAggregateMetrics.totalRequestedBurst + event.requestedBurst, - hostAggregateMetrics.totalGrantedBurst + event.grantedBurst, - hostAggregateMetrics.totalOvercommittedBurst + event.overcommissionedBurst, - hostAggregateMetrics.totalInterferedBurst + event.interferedBurst, - hostAggregateMetrics.totalPowerDraw + (event.duration * event.powerDraw) / 3600, - hostAggregateMetrics.totalFailureSlices + if (event.host.state != HostState.UP) slices else 0, - hostAggregateMetrics.totalFailureVmSlices + if (event.host.state != HostState.UP) event.vmCount * slices else 0 - ) - - hostMetrics.compute(event.host) { _, prev -> - HostMetrics( - (event.cpuUsage.takeIf { event.host.state == HostState.UP } ?: 0.0) + (prev?.cpuUsage ?: 0.0), - (event.cpuDemand.takeIf { event.host.state == HostState.UP } ?: 0.0) + (prev?.cpuDemand ?: 0.0), - event.vmCount + (prev?.vmCount ?: 0), - 1 + (prev?.count ?: 0) - ) - } - } - - private val SLICE_LENGTH: Long = 5 * 60 * 1000 - - public data class AggregateHostMetrics( - val totalRequestedBurst: Long = 0, - val totalGrantedBurst: Long = 0, - val totalOvercommittedBurst: Long = 0, - val totalInterferedBurst: Long = 0, - val totalPowerDraw: Double = 0.0, - val totalFailureSlices: Long = 0, - val totalFailureVmSlices: Long = 0, - ) - - public data class HostMetrics( - val cpuUsage: Double, - val cpuDemand: Double, - val vmCount: Long, - val count: Long - ) - - private var provisionerMetrics: AggregateProvisionerMetrics = AggregateProvisionerMetrics() - - override fun reportServiceData( - time: Long, - totalHostCount: Int, - availableHostCount: Int, - totalVmCount: Int, - activeVmCount: Int, - inactiveVmCount: Int, - waitingVmCount: Int, - failedVmCount: Int - ) { - provisionerMetrics = AggregateProvisionerMetrics( - max(totalVmCount, provisionerMetrics.vmTotalCount), - max(waitingVmCount, provisionerMetrics.vmWaitingCount), - max(activeVmCount, provisionerMetrics.vmActiveCount), - max(inactiveVmCount, provisionerMetrics.vmInactiveCount), - max(failedVmCount, provisionerMetrics.vmFailedCount), - ) - } - - public data class AggregateProvisionerMetrics( - val vmTotalCount: Int = 0, - val vmWaitingCount: Int = 0, - val vmActiveCount: Int = 0, - val vmInactiveCount: Int = 0, - val vmFailedCount: Int = 0 - ) - - override fun close() {} - - public fun getResult(): Result { - return Result( - hostAggregateMetrics.totalRequestedBurst, - hostAggregateMetrics.totalGrantedBurst, - hostAggregateMetrics.totalOvercommittedBurst, - hostAggregateMetrics.totalInterferedBurst, - hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), - hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), - hostMetrics.map { it.value.vmCount.toDouble() / it.value.count }.average(), - hostMetrics.map { it.value.vmCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, - hostAggregateMetrics.totalPowerDraw, - hostAggregateMetrics.totalFailureSlices, - hostAggregateMetrics.totalFailureVmSlices, - provisionerMetrics.vmTotalCount, - provisionerMetrics.vmWaitingCount, - provisionerMetrics.vmInactiveCount, - provisionerMetrics.vmFailedCount, - ) - } - - public data class Result( - public val totalRequestedBurst: Long, - public val totalGrantedBurst: Long, - public val totalOvercommittedBurst: Long, - public val totalInterferedBurst: Long, - public val meanCpuUsage: Double, - public val meanCpuDemand: Double, - public val meanNumDeployedImages: Double, - public val maxNumDeployedImages: Double, - public val totalPowerDraw: Double, - public val totalFailureSlices: Long, - public val totalFailureVmSlices: Long, - public val totalVmsSubmitted: Int, - public val totalVmsQueued: Int, - public val totalVmsFinished: Int, - public val totalVmsFailed: Int - ) -} -- cgit v1.2.3 From 18ff316a6b6ab984ebf8283ea48ed98ec69d8295 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 2 Sep 2021 13:20:05 +0200 Subject: refactor(capelin): Restructure input reading classes --- .../opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 65527141..5d481270 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -32,7 +32,6 @@ import io.opentelemetry.sdk.metrics.export.MetricProducer import kotlinx.coroutines.* import kotlinx.coroutines.channels.Channel import mu.KotlinLogging -import org.opendc.compute.service.scheduler.weights.* import org.opendc.experiments.capelin.* import org.opendc.experiments.capelin.env.EnvironmentReader import org.opendc.experiments.capelin.env.MachineDef @@ -152,7 +151,7 @@ class RunnerCli : CliktCommand(name = "runner") { return@let null } - PerformanceInterferenceReader(path.inputStream()).use { reader -> reader.read() } + PerformanceInterferenceReader().read(path.inputStream()) } val targets = portfolio.targets -- cgit v1.2.3 From d24cc0cc9c4fe2145f0337d65e9a75f631365973 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 10 Sep 2021 10:59:44 +0200 Subject: refactor(compute): Integrate fault injection into compute simulator This change moves the fault injection logic directly into the opendc-compute-simulator module, so that it can operate at a higher abstraction. In the future, we might again split the module if we can re-use some of its logic. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 5d481270..b565e90d 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -30,8 +30,8 @@ import io.opentelemetry.api.metrics.MeterProvider import io.opentelemetry.sdk.metrics.SdkMeterProvider import io.opentelemetry.sdk.metrics.export.MetricProducer import kotlinx.coroutines.* -import kotlinx.coroutines.channels.Channel import mu.KotlinLogging +import org.opendc.compute.simulator.SimHost import org.opendc.experiments.capelin.* import org.opendc.experiments.capelin.env.EnvironmentReader import org.opendc.experiments.capelin.env.MachineDef @@ -188,8 +188,6 @@ class RunnerCli : CliktCommand(name = "runner") { val seeder = Random(seed) - val chan = Channel(Channel.CONFLATED) - val meterProvider: MeterProvider = SdkMeterProvider .builder() .setClock(clock.toOtelClock()) @@ -207,31 +205,31 @@ class RunnerCli : CliktCommand(name = "runner") { val failureFrequency = if (operational.failuresEnabled) 24.0 * 7 else 0.0 withComputeService(clock, meterProvider, environment, allocationPolicy, interferenceModel) { scheduler -> - val failureDomain = if (failureFrequency > 0) { + val faultInjector = if (failureFrequency > 0) { logger.debug { "ENABLING failures" } - createFailureDomain( - this, + createFaultInjector( + coroutineContext, clock, + scheduler.hosts.map { it as SimHost }.toSet(), seeder.nextInt(), failureFrequency, - scheduler, - chan ) } else { null } withMonitor(scheduler, clock, meterProvider as MetricProducer, monitor) { + faultInjector?.start() + processTrace( clock, trace, scheduler, - chan, monitor ) - } - failureDomain?.cancel() + faultInjector?.close() + } } val monitorResults = collectServiceMetrics(clock.millis(), metricProducer) -- cgit v1.2.3 From 3ca64e0110adab65526a0ccfd5b252e9f047ab10 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 14 Sep 2021 14:41:05 +0200 Subject: refactor(telemetry): Create separate MeterProvider per service/host This change refactors the telemetry implementation by creating a separate MeterProvider per service or host. This means we have to keep track of multiple metric producers, but that we can attach resource information to each of the MeterProviders like we would in a real world scenario. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 72 ++++++++++------------ .../org/opendc/web/runner/WebComputeMonitor.kt | 40 +++++------- 2 files changed, 48 insertions(+), 64 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index b565e90d..b9d5a3f5 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -28,10 +28,8 @@ import com.github.ajalt.clikt.parameters.types.file import com.github.ajalt.clikt.parameters.types.long import io.opentelemetry.api.metrics.MeterProvider import io.opentelemetry.sdk.metrics.SdkMeterProvider -import io.opentelemetry.sdk.metrics.export.MetricProducer import kotlinx.coroutines.* import mu.KotlinLogging -import org.opendc.compute.simulator.SimHost import org.opendc.experiments.capelin.* import org.opendc.experiments.capelin.env.EnvironmentReader import org.opendc.experiments.capelin.env.MachineDef @@ -39,6 +37,8 @@ import org.opendc.experiments.capelin.model.Workload import org.opendc.experiments.capelin.trace.ParquetTraceReader import org.opendc.experiments.capelin.trace.PerformanceInterferenceReader import org.opendc.experiments.capelin.trace.RawParquetTraceReader +import org.opendc.experiments.capelin.util.ComputeServiceSimulator +import org.opendc.experiments.capelin.util.createComputeScheduler import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit @@ -46,8 +46,9 @@ import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.power.LinearPowerModel import org.opendc.simulator.core.runBlockingSimulation +import org.opendc.telemetry.compute.ComputeMetricExporter import org.opendc.telemetry.compute.collectServiceMetrics -import org.opendc.telemetry.compute.withMonitor +import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.telemetry.sdk.toOtelClock import org.opendc.web.client.ApiClient import org.opendc.web.client.AuthConfiguration @@ -55,9 +56,8 @@ import org.opendc.web.client.model.Scenario import org.opendc.web.client.model.Topology import java.io.File import java.net.URI +import java.time.Duration import java.util.* -import kotlin.random.Random -import kotlin.random.asJavaRandom import org.opendc.web.client.model.Portfolio as ClientPortfolio private val logger = KotlinLogging.logger {} @@ -158,7 +158,7 @@ class RunnerCli : CliktCommand(name = "runner") { val results = (0 until targets.repeatsPerScenario).map { repeat -> logger.info { "Starting repeat $repeat" } withTimeout(runTimeout * 1000) { - val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong()).asJavaRandom()) } + val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong())) } runRepeat(scenario, repeat, environment, traceReader, interferenceModel) } } @@ -182,63 +182,55 @@ class RunnerCli : CliktCommand(name = "runner") { try { runBlockingSimulation { - val seed = repeat val workloadName = scenario.trace.traceId val workloadFraction = scenario.trace.loadSamplingFraction - val seeder = Random(seed) + val seeder = Random(repeat.toLong()) val meterProvider: MeterProvider = SdkMeterProvider .builder() .setClock(clock.toOtelClock()) .build() - val metricProducer = meterProvider as MetricProducer val operational = scenario.operationalPhenomena - val allocationPolicy = createComputeScheduler(operational.schedulerName, seeder) + val computeScheduler = createComputeScheduler(operational.schedulerName, seeder) val trace = ParquetTraceReader( listOf(traceReader), Workload(workloadName, workloadFraction), - seed + repeat ) - val failureFrequency = if (operational.failuresEnabled) 24.0 * 7 else 0.0 - - withComputeService(clock, meterProvider, environment, allocationPolicy, interferenceModel) { scheduler -> - val faultInjector = if (failureFrequency > 0) { - logger.debug { "ENABLING failures" } - createFaultInjector( - coroutineContext, - clock, - scheduler.hosts.map { it as SimHost }.toSet(), - seeder.nextInt(), - failureFrequency, - ) - } else { + val failureModel = + if (operational.failuresEnabled) + grid5000(Duration.ofDays(7), repeat) + else null - } - withMonitor(scheduler, clock, meterProvider as MetricProducer, monitor) { - faultInjector?.start() + val simulator = ComputeServiceSimulator( + coroutineContext, + clock, + computeScheduler, + environment.read(), + failureModel, + interferenceModel.takeIf { operational.performanceInterferenceEnabled } + ) - processTrace( - clock, - trace, - scheduler, - monitor - ) + val metricReader = CoroutineMetricReader(this, simulator.producers, ComputeMetricExporter(clock, monitor)) - faultInjector?.close() - } + try { + simulator.run(trace) + } finally { + simulator.close() + metricReader.close() } - val monitorResults = collectServiceMetrics(clock.millis(), metricProducer) + val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0]) logger.debug { "Finish " + - "SUBMIT=${monitorResults.instanceCount} " + - "FAIL=${monitorResults.failedInstanceCount} " + - "QUEUE=${monitorResults.queuedInstanceCount} " + - "RUNNING=${monitorResults.runningInstanceCount}" + "SUBMIT=${serviceMetrics.instanceCount} " + + "FAIL=${serviceMetrics.failedInstanceCount} " + + "QUEUE=${serviceMetrics.queuedInstanceCount} " + + "RUNNING=${serviceMetrics.runningInstanceCount}" } } } catch (cause: Throwable) { diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt index c8e58dde..4b813310 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt @@ -22,27 +22,19 @@ package org.opendc.web.runner -import mu.KotlinLogging -import org.opendc.compute.service.driver.Host -import org.opendc.compute.service.driver.HostState import org.opendc.telemetry.compute.ComputeMonitor import org.opendc.telemetry.compute.table.HostData import org.opendc.telemetry.compute.table.ServiceData import kotlin.math.max +import kotlin.math.roundToLong /** * A [ComputeMonitor] that tracks the aggregate metrics for each repeat. */ -public class WebComputeMonitor : ComputeMonitor { - private val logger = KotlinLogging.logger {} - - override fun onStateChange(time: Long, host: Host, newState: HostState) { - logger.debug { "Host ${host.uid} changed state $newState [$time]" } - } - +class WebComputeMonitor : ComputeMonitor { override fun record(data: HostData) { - val duration = 5 * 60 * 1000L - val slices = duration / SLICE_LENGTH + val duration = data.uptime + val slices = data.downtime / SLICE_LENGTH hostAggregateMetrics = AggregateHostMetrics( hostAggregateMetrics.totalWork + data.totalWork, @@ -50,14 +42,14 @@ public class WebComputeMonitor : ComputeMonitor { hostAggregateMetrics.totalOvercommittedWork + data.overcommittedWork, hostAggregateMetrics.totalInterferedWork + data.overcommittedWork, hostAggregateMetrics.totalPowerDraw + (duration * data.powerDraw) / 3600, - hostAggregateMetrics.totalFailureSlices + if (data.host.state != HostState.UP) slices else 0, - hostAggregateMetrics.totalFailureVmSlices + if (data.host.state != HostState.UP) data.instanceCount * slices else 0 + hostAggregateMetrics.totalFailureSlices + slices, + hostAggregateMetrics.totalFailureVmSlices + data.instanceCount * slices ) - hostMetrics.compute(data.host) { _, prev -> + hostMetrics.compute(data.host.id) { _, prev -> HostMetrics( - (data.cpuUsage.takeIf { data.host.state == HostState.UP } ?: 0.0) + (prev?.cpuUsage ?: 0.0), - (data.cpuDemand.takeIf { data.host.state == HostState.UP } ?: 0.0) + (prev?.cpuDemand ?: 0.0), + data.cpuUsage + (prev?.cpuUsage ?: 0.0), + data.cpuDemand + (prev?.cpuDemand ?: 0.0), data.instanceCount + (prev?.instanceCount ?: 0), 1 + (prev?.count ?: 0) ) @@ -65,7 +57,7 @@ public class WebComputeMonitor : ComputeMonitor { } private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() - private val hostMetrics: MutableMap = mutableMapOf() + private val hostMetrics: MutableMap = mutableMapOf() private val SLICE_LENGTH: Long = 5 * 60 * 1000 data class AggregateHostMetrics( @@ -74,8 +66,8 @@ public class WebComputeMonitor : ComputeMonitor { val totalOvercommittedWork: Double = 0.0, val totalInterferedWork: Double = 0.0, val totalPowerDraw: Double = 0.0, - val totalFailureSlices: Long = 0, - val totalFailureVmSlices: Long = 0, + val totalFailureSlices: Double = 0.0, + val totalFailureVmSlices: Double = 0.0, ) data class HostMetrics( @@ -97,7 +89,7 @@ public class WebComputeMonitor : ComputeMonitor { ) } - public data class AggregateServiceMetrics( + data class AggregateServiceMetrics( val vmTotalCount: Int = 0, val vmWaitingCount: Int = 0, val vmActiveCount: Int = 0, @@ -105,7 +97,7 @@ public class WebComputeMonitor : ComputeMonitor { val vmFailedCount: Int = 0 ) - public fun getResult(): Result { + fun getResult(): Result { return Result( hostAggregateMetrics.totalWork, hostAggregateMetrics.totalGrantedWork, @@ -116,8 +108,8 @@ public class WebComputeMonitor : ComputeMonitor { hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.average(), hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, hostAggregateMetrics.totalPowerDraw, - hostAggregateMetrics.totalFailureSlices, - hostAggregateMetrics.totalFailureVmSlices, + hostAggregateMetrics.totalFailureSlices.roundToLong(), + hostAggregateMetrics.totalFailureVmSlices.roundToLong(), serviceMetrics.vmTotalCount, serviceMetrics.vmWaitingCount, serviceMetrics.vmInactiveCount, -- cgit v1.2.3 From 8d899e29dbd757f6df320212d6e0d77ce8216ab9 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 14 Sep 2021 15:38:38 +0200 Subject: refactor(telemetry): Standardize compute scheduler metrics This change updates the OpenDC compute service implementation with multiple meters that follow the OpenTelemetry conventions. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 11 ++++++----- .../main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt | 10 +++++----- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index b9d5a3f5..960d5ebd 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -226,11 +226,12 @@ class RunnerCli : CliktCommand(name = "runner") { val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0]) logger.debug { - "Finish " + - "SUBMIT=${serviceMetrics.instanceCount} " + - "FAIL=${serviceMetrics.failedInstanceCount} " + - "QUEUE=${serviceMetrics.queuedInstanceCount} " + - "RUNNING=${serviceMetrics.runningInstanceCount}" + "Scheduler " + + "Success=${serviceMetrics.attemptsSuccess} " + + "Failure=${serviceMetrics.attemptsFailure} " + + "Error=${serviceMetrics.attemptsError} " + + "Pending=${serviceMetrics.serversPending} " + + "Active=${serviceMetrics.serversActive}" } } } catch (cause: Throwable) { diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt index 4b813310..5f2c474b 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt @@ -81,11 +81,11 @@ class WebComputeMonitor : ComputeMonitor { override fun record(data: ServiceData) { serviceMetrics = AggregateServiceMetrics( - max(data.instanceCount, serviceMetrics.vmTotalCount), - max(data.queuedInstanceCount, serviceMetrics.vmWaitingCount), - max(data.runningInstanceCount, serviceMetrics.vmActiveCount), - max(data.finishedInstanceCount, serviceMetrics.vmInactiveCount), - max(data.failedInstanceCount, serviceMetrics.vmFailedCount), + max(data.attemptsSuccess, serviceMetrics.vmTotalCount), + max(data.serversPending, serviceMetrics.vmWaitingCount), + max(data.serversActive, serviceMetrics.vmActiveCount), + max(0, serviceMetrics.vmInactiveCount), + max(data.attemptsFailure, serviceMetrics.vmFailedCount), ) } -- cgit v1.2.3 From 0d8bccc68705d036fbf60f312d9c34ca4392c6b2 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 7 Sep 2021 17:30:46 +0200 Subject: refactor(telemetry): Standardize SimHost metrics This change standardizes the metrics emitted by SimHost instances and their guests based on the OpenTelemetry semantic conventions. We now also report CPU time as opposed to CPU work as this metric is more commonly used. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 12 ++----- .../org/opendc/web/runner/ScenarioManager.kt | 8 ++--- .../org/opendc/web/runner/WebComputeMonitor.kt | 41 +++++++++++----------- 3 files changed, 26 insertions(+), 35 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 960d5ebd..483558e1 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -26,8 +26,6 @@ import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.file import com.github.ajalt.clikt.parameters.types.long -import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.sdk.metrics.SdkMeterProvider import kotlinx.coroutines.* import mu.KotlinLogging import org.opendc.experiments.capelin.* @@ -49,7 +47,6 @@ import org.opendc.simulator.core.runBlockingSimulation import org.opendc.telemetry.compute.ComputeMetricExporter import org.opendc.telemetry.compute.collectServiceMetrics import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader -import org.opendc.telemetry.sdk.toOtelClock import org.opendc.web.client.ApiClient import org.opendc.web.client.AuthConfiguration import org.opendc.web.client.model.Scenario @@ -187,11 +184,6 @@ class RunnerCli : CliktCommand(name = "runner") { val seeder = Random(repeat.toLong()) - val meterProvider: MeterProvider = SdkMeterProvider - .builder() - .setClock(clock.toOtelClock()) - .build() - val operational = scenario.operationalPhenomena val computeScheduler = createComputeScheduler(operational.schedulerName, seeder) @@ -215,7 +207,7 @@ class RunnerCli : CliktCommand(name = "runner") { interferenceModel.takeIf { operational.performanceInterferenceEnabled } ) - val metricReader = CoroutineMetricReader(this, simulator.producers, ComputeMetricExporter(clock, monitor)) + val metricReader = CoroutineMetricReader(this, simulator.producers, ComputeMetricExporter(clock, monitor), exportInterval = Duration.ofHours(1)) try { simulator.run(trace) @@ -224,7 +216,7 @@ class RunnerCli : CliktCommand(name = "runner") { metricReader.close() } - val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0]) + val serviceMetrics = collectServiceMetrics(clock.instant(), simulator.producers[0]) logger.debug { "Scheduler " + "Success=${serviceMetrics.attemptsSuccess} " + diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt index e0e3488f..a0c281e8 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt @@ -65,10 +65,10 @@ public class ScenarioManager(private val client: ApiClient) { client.updateJob( id, SimulationState.FINISHED, mapOf( - "total_requested_burst" to results.map { it.totalWork }, - "total_granted_burst" to results.map { it.totalGrantedWork }, - "total_overcommitted_burst" to results.map { it.totalOvercommittedWork }, - "total_interfered_burst" to results.map { it.totalInterferedWork }, + "total_requested_burst" to results.map { it.totalActiveTime + it.totalIdleTime }, + "total_granted_burst" to results.map { it.totalActiveTime }, + "total_overcommitted_burst" to results.map { it.totalStealTime }, + "total_interfered_burst" to results.map { it.totalLostTime }, "mean_cpu_usage" to results.map { it.meanCpuUsage }, "mean_cpu_demand" to results.map { it.meanCpuDemand }, "mean_num_deployed_images" to results.map { it.meanNumDeployedImages }, diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt index 5f2c474b..bb412738 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt @@ -33,24 +33,23 @@ import kotlin.math.roundToLong */ class WebComputeMonitor : ComputeMonitor { override fun record(data: HostData) { - val duration = data.uptime val slices = data.downtime / SLICE_LENGTH hostAggregateMetrics = AggregateHostMetrics( - hostAggregateMetrics.totalWork + data.totalWork, - hostAggregateMetrics.totalGrantedWork + data.grantedWork, - hostAggregateMetrics.totalOvercommittedWork + data.overcommittedWork, - hostAggregateMetrics.totalInterferedWork + data.overcommittedWork, - hostAggregateMetrics.totalPowerDraw + (duration * data.powerDraw) / 3600, + hostAggregateMetrics.totalActiveTime + data.cpuActiveTime, + hostAggregateMetrics.totalIdleTime + data.cpuIdleTime, + hostAggregateMetrics.totalStealTime + data.cpuStealTime, + hostAggregateMetrics.totalLostTime + data.cpuLostTime, + hostAggregateMetrics.totalPowerDraw + data.powerTotal, hostAggregateMetrics.totalFailureSlices + slices, - hostAggregateMetrics.totalFailureVmSlices + data.instanceCount * slices + hostAggregateMetrics.totalFailureVmSlices + data.guestsRunning * slices ) hostMetrics.compute(data.host.id) { _, prev -> HostMetrics( data.cpuUsage + (prev?.cpuUsage ?: 0.0), data.cpuDemand + (prev?.cpuDemand ?: 0.0), - data.instanceCount + (prev?.instanceCount ?: 0), + data.guestsRunning + (prev?.instanceCount ?: 0), 1 + (prev?.count ?: 0) ) } @@ -58,13 +57,13 @@ class WebComputeMonitor : ComputeMonitor { private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() private val hostMetrics: MutableMap = mutableMapOf() - private val SLICE_LENGTH: Long = 5 * 60 * 1000 + private val SLICE_LENGTH: Long = 5 * 60 data class AggregateHostMetrics( - val totalWork: Double = 0.0, - val totalGrantedWork: Double = 0.0, - val totalOvercommittedWork: Double = 0.0, - val totalInterferedWork: Double = 0.0, + val totalActiveTime: Long = 0L, + val totalIdleTime: Long = 0L, + val totalStealTime: Long = 0L, + val totalLostTime: Long = 0L, val totalPowerDraw: Double = 0.0, val totalFailureSlices: Double = 0.0, val totalFailureVmSlices: Double = 0.0, @@ -99,10 +98,10 @@ class WebComputeMonitor : ComputeMonitor { fun getResult(): Result { return Result( - hostAggregateMetrics.totalWork, - hostAggregateMetrics.totalGrantedWork, - hostAggregateMetrics.totalOvercommittedWork, - hostAggregateMetrics.totalInterferedWork, + hostAggregateMetrics.totalActiveTime, + hostAggregateMetrics.totalIdleTime, + hostAggregateMetrics.totalStealTime, + hostAggregateMetrics.totalLostTime, hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.average(), @@ -118,10 +117,10 @@ class WebComputeMonitor : ComputeMonitor { } data class Result( - val totalWork: Double, - val totalGrantedWork: Double, - val totalOvercommittedWork: Double, - val totalInterferedWork: Double, + val totalActiveTime: Long, + val totalIdleTime: Long, + val totalStealTime: Long, + val totalLostTime: Long, val meanCpuUsage: Double, val meanCpuDemand: Double, val meanNumDeployedImages: Double, -- cgit v1.2.3 From 859ce303f0b9110c7110b918e5957c2156fa8b26 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 17 Sep 2021 17:48:02 +0200 Subject: refactor(capelin): Extract common code out of Capelin experiments This change creates a new module for doing simulations with virtual machine workloads. We have found that a lot of code in the Capelin experiments code is being re-used by non-experiment modules. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 483558e1..497a7281 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -28,14 +28,14 @@ import com.github.ajalt.clikt.parameters.types.file import com.github.ajalt.clikt.parameters.types.long import kotlinx.coroutines.* import mu.KotlinLogging -import org.opendc.experiments.capelin.* +import org.opendc.compute.workload.ComputeWorkloadRunner +import org.opendc.compute.workload.env.MachineDef +import org.opendc.compute.workload.grid5000 +import org.opendc.compute.workload.trace.RawParquetTraceReader +import org.opendc.compute.workload.util.PerformanceInterferenceReader import org.opendc.experiments.capelin.env.EnvironmentReader -import org.opendc.experiments.capelin.env.MachineDef import org.opendc.experiments.capelin.model.Workload import org.opendc.experiments.capelin.trace.ParquetTraceReader -import org.opendc.experiments.capelin.trace.PerformanceInterferenceReader -import org.opendc.experiments.capelin.trace.RawParquetTraceReader -import org.opendc.experiments.capelin.util.ComputeServiceSimulator import org.opendc.experiments.capelin.util.createComputeScheduler import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel import org.opendc.simulator.compute.model.MachineModel @@ -198,7 +198,7 @@ class RunnerCli : CliktCommand(name = "runner") { else null - val simulator = ComputeServiceSimulator( + val simulator = ComputeWorkloadRunner( coroutineContext, clock, computeScheduler, -- cgit v1.2.3 From b0ece0533825f5cd7983752330847071f4e438c4 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 15 Sep 2021 23:06:08 +0200 Subject: refactor(capelin): Support flexible topology creation This change adds support for creating flexible topologies by creating a TopologyFactory interface that is responsible for configuring the hosts of a compute service. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 116 +++++++++++---------- 1 file changed, 63 insertions(+), 53 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 497a7281..48183d71 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -29,11 +29,12 @@ import com.github.ajalt.clikt.parameters.types.long import kotlinx.coroutines.* import mu.KotlinLogging import org.opendc.compute.workload.ComputeWorkloadRunner -import org.opendc.compute.workload.env.MachineDef import org.opendc.compute.workload.grid5000 +import org.opendc.compute.workload.topology.HostSpec +import org.opendc.compute.workload.topology.Topology +import org.opendc.compute.workload.topology.apply import org.opendc.compute.workload.trace.RawParquetTraceReader import org.opendc.compute.workload.util.PerformanceInterferenceReader -import org.opendc.experiments.capelin.env.EnvironmentReader import org.opendc.experiments.capelin.model.Workload import org.opendc.experiments.capelin.trace.ParquetTraceReader import org.opendc.experiments.capelin.util.createComputeScheduler @@ -43,6 +44,7 @@ import org.opendc.simulator.compute.model.MemoryUnit import org.opendc.simulator.compute.model.ProcessingNode import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.power.LinearPowerModel +import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.core.runBlockingSimulation import org.opendc.telemetry.compute.ComputeMetricExporter import org.opendc.telemetry.compute.collectServiceMetrics @@ -50,12 +52,12 @@ import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.web.client.ApiClient import org.opendc.web.client.AuthConfiguration import org.opendc.web.client.model.Scenario -import org.opendc.web.client.model.Topology import java.io.File import java.net.URI import java.time.Duration import java.util.* import org.opendc.web.client.model.Portfolio as ClientPortfolio +import org.opendc.web.client.model.Topology as ClientTopology private val logger = KotlinLogging.logger {} @@ -129,7 +131,7 @@ class RunnerCli : CliktCommand(name = "runner") { /** * Run a single scenario. */ - private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, environment: EnvironmentReader): List { + private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, topology: Topology): List { val id = scenario.id logger.info { "Constructing performance interference model" } @@ -156,7 +158,7 @@ class RunnerCli : CliktCommand(name = "runner") { logger.info { "Starting repeat $repeat" } withTimeout(runTimeout * 1000) { val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong())) } - runRepeat(scenario, repeat, environment, traceReader, interferenceModel) + runRepeat(scenario, repeat, topology, traceReader, interferenceModel) } } @@ -171,7 +173,7 @@ class RunnerCli : CliktCommand(name = "runner") { private suspend fun runRepeat( scenario: Scenario, repeat: Int, - environment: EnvironmentReader, + topology: Topology, traceReader: RawParquetTraceReader, interferenceModel: VmInterferenceModel? ): WebComputeMonitor.Result { @@ -202,7 +204,6 @@ class RunnerCli : CliktCommand(name = "runner") { coroutineContext, clock, computeScheduler, - environment.read(), failureModel, interferenceModel.takeIf { operational.performanceInterferenceEnabled } ) @@ -210,6 +211,9 @@ class RunnerCli : CliktCommand(name = "runner") { val metricReader = CoroutineMetricReader(this, simulator.producers, ComputeMetricExporter(clock, monitor), exportInterval = Duration.ofHours(1)) try { + // Instantiate the topology onto the simulator + simulator.apply(topology) + // Run workload trace simulator.run(trace) } finally { simulator.close() @@ -292,56 +296,62 @@ class RunnerCli : CliktCommand(name = "runner") { } /** - * Convert the specified [topology] into an [EnvironmentReader] understood by Capelin. + * Convert the specified [topology] into an [Topology] understood by OpenDC. */ - private fun convert(topology: Topology): EnvironmentReader { - val nodes = mutableListOf() - val random = Random(0) - - val machines = topology.rooms.asSequence() - .flatMap { room -> - room.tiles.flatMap { tile -> - tile.rack?.machines?.map { machine -> tile.rack to machine } ?: emptyList() - } - } - for ((rack, machine) in machines) { - val clusterId = rack.id - val position = machine.position - - val processors = machine.cpus.flatMap { cpu -> - val cores = cpu.numberOfCores - val speed = cpu.clockRateMhz - // TODO Remove hard coding of vendor - val node = ProcessingNode("Intel", "amd64", cpu.name, cores) - List(cores) { coreId -> - ProcessingUnit(node, coreId, speed) - } - } - val memoryUnits = machine.memory.map { memory -> - MemoryUnit( - "Samsung", - memory.name, - memory.speedMbPerS, - memory.sizeMb.toLong() - ) - } + private fun convert(topology: ClientTopology): Topology { + return object : Topology { + + override fun resolve(): List { + val res = mutableListOf() + val random = Random(0) + + val machines = topology.rooms.asSequence() + .flatMap { room -> + room.tiles.flatMap { tile -> + tile.rack?.machines?.map { machine -> tile.rack to machine } ?: emptyList() + } + } + for ((rack, machine) in machines) { + val clusterId = rack.id + val position = machine.position + + val processors = machine.cpus.flatMap { cpu -> + val cores = cpu.numberOfCores + val speed = cpu.clockRateMhz + // TODO Remove hard coding of vendor + val node = ProcessingNode("Intel", "amd64", cpu.name, cores) + List(cores) { coreId -> + ProcessingUnit(node, coreId, speed) + } + } + val memoryUnits = machine.memory.map { memory -> + MemoryUnit( + "Samsung", + memory.name, + memory.speedMbPerS, + memory.sizeMb.toLong() + ) + } - val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW } + val energyConsumptionW = machine.cpus.sumOf { it.energyConsumptionW } + val powerModel = LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) + val powerDriver = SimplePowerDriver(powerModel) - nodes.add( - MachineDef( - UUID(random.nextLong(), random.nextLong()), - "node-$clusterId-$position", - mapOf("cluster" to clusterId), - MachineModel(processors, memoryUnits), - LinearPowerModel(2 * energyConsumptionW, energyConsumptionW * 0.5) - ) - ) - } + val spec = HostSpec( + UUID(random.nextLong(), random.nextLong()), + "node-$clusterId-$position", + mapOf("cluster" to clusterId), + MachineModel(processors, memoryUnits), + powerDriver + ) + + res += spec + } + + return res + } - return object : EnvironmentReader { - override fun read(): List = nodes - override fun close() {} + override fun toString(): String = "WebRunnerTopologyFactory" } } } -- cgit v1.2.3 From b14df2a0924774c5aed15cedeb1027abf8ee5361 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 16 Sep 2021 16:52:00 +0200 Subject: refactor(capelin): Make workload sampling model extensible This change updates the workload sampling implementation to be more flexible in the way the workload is constructed. Users can now sample multiple workloads at the same time using multiple samplers and use them as a single workload to simulate. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 27 +++++++--------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 48183d71..1b518fee 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -28,15 +28,12 @@ import com.github.ajalt.clikt.parameters.types.file import com.github.ajalt.clikt.parameters.types.long import kotlinx.coroutines.* import mu.KotlinLogging -import org.opendc.compute.workload.ComputeWorkloadRunner -import org.opendc.compute.workload.grid5000 +import org.opendc.compute.workload.* import org.opendc.compute.workload.topology.HostSpec import org.opendc.compute.workload.topology.Topology import org.opendc.compute.workload.topology.apply -import org.opendc.compute.workload.trace.RawParquetTraceReader import org.opendc.compute.workload.util.PerformanceInterferenceReader import org.opendc.experiments.capelin.model.Workload -import org.opendc.experiments.capelin.trace.ParquetTraceReader import org.opendc.experiments.capelin.util.createComputeScheduler import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel import org.opendc.simulator.compute.model.MachineModel @@ -136,13 +133,9 @@ class RunnerCli : CliktCommand(name = "runner") { logger.info { "Constructing performance interference model" } - val traceDir = File( - tracePath, - scenario.trace.traceId - ) - val traceReader = RawParquetTraceReader(traceDir) + val workloadLoader = ComputeWorkloadLoader(tracePath) val interferenceGroups = let { - val path = File(traceDir, "performance-interference-model.json") + val path = tracePath.resolve(scenario.trace.traceId).resolve("performance-interference-model.json") val operational = scenario.operationalPhenomena val enabled = operational.performanceInterferenceEnabled @@ -158,7 +151,7 @@ class RunnerCli : CliktCommand(name = "runner") { logger.info { "Starting repeat $repeat" } withTimeout(runTimeout * 1000) { val interferenceModel = interferenceGroups?.let { VmInterferenceModel(it, Random(repeat.toLong())) } - runRepeat(scenario, repeat, topology, traceReader, interferenceModel) + runRepeat(scenario, repeat, topology, workloadLoader, interferenceModel) } } @@ -174,7 +167,7 @@ class RunnerCli : CliktCommand(name = "runner") { scenario: Scenario, repeat: Int, topology: Topology, - traceReader: RawParquetTraceReader, + workloadLoader: ComputeWorkloadLoader, interferenceModel: VmInterferenceModel? ): WebComputeMonitor.Result { val monitor = WebComputeMonitor() @@ -188,15 +181,11 @@ class RunnerCli : CliktCommand(name = "runner") { val operational = scenario.operationalPhenomena val computeScheduler = createComputeScheduler(operational.schedulerName, seeder) + val workload = Workload(workloadName, trace(workloadName).sampleByLoad(workloadFraction)) - val trace = ParquetTraceReader( - listOf(traceReader), - Workload(workloadName, workloadFraction), - repeat - ) val failureModel = if (operational.failuresEnabled) - grid5000(Duration.ofDays(7), repeat) + grid5000(Duration.ofDays(7)) else null @@ -214,7 +203,7 @@ class RunnerCli : CliktCommand(name = "runner") { // Instantiate the topology onto the simulator simulator.apply(topology) // Run workload trace - simulator.run(trace) + simulator.run(workload.source.resolve(workloadLoader, seeder), seeder.nextLong()) } finally { simulator.close() metricReader.close() -- cgit v1.2.3 From 68ef3700ed2f69bcf0118bb69eda71e6b1f4d54f Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 21 Sep 2021 11:34:34 +0200 Subject: feat(trace): Add support for writing traces This change adds a new API for writing traces in a trace format. Currently, writing is only supported by the OpenDC VM format, but over time the other formats will also have support for writing added. --- .../opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 1b518fee..40a7ea62 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -33,8 +33,6 @@ import org.opendc.compute.workload.topology.HostSpec import org.opendc.compute.workload.topology.Topology import org.opendc.compute.workload.topology.apply import org.opendc.compute.workload.util.PerformanceInterferenceReader -import org.opendc.experiments.capelin.model.Workload -import org.opendc.experiments.capelin.util.createComputeScheduler import org.opendc.simulator.compute.kernel.interference.VmInterferenceModel import org.opendc.simulator.compute.model.MachineModel import org.opendc.simulator.compute.model.MemoryUnit @@ -181,7 +179,7 @@ class RunnerCli : CliktCommand(name = "runner") { val operational = scenario.operationalPhenomena val computeScheduler = createComputeScheduler(operational.schedulerName, seeder) - val workload = Workload(workloadName, trace(workloadName).sampleByLoad(workloadFraction)) + val workload = trace(workloadName).sampleByLoad(workloadFraction) val failureModel = if (operational.failuresEnabled) @@ -203,7 +201,7 @@ class RunnerCli : CliktCommand(name = "runner") { // Instantiate the topology onto the simulator simulator.apply(topology) // Run workload trace - simulator.run(workload.source.resolve(workloadLoader, seeder), seeder.nextLong()) + simulator.run(workload.resolve(workloadLoader, seeder), seeder.nextLong()) } finally { simulator.close() metricReader.close() -- cgit v1.2.3 From 30cd010f1f98262aa7f264bb3c3eb6028b8495c5 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 22 Sep 2021 12:43:01 +0200 Subject: refactor(telemetry): Do not require clock for ComputeMetricExporter This change drops the requirement for a clock parameter when constructing a ComputeMetricExporter, since it will now derive the timestamp from the recorded metrics. --- .../src/main/kotlin/org/opendc/web/runner/Main.kt | 13 +- .../org/opendc/web/runner/ScenarioManager.kt | 2 +- .../opendc/web/runner/WebComputeMetricExporter.kt | 137 +++++++++++++++++++++ .../org/opendc/web/runner/WebComputeMonitor.kt | 136 -------------------- 4 files changed, 144 insertions(+), 144 deletions(-) create mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMetricExporter.kt delete mode 100644 opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 40a7ea62..96b300d7 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -41,7 +41,6 @@ import org.opendc.simulator.compute.model.ProcessingUnit import org.opendc.simulator.compute.power.LinearPowerModel import org.opendc.simulator.compute.power.SimplePowerDriver import org.opendc.simulator.core.runBlockingSimulation -import org.opendc.telemetry.compute.ComputeMetricExporter import org.opendc.telemetry.compute.collectServiceMetrics import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.web.client.ApiClient @@ -126,7 +125,7 @@ class RunnerCli : CliktCommand(name = "runner") { /** * Run a single scenario. */ - private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, topology: Topology): List { + private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, topology: Topology): List { val id = scenario.id logger.info { "Constructing performance interference model" } @@ -167,8 +166,8 @@ class RunnerCli : CliktCommand(name = "runner") { topology: Topology, workloadLoader: ComputeWorkloadLoader, interferenceModel: VmInterferenceModel? - ): WebComputeMonitor.Result { - val monitor = WebComputeMonitor() + ): WebComputeMetricExporter.Result { + val exporter = WebComputeMetricExporter() try { runBlockingSimulation { @@ -195,7 +194,7 @@ class RunnerCli : CliktCommand(name = "runner") { interferenceModel.takeIf { operational.performanceInterferenceEnabled } ) - val metricReader = CoroutineMetricReader(this, simulator.producers, ComputeMetricExporter(clock, monitor), exportInterval = Duration.ofHours(1)) + val metricReader = CoroutineMetricReader(this, simulator.producers, exporter, exportInterval = Duration.ofHours(1)) try { // Instantiate the topology onto the simulator @@ -207,7 +206,7 @@ class RunnerCli : CliktCommand(name = "runner") { metricReader.close() } - val serviceMetrics = collectServiceMetrics(clock.instant(), simulator.producers[0]) + val serviceMetrics = collectServiceMetrics(simulator.producers[0]) logger.debug { "Scheduler " + "Success=${serviceMetrics.attemptsSuccess} " + @@ -221,7 +220,7 @@ class RunnerCli : CliktCommand(name = "runner") { logger.warn(cause) { "Experiment failed" } } - return monitor.getResult() + return exporter.getResult() } private val POLL_INTERVAL = 30000L // ms = 30 s diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt index a0c281e8..1ee835a6 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/ScenarioManager.kt @@ -61,7 +61,7 @@ public class ScenarioManager(private val client: ApiClient) { /** * Persist the specified results. */ - public suspend fun finish(id: String, results: List) { + public suspend fun finish(id: String, results: List) { client.updateJob( id, SimulationState.FINISHED, mapOf( diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMetricExporter.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMetricExporter.kt new file mode 100644 index 00000000..7913660d --- /dev/null +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMetricExporter.kt @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.web.runner + +import org.opendc.telemetry.compute.ComputeMetricExporter +import org.opendc.telemetry.compute.ComputeMonitor +import org.opendc.telemetry.compute.table.HostData +import org.opendc.telemetry.compute.table.ServiceData +import kotlin.math.max +import kotlin.math.roundToLong + +/** + * A [ComputeMonitor] that tracks the aggregate metrics for each repeat. + */ +class WebComputeMetricExporter : ComputeMetricExporter() { + override fun record(data: HostData) { + val slices = data.downtime / SLICE_LENGTH + + hostAggregateMetrics = AggregateHostMetrics( + hostAggregateMetrics.totalActiveTime + data.cpuActiveTime, + hostAggregateMetrics.totalIdleTime + data.cpuIdleTime, + hostAggregateMetrics.totalStealTime + data.cpuStealTime, + hostAggregateMetrics.totalLostTime + data.cpuLostTime, + hostAggregateMetrics.totalPowerDraw + data.powerTotal, + hostAggregateMetrics.totalFailureSlices + slices, + hostAggregateMetrics.totalFailureVmSlices + data.guestsRunning * slices + ) + + hostMetrics.compute(data.host.id) { _, prev -> + HostMetrics( + data.cpuUsage + (prev?.cpuUsage ?: 0.0), + data.cpuDemand + (prev?.cpuDemand ?: 0.0), + data.guestsRunning + (prev?.instanceCount ?: 0), + 1 + (prev?.count ?: 0) + ) + } + } + + private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() + private val hostMetrics: MutableMap = mutableMapOf() + private val SLICE_LENGTH: Long = 5 * 60L + + data class AggregateHostMetrics( + val totalActiveTime: Long = 0L, + val totalIdleTime: Long = 0L, + val totalStealTime: Long = 0L, + val totalLostTime: Long = 0L, + val totalPowerDraw: Double = 0.0, + val totalFailureSlices: Double = 0.0, + val totalFailureVmSlices: Double = 0.0, + ) + + data class HostMetrics( + val cpuUsage: Double, + val cpuDemand: Double, + val instanceCount: Long, + val count: Long + ) + + private var serviceMetrics: AggregateServiceMetrics = AggregateServiceMetrics() + + override fun record(data: ServiceData) { + serviceMetrics = AggregateServiceMetrics( + max(data.attemptsSuccess, serviceMetrics.vmTotalCount), + max(data.serversPending, serviceMetrics.vmWaitingCount), + max(data.serversActive, serviceMetrics.vmActiveCount), + max(0, serviceMetrics.vmInactiveCount), + max(data.attemptsFailure, serviceMetrics.vmFailedCount), + ) + } + + data class AggregateServiceMetrics( + val vmTotalCount: Int = 0, + val vmWaitingCount: Int = 0, + val vmActiveCount: Int = 0, + val vmInactiveCount: Int = 0, + val vmFailedCount: Int = 0 + ) + + fun getResult(): Result { + return Result( + hostAggregateMetrics.totalActiveTime, + hostAggregateMetrics.totalIdleTime, + hostAggregateMetrics.totalStealTime, + hostAggregateMetrics.totalLostTime, + hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), + hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), + hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.average(), + hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, + hostAggregateMetrics.totalPowerDraw, + hostAggregateMetrics.totalFailureSlices.roundToLong(), + hostAggregateMetrics.totalFailureVmSlices.roundToLong(), + serviceMetrics.vmTotalCount, + serviceMetrics.vmWaitingCount, + serviceMetrics.vmInactiveCount, + serviceMetrics.vmFailedCount, + ) + } + + data class Result( + val totalActiveTime: Long, + val totalIdleTime: Long, + val totalStealTime: Long, + val totalLostTime: Long, + val meanCpuUsage: Double, + val meanCpuDemand: Double, + val meanNumDeployedImages: Double, + val maxNumDeployedImages: Double, + val totalPowerDraw: Double, + val totalFailureSlices: Long, + val totalFailureVmSlices: Long, + val totalVmsSubmitted: Int, + val totalVmsQueued: Int, + val totalVmsFinished: Int, + val totalVmsFailed: Int + ) +} diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt deleted file mode 100644 index bb412738..00000000 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/WebComputeMonitor.kt +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2021 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.web.runner - -import org.opendc.telemetry.compute.ComputeMonitor -import org.opendc.telemetry.compute.table.HostData -import org.opendc.telemetry.compute.table.ServiceData -import kotlin.math.max -import kotlin.math.roundToLong - -/** - * A [ComputeMonitor] that tracks the aggregate metrics for each repeat. - */ -class WebComputeMonitor : ComputeMonitor { - override fun record(data: HostData) { - val slices = data.downtime / SLICE_LENGTH - - hostAggregateMetrics = AggregateHostMetrics( - hostAggregateMetrics.totalActiveTime + data.cpuActiveTime, - hostAggregateMetrics.totalIdleTime + data.cpuIdleTime, - hostAggregateMetrics.totalStealTime + data.cpuStealTime, - hostAggregateMetrics.totalLostTime + data.cpuLostTime, - hostAggregateMetrics.totalPowerDraw + data.powerTotal, - hostAggregateMetrics.totalFailureSlices + slices, - hostAggregateMetrics.totalFailureVmSlices + data.guestsRunning * slices - ) - - hostMetrics.compute(data.host.id) { _, prev -> - HostMetrics( - data.cpuUsage + (prev?.cpuUsage ?: 0.0), - data.cpuDemand + (prev?.cpuDemand ?: 0.0), - data.guestsRunning + (prev?.instanceCount ?: 0), - 1 + (prev?.count ?: 0) - ) - } - } - - private var hostAggregateMetrics: AggregateHostMetrics = AggregateHostMetrics() - private val hostMetrics: MutableMap = mutableMapOf() - private val SLICE_LENGTH: Long = 5 * 60 - - data class AggregateHostMetrics( - val totalActiveTime: Long = 0L, - val totalIdleTime: Long = 0L, - val totalStealTime: Long = 0L, - val totalLostTime: Long = 0L, - val totalPowerDraw: Double = 0.0, - val totalFailureSlices: Double = 0.0, - val totalFailureVmSlices: Double = 0.0, - ) - - data class HostMetrics( - val cpuUsage: Double, - val cpuDemand: Double, - val instanceCount: Long, - val count: Long - ) - - private var serviceMetrics: AggregateServiceMetrics = AggregateServiceMetrics() - - override fun record(data: ServiceData) { - serviceMetrics = AggregateServiceMetrics( - max(data.attemptsSuccess, serviceMetrics.vmTotalCount), - max(data.serversPending, serviceMetrics.vmWaitingCount), - max(data.serversActive, serviceMetrics.vmActiveCount), - max(0, serviceMetrics.vmInactiveCount), - max(data.attemptsFailure, serviceMetrics.vmFailedCount), - ) - } - - data class AggregateServiceMetrics( - val vmTotalCount: Int = 0, - val vmWaitingCount: Int = 0, - val vmActiveCount: Int = 0, - val vmInactiveCount: Int = 0, - val vmFailedCount: Int = 0 - ) - - fun getResult(): Result { - return Result( - hostAggregateMetrics.totalActiveTime, - hostAggregateMetrics.totalIdleTime, - hostAggregateMetrics.totalStealTime, - hostAggregateMetrics.totalLostTime, - hostMetrics.map { it.value.cpuUsage / it.value.count }.average(), - hostMetrics.map { it.value.cpuDemand / it.value.count }.average(), - hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.average(), - hostMetrics.map { it.value.instanceCount.toDouble() / it.value.count }.maxOrNull() ?: 0.0, - hostAggregateMetrics.totalPowerDraw, - hostAggregateMetrics.totalFailureSlices.roundToLong(), - hostAggregateMetrics.totalFailureVmSlices.roundToLong(), - serviceMetrics.vmTotalCount, - serviceMetrics.vmWaitingCount, - serviceMetrics.vmInactiveCount, - serviceMetrics.vmFailedCount, - ) - } - - data class Result( - val totalActiveTime: Long, - val totalIdleTime: Long, - val totalStealTime: Long, - val totalLostTime: Long, - val meanCpuUsage: Double, - val meanCpuDemand: Double, - val meanNumDeployedImages: Double, - val maxNumDeployedImages: Double, - val totalPowerDraw: Double, - val totalFailureSlices: Long, - val totalFailureVmSlices: Long, - val totalVmsSubmitted: Int, - val totalVmsQueued: Int, - val totalVmsFinished: Int, - val totalVmsFailed: Int - ) -} -- cgit v1.2.3 From 4cc1d40d421c8736f8b21b360b61d6b065158b7a Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 29 Sep 2021 23:56:16 +0200 Subject: refactor(simulator): Migrate to flow-based simulation This change renames the `opendc-simulator-resources` module into the `opendc-simulator-flow` module to indicate that the core simulation model of OpenDC is based around modelling and simulating flows. Previously, the distinction between resource consumer and provider, and input and output caused some confusion. By switching to a flow-based model, this distinction is now clear (as in, the water flows from source to consumer/sink). --- .../opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt index 96b300d7..59308e11 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/Main.kt @@ -123,7 +123,7 @@ class RunnerCli : CliktCommand(name = "runner") { .default(60L * 3) // Experiment may run for a maximum of three minutes /** - * Run a single scenario. + * Converge a single scenario. */ private suspend fun runScenario(portfolio: ClientPortfolio, scenario: Scenario, topology: Topology): List { val id = scenario.id @@ -158,7 +158,7 @@ class RunnerCli : CliktCommand(name = "runner") { } /** - * Run a single repeat. + * Converge a single repeat. */ private suspend fun runRepeat( scenario: Scenario, @@ -199,7 +199,7 @@ class RunnerCli : CliktCommand(name = "runner") { try { // Instantiate the topology onto the simulator simulator.apply(topology) - // Run workload trace + // Converge workload trace simulator.run(workload.resolve(workloadLoader, seeder), seeder.nextLong()) } finally { simulator.close() -- cgit v1.2.3