From c7eec7904e08029b3ab31d3e7b21afa1ea9ab7e6 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Wed, 4 May 2022 16:24:53 +0200 Subject: refactor(compute/service): Remove OpenTelemetry from "compute" modules This change removes the OpenTelemetry integration from the OpenDC Compute modules. Previously, we chose to integrate OpenTelemetry to provide a unified way to report metrics to the users. Although this worked as expected, the overhead of the OpenTelemetry when collecting metrics during simulation was considerable and lacked more optimization opportunities (other than providing a separate API implementation). Furthermore, since we were tied to OpenTelemetry's SDK implementation, we experienced issues with throttling and registering multiple instruments. We will instead use another approach, where we expose the core metrics in OpenDC via specialized interfaces (see the commits before) such that access is fast and can be done without having to interface with OpenTelemetry. In addition, we will provide an adapter to that is able to forward these metrics to OpenTelemetry implementations, so we can still integrate with the wider ecosystem. --- .../kotlin/org/opendc/workflow/service/WorkflowServiceTest.kt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'opendc-workflow') diff --git a/opendc-workflow/opendc-workflow-service/src/test/kotlin/org/opendc/workflow/service/WorkflowServiceTest.kt b/opendc-workflow/opendc-workflow-service/src/test/kotlin/org/opendc/workflow/service/WorkflowServiceTest.kt index d5f06587..73d1b23b 100644 --- a/opendc-workflow/opendc-workflow-service/src/test/kotlin/org/opendc/workflow/service/WorkflowServiceTest.kt +++ b/opendc-workflow/opendc-workflow-service/src/test/kotlin/org/opendc/workflow/service/WorkflowServiceTest.kt @@ -32,7 +32,6 @@ import org.opendc.compute.service.scheduler.filters.RamFilter import org.opendc.compute.service.scheduler.filters.VCpuFilter import org.opendc.compute.service.scheduler.weights.VCpuWeigher import org.opendc.compute.workload.ComputeServiceHelper -import org.opendc.compute.workload.telemetry.NoopTelemetryManager import org.opendc.compute.workload.topology.HostSpec import org.opendc.simulator.compute.kernel.SimSpaceSharedHypervisorProvider import org.opendc.simulator.compute.model.MachineModel @@ -70,7 +69,12 @@ internal class WorkflowServiceTest { weighers = listOf(VCpuWeigher(1.0, multiplier = 1.0)) ) - val computeHelper = ComputeServiceHelper(coroutineContext, clock, NoopTelemetryManager(), computeScheduler, schedulingQuantum = Duration.ofSeconds(1)) + val computeHelper = ComputeServiceHelper( + coroutineContext, + clock, + computeScheduler, + schedulingQuantum = Duration.ofSeconds(1) + ) val hostCount = 4 repeat(hostCount) { computeHelper.registerHost(createHostSpec(it)) } -- cgit v1.2.3 From b82ae73d064590094f79e26de355060135ed13fd Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 5 May 2022 11:33:47 +0200 Subject: refactor(workflow/service): Remove OpenTelemetry from "workflow" modules This change removes the OpenTelemetry integration from the OpenDC Workflow modules. Previously, we chose to integrate OpenTelemetry to provide a unified way to report metrics to the users. See the previous commit removing it from the "Compute" modules for the reasoning behind this change. --- .../opendc-workflow-service/build.gradle.kts | 2 - .../org/opendc/workflow/service/WorkflowService.kt | 7 +- .../service/internal/WorkflowServiceImpl.kt | 67 +---------------- .../service/scheduler/telemetry/SchedulerStats.kt | 42 +++++++++++ .../workflow/service/telemetry/SchedulerStats.kt | 42 ----------- .../opendc-workflow-workload/build.gradle.kts | 2 - .../workflow/workload/WorkflowServiceHelper.kt | 86 +++------------------- 7 files changed, 56 insertions(+), 192 deletions(-) create mode 100644 opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/scheduler/telemetry/SchedulerStats.kt delete mode 100644 opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/telemetry/SchedulerStats.kt (limited to 'opendc-workflow') diff --git a/opendc-workflow/opendc-workflow-service/build.gradle.kts b/opendc-workflow/opendc-workflow-service/build.gradle.kts index 60b5eb13..b6365885 100644 --- a/opendc-workflow/opendc-workflow-service/build.gradle.kts +++ b/opendc-workflow/opendc-workflow-service/build.gradle.kts @@ -30,7 +30,6 @@ plugins { dependencies { api(projects.opendcWorkflow.opendcWorkflowApi) api(projects.opendcCompute.opendcComputeApi) - api(projects.opendcTelemetry.opendcTelemetryApi) implementation(projects.opendcCommon) implementation(libs.kotlin.logging) @@ -38,7 +37,6 @@ dependencies { testImplementation(projects.opendcCompute.opendcComputeWorkload) testImplementation(projects.opendcSimulator.opendcSimulatorCore) testImplementation(projects.opendcTrace.opendcTraceApi) - testImplementation(projects.opendcTelemetry.opendcTelemetrySdk) testRuntimeOnly(projects.opendcTrace.opendcTraceGwf) testRuntimeOnly(libs.log4j.slf4j) } diff --git a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/WorkflowService.kt b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/WorkflowService.kt index b8bc0e33..2436c387 100644 --- a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/WorkflowService.kt +++ b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/WorkflowService.kt @@ -22,7 +22,6 @@ package org.opendc.workflow.service -import io.opentelemetry.api.metrics.MeterProvider import org.opendc.compute.api.ComputeClient import org.opendc.workflow.api.Job import org.opendc.workflow.service.internal.WorkflowServiceImpl @@ -30,7 +29,7 @@ import org.opendc.workflow.service.scheduler.job.JobAdmissionPolicy import org.opendc.workflow.service.scheduler.job.JobOrderPolicy import org.opendc.workflow.service.scheduler.task.TaskEligibilityPolicy import org.opendc.workflow.service.scheduler.task.TaskOrderPolicy -import org.opendc.workflow.service.telemetry.SchedulerStats +import org.opendc.workflow.service.scheduler.telemetry.SchedulerStats import java.time.Clock import java.time.Duration import kotlin.coroutines.CoroutineContext @@ -63,7 +62,7 @@ public interface WorkflowService : AutoCloseable { * @param context The [CoroutineContext] to use in the service. * @param clock The clock instance to use. * @param meterProvider The meter provider to use. - * @param compute The compute client to use. + * @param compute The "Compute" client to use. * @param schedulingQuantum The scheduling quantum to use (minimum duration between scheduling cycles). * @param jobAdmissionPolicy The job admission policy to use. * @param jobOrderPolicy The job order policy to use. @@ -73,7 +72,6 @@ public interface WorkflowService : AutoCloseable { public operator fun invoke( context: CoroutineContext, clock: Clock, - meterProvider: MeterProvider, compute: ComputeClient, schedulingQuantum: Duration, jobAdmissionPolicy: JobAdmissionPolicy, @@ -84,7 +82,6 @@ public interface WorkflowService : AutoCloseable { return WorkflowServiceImpl( context, clock, - meterProvider, compute, schedulingQuantum, jobAdmissionPolicy, diff --git a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/internal/WorkflowServiceImpl.kt b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/internal/WorkflowServiceImpl.kt index 9c7f18a2..899810a2 100644 --- a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/internal/WorkflowServiceImpl.kt +++ b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/internal/WorkflowServiceImpl.kt @@ -22,8 +22,6 @@ package org.opendc.workflow.service.internal -import io.opentelemetry.api.metrics.Meter -import io.opentelemetry.api.metrics.MeterProvider import kotlinx.coroutines.* import org.opendc.common.util.Pacer import org.opendc.compute.api.* @@ -34,7 +32,7 @@ import org.opendc.workflow.service.scheduler.job.JobAdmissionPolicy import org.opendc.workflow.service.scheduler.job.JobOrderPolicy import org.opendc.workflow.service.scheduler.task.TaskEligibilityPolicy import org.opendc.workflow.service.scheduler.task.TaskOrderPolicy -import org.opendc.workflow.service.telemetry.SchedulerStats +import org.opendc.workflow.service.scheduler.telemetry.SchedulerStats import java.time.Clock import java.time.Duration import java.util.* @@ -48,9 +46,8 @@ import kotlin.coroutines.resume public class WorkflowServiceImpl( context: CoroutineContext, private val clock: Clock, - meterProvider: MeterProvider, private val computeClient: ComputeClient, - private val schedulingQuantum: Duration, + schedulingQuantum: Duration, jobAdmissionPolicy: JobAdmissionPolicy, jobOrderPolicy: JobOrderPolicy, taskEligibilityPolicy: TaskEligibilityPolicy, @@ -61,11 +58,6 @@ public class WorkflowServiceImpl( */ private val scope = CoroutineScope(context + Job()) - /** - * The [Meter] to collect metrics of this service. - */ - private val meter = meterProvider.get("org.opendc.workflow.service") - /** * The incoming jobs ready to be processed by the scheduler. */ @@ -139,58 +131,11 @@ public class WorkflowServiceImpl( } } - /** - * The number of jobs that have been submitted to the service. - */ - private val submittedJobs = meter.counterBuilder("jobs.submitted") - .setDescription("Number of submitted jobs") - .setUnit("1") - .build() private var _workflowsSubmitted: Int = 0 - - /** - * The number of jobs that are running. - */ - private val runningJobs = meter.upDownCounterBuilder("jobs.active") - .setDescription("Number of jobs running") - .setUnit("1") - .build() private var _workflowsRunning: Int = 0 - - /** - * The number of jobs that have finished running. - */ - private val finishedJobs = meter.counterBuilder("jobs.finished") - .setDescription("Number of jobs that finished running") - .setUnit("1") - .build() private var _workflowsFinished: Int = 0 - - /** - * The number of tasks that have been submitted to the service. - */ - private val submittedTasks = meter.counterBuilder("tasks.submitted") - .setDescription("Number of submitted tasks") - .setUnit("1") - .build() private var _tasksSubmitted: Int = 0 - - /** - * The number of jobs that are running. - */ - private val runningTasks = meter.upDownCounterBuilder("tasks.active") - .setDescription("Number of tasks running") - .setUnit("1") - .build() private var _tasksRunning: Int = 0 - - /** - * The number of jobs that have finished running. - */ - private val finishedTasks = meter.counterBuilder("tasks.finished") - .setDescription("Number of tasks that finished running") - .setUnit("1") - .build() private var _tasksFinished: Int = 0 /** @@ -229,14 +174,12 @@ public class WorkflowServiceImpl( instance.state = TaskStatus.READY } - submittedTasks.add(1) _tasksSubmitted++ } instances.values.toCollection(jobInstance.tasks) incomingJobs += jobInstance rootListener.jobSubmitted(jobInstance) - submittedJobs.add(1) _workflowsSubmitted++ pacer.enqueue() @@ -283,7 +226,6 @@ public class WorkflowServiceImpl( jobQueue.add(jobInstance) activeJobs += jobInstance - runningJobs.add(1) _workflowsRunning++ rootListener.jobStarted(jobInstance) } @@ -363,7 +305,6 @@ public class WorkflowServiceImpl( ServerState.RUNNING -> { val task = taskByServer.getValue(server) task.startedAt = clock.millis() - runningTasks.add(1) _tasksRunning++ rootListener.taskStarted(task) } @@ -381,8 +322,6 @@ public class WorkflowServiceImpl( job.tasks.remove(task) activeTasks -= task - runningTasks.add(-1) - finishedTasks.add(1) _tasksRunning-- _tasksFinished++ rootListener.taskFinished(task) @@ -410,8 +349,6 @@ public class WorkflowServiceImpl( private fun finishJob(job: JobState) { activeJobs -= job - runningJobs.add(-1) - finishedJobs.add(1) _workflowsRunning-- _workflowsFinished++ rootListener.jobFinished(job) diff --git a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/scheduler/telemetry/SchedulerStats.kt b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/scheduler/telemetry/SchedulerStats.kt new file mode 100644 index 00000000..608e82df --- /dev/null +++ b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/scheduler/telemetry/SchedulerStats.kt @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.workflow.service.scheduler.telemetry + +/** + * Statistics about the workflow scheduler. + * + * @property workflowsSubmitted The number of workflows submitted to the scheduler. + * @property workflowsRunning The number of workflows that are currently running. + * @property workflowsFinished The number of workflows that have completed since the scheduler started. + * @property tasksSubmitted The number of tasks submitted to the scheduler. + * @property tasksRunning The number of tasks that are currently running. + * @property tasksFinished The number of tasks that have completed. + */ +public data class SchedulerStats( + val workflowsSubmitted: Int, + val workflowsRunning: Int, + val workflowsFinished: Int, + val tasksSubmitted: Int, + val tasksRunning: Int, + val tasksFinished: Int +) diff --git a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/telemetry/SchedulerStats.kt b/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/telemetry/SchedulerStats.kt deleted file mode 100644 index 7c7d7c4d..00000000 --- a/opendc-workflow/opendc-workflow-service/src/main/kotlin/org/opendc/workflow/service/telemetry/SchedulerStats.kt +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2022 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.workflow.service.telemetry - -/** - * Statistics about the workflow scheduler. - * - * @property workflowsSubmitted The number of workflows submitted to the scheduler. - * @property workflowsRunning The number of workflows that are currently running. - * @property workflowsFinished The number of workflows that have completed since the scheduler started. - * @property tasksSubmitted The number of tasks submitted to the scheduler. - * @property tasksRunning The number of tasks that are currently running. - * @property tasksFinished The number of tasks that have completed. - */ -public data class SchedulerStats( - val workflowsSubmitted: Int, - val workflowsRunning: Int, - val workflowsFinished: Int, - val tasksSubmitted: Int, - val tasksRunning: Int, - val tasksFinished: Int -) diff --git a/opendc-workflow/opendc-workflow-workload/build.gradle.kts b/opendc-workflow/opendc-workflow-workload/build.gradle.kts index b725a69c..17eadf29 100644 --- a/opendc-workflow/opendc-workflow-workload/build.gradle.kts +++ b/opendc-workflow/opendc-workflow-workload/build.gradle.kts @@ -32,6 +32,4 @@ dependencies { implementation(projects.opendcSimulator.opendcSimulatorCompute) implementation(projects.opendcTrace.opendcTraceApi) - implementation(projects.opendcTelemetry.opendcTelemetrySdk) - implementation(libs.opentelemetry.semconv) } diff --git a/opendc-workflow/opendc-workflow-workload/src/main/kotlin/org/opendc/workflow/workload/WorkflowServiceHelper.kt b/opendc-workflow/opendc-workflow-workload/src/main/kotlin/org/opendc/workflow/workload/WorkflowServiceHelper.kt index a7d0ed6c..435d0190 100644 --- a/opendc-workflow/opendc-workflow-workload/src/main/kotlin/org/opendc/workflow/workload/WorkflowServiceHelper.kt +++ b/opendc-workflow/opendc-workflow-workload/src/main/kotlin/org/opendc/workflow/workload/WorkflowServiceHelper.kt @@ -22,24 +22,13 @@ package org.opendc.workflow.workload -import io.opentelemetry.api.metrics.MeterProvider -import io.opentelemetry.sdk.common.CompletableResultCode -import io.opentelemetry.sdk.metrics.SdkMeterProvider -import io.opentelemetry.sdk.metrics.data.AggregationTemporality -import io.opentelemetry.sdk.metrics.export.MetricProducer -import io.opentelemetry.sdk.metrics.export.MetricReader -import io.opentelemetry.sdk.metrics.export.MetricReaderFactory -import io.opentelemetry.sdk.resources.Resource -import io.opentelemetry.semconv.resource.attributes.ResourceAttributes import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.delay import kotlinx.coroutines.launch import org.opendc.compute.api.ComputeClient -import org.opendc.telemetry.sdk.toOtelClock import org.opendc.workflow.api.Job import org.opendc.workflow.service.WorkflowService import java.time.Clock -import java.util.* import kotlin.coroutines.CoroutineContext /** @@ -59,60 +48,16 @@ public class WorkflowServiceHelper( /** * The [WorkflowService] that is constructed by this runner. */ - public val service: WorkflowService - - /** - * The [MetricProducer] exposed by the [WorkflowService]. - */ - public lateinit var metricProducer: MetricProducer - private set - - /** - * The [MeterProvider] used for the service. - */ - private val _meterProvider: SdkMeterProvider - - /** - * The list of [MetricReader]s that have been registered with the runner. - */ - private val _metricReaders = mutableListOf() - - init { - val resource = Resource.builder() - .put(ResourceAttributes.SERVICE_NAME, "opendc-workflow") - .build() - - _meterProvider = SdkMeterProvider.builder() - .setClock(clock.toOtelClock()) - .setResource(resource) - .registerMetricReader { producer -> - metricProducer = producer - - val metricReaders = _metricReaders - object : MetricReader { - override fun getPreferredTemporality(): AggregationTemporality = AggregationTemporality.CUMULATIVE - override fun flush(): CompletableResultCode { - return CompletableResultCode.ofAll(metricReaders.map { it.flush() }) - } - override fun shutdown(): CompletableResultCode { - return CompletableResultCode.ofAll(metricReaders.map { it.shutdown() }) - } - } - } - .build() - - service = WorkflowService( - context, - clock, - _meterProvider, - computeClient, - schedulerSpec.schedulingQuantum, - jobAdmissionPolicy = schedulerSpec.jobAdmissionPolicy, - jobOrderPolicy = schedulerSpec.jobOrderPolicy, - taskEligibilityPolicy = schedulerSpec.taskEligibilityPolicy, - taskOrderPolicy = schedulerSpec.taskOrderPolicy, - ) - } + public val service: WorkflowService = WorkflowService( + context, + clock, + computeClient, + schedulerSpec.schedulingQuantum, + jobAdmissionPolicy = schedulerSpec.jobAdmissionPolicy, + jobOrderPolicy = schedulerSpec.jobOrderPolicy, + taskEligibilityPolicy = schedulerSpec.taskEligibilityPolicy, + taskOrderPolicy = schedulerSpec.taskOrderPolicy, + ) /** * Run the specified list of [jobs] using the workflow service and suspend execution until all jobs have @@ -146,19 +91,8 @@ public class WorkflowServiceHelper( } } - /** - * Register a [MetricReader] for this helper. - * - * @param factory The factory for the reader to register. - */ - public fun registerMetricReader(factory: MetricReaderFactory) { - val reader = factory.apply(metricProducer) - _metricReaders.add(reader) - } - override fun close() { computeClient.close() service.close() - _meterProvider.close() } } -- cgit v1.2.3