From 8d899e29dbd757f6df320212d6e0d77ce8216ab9 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 14 Sep 2021 15:38:38 +0200 Subject: refactor(telemetry): Standardize compute scheduler metrics This change updates the OpenDC compute service implementation with multiple meters that follow the OpenTelemetry conventions. --- .../kotlin/org/opendc/telemetry/compute/Helpers.kt | 59 ++++++++++++++-------- .../opendc/telemetry/compute/table/ServiceData.kt | 14 ++--- 2 files changed, 45 insertions(+), 28 deletions(-) (limited to 'opendc-telemetry') diff --git a/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/Helpers.kt b/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/Helpers.kt index 1f309f1b..f3690ee8 100644 --- a/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/Helpers.kt +++ b/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/Helpers.kt @@ -22,6 +22,7 @@ package org.opendc.telemetry.compute +import io.opentelemetry.api.common.AttributeKey import io.opentelemetry.sdk.metrics.data.MetricData import io.opentelemetry.sdk.metrics.export.MetricProducer import org.opendc.telemetry.compute.table.ServiceData @@ -37,32 +38,48 @@ public fun collectServiceMetrics(timestamp: Long, metricProducer: MetricProducer * Extract a [ServiceData] object from the specified list of metric data. */ public fun extractServiceMetrics(timestamp: Long, metrics: Collection): ServiceData { - var submittedVms = 0 - var queuedVms = 0 - var unscheduledVms = 0 - var runningVms = 0 - var finishedVms = 0 - var hosts = 0 - var availableHosts = 0 + val resultKey = AttributeKey.stringKey("result") + val stateKey = AttributeKey.stringKey("state") - for (metric in metrics) { - val points = metric.longSumData.points + var hostsUp = 0 + var hostsDown = 0 - if (points.isEmpty()) { - continue - } + var serversPending = 0 + var serversActive = 0 - val value = points.first().value.toInt() + var attemptsSuccess = 0 + var attemptsFailure = 0 + var attemptsError = 0 + + for (metric in metrics) { when (metric.name) { - "servers.submitted" -> submittedVms = value - "servers.waiting" -> queuedVms = value - "servers.unscheduled" -> unscheduledVms = value - "servers.active" -> runningVms = value - "servers.finished" -> finishedVms = value - "hosts.total" -> hosts = value - "hosts.available" -> availableHosts = value + "scheduler.hosts" -> { + for (point in metric.longSumData.points) { + when (point.attributes[stateKey]) { + "up" -> hostsUp = point.value.toInt() + "down" -> hostsDown = point.value.toInt() + } + } + } + "scheduler.servers" -> { + for (point in metric.longSumData.points) { + when (point.attributes[stateKey]) { + "pending" -> serversPending = point.value.toInt() + "active" -> serversActive = point.value.toInt() + } + } + } + "scheduler.attempts" -> { + for (point in metric.longSumData.points) { + when (point.attributes[resultKey]) { + "success" -> attemptsSuccess = point.value.toInt() + "failure" -> attemptsFailure = point.value.toInt() + "error" -> attemptsError = point.value.toInt() + } + } + } } } - return ServiceData(timestamp, hosts, availableHosts, submittedVms, runningVms, finishedVms, queuedVms, unscheduledVms) + return ServiceData(timestamp, hostsUp, hostsDown, serversPending, serversActive, attemptsSuccess, attemptsFailure, attemptsError) } diff --git a/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/table/ServiceData.kt b/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/table/ServiceData.kt index f6ff5db5..da2ebdf4 100644 --- a/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/table/ServiceData.kt +++ b/opendc-telemetry/opendc-telemetry-compute/src/main/kotlin/org/opendc/telemetry/compute/table/ServiceData.kt @@ -27,11 +27,11 @@ package org.opendc.telemetry.compute.table */ public data class ServiceData( public val timestamp: Long, - public val hostCount: Int, - public val activeHostCount: Int, - public val instanceCount: Int, - public val runningInstanceCount: Int, - public val finishedInstanceCount: Int, - public val queuedInstanceCount: Int, - public val failedInstanceCount: Int + public val hostsUp: Int, + public val hostsDown: Int, + public val serversPending: Int, + public val serversActive: Int, + public val attemptsSuccess: Int, + public val attemptsFailure: Int, + public val attemptsError: Int ) -- cgit v1.2.3