summaryrefslogtreecommitdiff
path: root/opendc-compute/opendc-compute-service/src
diff options
context:
space:
mode:
author Fabian Mastenbroek <mail.fabianm@gmail.com> 2022-05-04 16:24:53 +0200
committer Fabian Mastenbroek <mail.fabianm@gmail.com> 2022-05-06 18:37:36 +0200
commit c7eec7904e08029b3ab31d3e7b21afa1ea9ab7e6 (patch)
tree 459724b394f5aca35733582a024fd5d99d06a7a4 /opendc-compute/opendc-compute-service/src
parent a9657e4fa3b15e2c1c11884b5a250b0861bcc21d (diff)
refactor(compute/service): Remove OpenTelemetry from "compute" modules
This change removes the OpenTelemetry integration from the OpenDC Compute modules. Previously, we chose to integrate OpenTelemetry to provide a unified way to report metrics to users. Although this worked as expected, the overhead of OpenTelemetry when collecting metrics during simulation was considerable, and it offered few optimization opportunities (other than providing a separate API implementation). Furthermore, since we were tied to OpenTelemetry's SDK implementation, we experienced issues with throttling and with registering multiple instruments. We will instead use another approach, where we expose the core metrics in OpenDC via specialized interfaces (see the preceding commits) such that access is fast and can be done without having to interface with OpenTelemetry. In addition, we will provide an adapter that is able to forward these metrics to OpenTelemetry implementations, so we can still integrate with the wider ecosystem.
Diffstat (limited to 'opendc-compute/opendc-compute-service/src')
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt6
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt2
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt2
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt88
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt18
-rw-r--r--opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt3
6 files changed, 6 insertions, 113 deletions
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt
index 3a6baaa1..c0b70268 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt
@@ -22,8 +22,6 @@
package org.opendc.compute.service
-import io.opentelemetry.api.metrics.Meter
-import io.opentelemetry.api.metrics.MeterProvider
import org.opendc.compute.api.ComputeClient
import org.opendc.compute.api.Server
import org.opendc.compute.service.driver.Host
@@ -79,18 +77,16 @@ public interface ComputeService : AutoCloseable {
*
* @param context The [CoroutineContext] to use in the service.
* @param clock The clock instance to use.
- * @param meterProvider The [MeterProvider] for creating a [Meter] for the service.
* @param scheduler The scheduler implementation to use.
* @param schedulingQuantum The interval between scheduling cycles.
*/
public operator fun invoke(
context: CoroutineContext,
clock: Clock,
- meterProvider: MeterProvider,
scheduler: ComputeScheduler,
schedulingQuantum: Duration = Duration.ofMinutes(5),
): ComputeService {
- return ComputeServiceImpl(context, clock, meterProvider, scheduler, schedulingQuantum)
+ return ComputeServiceImpl(context, clock, scheduler, schedulingQuantum)
}
}
}
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt
index b3958473..6fec5175 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt
@@ -35,5 +35,5 @@ import java.time.Instant
public data class GuestSystemStats(
val uptime: Duration,
val downtime: Duration,
- val bootTime: Instant
+ val bootTime: Instant?
)
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt
index 1c07023f..9d34a5ce 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt
@@ -41,7 +41,7 @@ import java.time.Instant
public data class HostSystemStats(
val uptime: Duration,
val downtime: Duration,
- val bootTime: Instant,
+ val bootTime: Instant?,
val powerUsage: Double,
val energyUsage: Double,
val guestsTerminated: Int,
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
index e8664e5c..21aaa19e 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
@@ -22,11 +22,6 @@
package org.opendc.compute.service.internal
-import io.opentelemetry.api.common.AttributeKey
-import io.opentelemetry.api.common.Attributes
-import io.opentelemetry.api.metrics.Meter
-import io.opentelemetry.api.metrics.MeterProvider
-import io.opentelemetry.api.metrics.ObservableLongMeasurement
import kotlinx.coroutines.*
import mu.KotlinLogging
import org.opendc.common.util.Pacer
@@ -49,14 +44,12 @@ import kotlin.math.max
*
* @param context The [CoroutineContext] to use in the service.
* @param clock The clock instance to use.
- * @param meterProvider The [MeterProvider] for creating a [Meter] for the service.
* @param scheduler The scheduler implementation to use.
* @param schedulingQuantum The interval between scheduling cycles.
*/
internal class ComputeServiceImpl(
private val context: CoroutineContext,
private val clock: Clock,
- meterProvider: MeterProvider,
private val scheduler: ComputeScheduler,
schedulingQuantum: Duration
) : ComputeService, HostListener {
@@ -71,11 +64,6 @@ internal class ComputeServiceImpl(
private val logger = KotlinLogging.logger {}
/**
- * The [Meter] to track metrics of the [ComputeService].
- */
- private val meter = meterProvider.get("org.opendc.compute.service")
-
- /**
* The [Random] instance used to generate unique identifiers for the objects.
*/
private val random = Random(0)
@@ -117,72 +105,20 @@ internal class ComputeServiceImpl(
private var maxCores = 0
private var maxMemory = 0L
-
- /**
- * The number of scheduling attempts.
- */
- private val _schedulingAttempts = meter.counterBuilder("scheduler.attempts")
- .setDescription("Number of scheduling attempts")
- .setUnit("1")
- .build()
- private val _schedulingAttemptsSuccessAttr = Attributes.of(AttributeKey.stringKey("result"), "success")
- private val _schedulingAttemptsFailureAttr = Attributes.of(AttributeKey.stringKey("result"), "failure")
- private val _schedulingAttemptsErrorAttr = Attributes.of(AttributeKey.stringKey("result"), "error")
private var _attemptsSuccess = 0L
private var _attemptsFailure = 0L
private var _attemptsError = 0L
-
- /**
- * The response time of the service.
- */
- private val _schedulingLatency = meter.histogramBuilder("scheduler.latency")
- .setDescription("End to end latency for a server to be scheduled (in multiple attempts)")
- .ofLongs()
- .setUnit("ms")
- .build()
-
- /**
- * The number of servers that are pending.
- */
- private val _servers = meter.upDownCounterBuilder("scheduler.servers")
- .setDescription("Number of servers managed by the scheduler")
- .setUnit("1")
- .build()
- private val _serversPendingAttr = Attributes.of(AttributeKey.stringKey("state"), "pending")
- private val _serversActiveAttr = Attributes.of(AttributeKey.stringKey("state"), "active")
private var _serversPending = 0
private var _serversActive = 0
/**
* The [Pacer] to use for scheduling the scheduler cycles.
*/
- private val pacer = Pacer(scope.coroutineContext, clock, schedulingQuantum.toMillis(), ::doSchedule)
+ private val pacer = Pacer(scope.coroutineContext, clock, schedulingQuantum.toMillis()) { doSchedule() }
override val hosts: Set<Host>
get() = hostToView.keys
- init {
- val upState = Attributes.of(AttributeKey.stringKey("state"), "up")
- val downState = Attributes.of(AttributeKey.stringKey("state"), "down")
-
- meter.upDownCounterBuilder("scheduler.hosts")
- .setDescription("Number of hosts registered with the scheduler")
- .setUnit("1")
- .buildWithCallback { result ->
- val total = hosts.size
- val available = availableHosts.size.toLong()
-
- result.record(available, upState)
- result.record(total - available, downState)
- }
-
- meter.gaugeBuilder("system.time.provision")
- .setDescription("The most recent timestamp where the server entered a provisioned state")
- .setUnit("1")
- .ofLongs()
- .buildWithCallback(::collectProvisionTime)
- }
-
override fun newClient(): ComputeClient {
check(scope.isActive) { "Service is already closed" }
return object : ComputeClient {
@@ -355,7 +291,6 @@ internal class ComputeServiceImpl(
server.launchedAt = Instant.ofEpochMilli(now)
queue.add(request)
_serversPending++
- _servers.add(1, _serversPendingAttr)
requestSchedulingCycle()
return request
}
@@ -387,14 +322,13 @@ internal class ComputeServiceImpl(
/**
* Run a single scheduling iteration.
*/
- private fun doSchedule(now: Long) {
+ private fun doSchedule() {
while (queue.isNotEmpty()) {
val request = queue.peek()
if (request.isCancelled) {
queue.poll()
_serversPending--
- _servers.add(-1, _serversPendingAttr)
continue
}
@@ -407,9 +341,7 @@ internal class ComputeServiceImpl(
// Remove the incoming image
queue.poll()
_serversPending--
- _servers.add(-1, _serversPendingAttr)
_attemptsFailure++
- _schedulingAttempts.add(1, _schedulingAttemptsFailureAttr)
logger.warn { "Failed to spawn $server: does not fit [${clock.instant()}]" }
@@ -425,8 +357,6 @@ internal class ComputeServiceImpl(
// Remove request from queue
queue.poll()
_serversPending--
- _servers.add(-1, _serversPendingAttr)
- _schedulingLatency.record(now - request.submitTime, server.attributes)
logger.info { "Assigned server $server to host $host." }
@@ -442,10 +372,8 @@ internal class ComputeServiceImpl(
host.spawn(server)
activeServers[server] = host
- _servers.add(1, _serversActiveAttr)
_serversActive++
_attemptsSuccess++
- _schedulingAttempts.add(1, _schedulingAttemptsSuccessAttr)
} catch (e: Throwable) {
logger.error(e) { "Failed to deploy VM" }
@@ -454,7 +382,6 @@ internal class ComputeServiceImpl(
hv.availableMemory += server.flavor.memorySize
_attemptsError++
- _schedulingAttempts.add(1, _schedulingAttemptsErrorAttr)
}
}
}
@@ -511,7 +438,6 @@ internal class ComputeServiceImpl(
if (activeServers.remove(server) != null) {
_serversActive--
- _servers.add(-1, _serversActiveAttr)
}
val hv = hostToView[host]
@@ -527,14 +453,4 @@ internal class ComputeServiceImpl(
requestSchedulingCycle()
}
}
-
- /**
- * Collect the timestamp when each server entered its provisioning state most recently.
- */
- private fun collectProvisionTime(result: ObservableLongMeasurement) {
- for ((_, server) in servers) {
- val launchedAt = server.launchedAt ?: continue
- result.record(launchedAt.toEpochMilli(), server.attributes)
- }
- }
}
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt
index d2a2d896..f9da24d8 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt
@@ -22,9 +22,6 @@
package org.opendc.compute.service.internal
-import io.opentelemetry.api.common.AttributeKey
-import io.opentelemetry.api.common.Attributes
-import io.opentelemetry.semconv.resource.attributes.ResourceAttributes
import mu.KotlinLogging
import org.opendc.compute.api.*
import org.opendc.compute.service.driver.Host
@@ -54,21 +51,6 @@ internal class InternalServer(
private val watchers = mutableListOf<ServerWatcher>()
/**
- * The attributes of a server.
- */
- @JvmField internal val attributes: Attributes = Attributes.builder()
- .put(ResourceAttributes.HOST_NAME, name)
- .put(ResourceAttributes.HOST_ID, uid.toString())
- .put(ResourceAttributes.HOST_TYPE, flavor.name)
- .put(AttributeKey.longKey("host.num_cpus"), flavor.cpuCount.toLong())
- .put(AttributeKey.longKey("host.mem_capacity"), flavor.memorySize)
- .put(AttributeKey.stringArrayKey("host.labels"), labels.map { (k, v) -> "$k:$v" })
- .put(ResourceAttributes.HOST_ARCH, ResourceAttributes.HostArchValues.AMD64)
- .put(ResourceAttributes.HOST_IMAGE_NAME, image.name)
- .put(ResourceAttributes.HOST_IMAGE_ID, image.uid.toString())
- .build()
-
- /**
* The [Host] that has been assigned to host the server.
*/
@JvmField internal var host: Host? = null
diff --git a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt
index eb106817..cc7be4a8 100644
--- a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt
+++ b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt
@@ -23,7 +23,6 @@
package org.opendc.compute.service
import io.mockk.*
-import io.opentelemetry.api.metrics.MeterProvider
import kotlinx.coroutines.delay
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
@@ -59,7 +58,7 @@ internal class ComputeServiceTest {
filters = listOf(ComputeFilter(), VCpuFilter(allocationRatio = 1.0), RamFilter(allocationRatio = 1.0)),
weighers = listOf(RamWeigher())
)
- service = ComputeService(scope.coroutineContext, clock, MeterProvider.noop(), computeScheduler)
+ service = ComputeService(scope.coroutineContext, clock, computeScheduler)
}
@Test