summaryrefslogtreecommitdiff
path: root/opendc-compute
diff options
context:
space:
mode:
authorFabian Mastenbroek <mail.fabianm@gmail.com>2022-05-06 17:47:44 +0200
committerFabian Mastenbroek <mail.fabianm@gmail.com>2022-05-06 17:47:44 +0200
commita9657e4fa3b15e2c1c11884b5a250b0861bcc21d (patch)
tree6b25de3d7a1def150ab4977a45723c52167e7211 /opendc-compute
parent48da4538707cd074969287724ca6f02823f2ff5a (diff)
parent8e3905273c7a3f2df4df5d5840e4088d99b0dffb (diff)
merge: Expose metrics directly to user (#80)
This pull request adds the ability to access the metrics of resources modeled by the OpenDC Compute, Workflow, FaaS, and TensorFlow services directly from their corresponding interfaces. Previously, users would have to interact with OpenTelemetry to obtain these values, which is complex and provides significant overhead. With this pull request, users can access the metrics of all cloud resources modeled by OpenDC via methods such as `getSchedulerStats()`, etc. ** Breaking Changes ** - `ComputeService.hostCount` removed in favour of `ComputeService.hosts.size`
Diffstat (limited to 'opendc-compute')
-rw-r--r--opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Server.kt7
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt19
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/Host.kt35
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestCpuStats.kt44
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt39
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostCpuStats.kt47
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt51
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ClientServer.kt5
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt43
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt3
-rw-r--r--opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/telemetry/SchedulerStats.kt46
-rw-r--r--opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt9
-rw-r--r--opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/InternalServerTest.kt2
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt90
-rw-r--r--opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt47
-rw-r--r--opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt123
-rw-r--r--opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt21
-rw-r--r--opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/export/parquet/ParquetComputeMonitor.kt (renamed from opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/export/parquet/ParquetComputeMetricExporter.kt)8
18 files changed, 497 insertions, 142 deletions
diff --git a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Server.kt b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Server.kt
index b508a9f8..64b73d0b 100644
--- a/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Server.kt
+++ b/opendc-compute/opendc-compute-api/src/main/kotlin/org/opendc/compute/api/Server.kt
@@ -22,6 +22,8 @@
package org.opendc.compute.api
+import java.time.Instant
+
/**
* A stateful object representing a server instance that is running on some physical or virtual machine.
*/
@@ -42,6 +44,11 @@ public interface Server : Resource {
public val state: ServerState
/**
+ * The most recent moment in time when the server was launched.
+ */
+ public val launchedAt: Instant?
+
+ /**
* Request the server to be started.
*
* This method is guaranteed to return after the request was acknowledged, but might return before the server was
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt
index 2a1fbaa0..3a6baaa1 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/ComputeService.kt
@@ -25,9 +25,11 @@ package org.opendc.compute.service
import io.opentelemetry.api.metrics.Meter
import io.opentelemetry.api.metrics.MeterProvider
import org.opendc.compute.api.ComputeClient
+import org.opendc.compute.api.Server
import org.opendc.compute.service.driver.Host
import org.opendc.compute.service.internal.ComputeServiceImpl
import org.opendc.compute.service.scheduler.ComputeScheduler
+import org.opendc.compute.service.telemetry.SchedulerStats
import java.time.Clock
import java.time.Duration
import kotlin.coroutines.CoroutineContext
@@ -37,16 +39,11 @@ import kotlin.coroutines.CoroutineContext
*/
public interface ComputeService : AutoCloseable {
/**
- * The hosts that are used by the compute service.
+ * The hosts that are registered with the "compute" service.
*/
public val hosts: Set<Host>
/**
- * The number of hosts available in the system.
- */
- public val hostCount: Int
-
- /**
* Create a new [ComputeClient] to control the compute service.
*/
public fun newClient(): ComputeClient
@@ -66,6 +63,16 @@ public interface ComputeService : AutoCloseable {
*/
public override fun close()
+ /**
+ * Lookup the [Host] that currently hosts the specified [server].
+ */
+ public fun lookupHost(server: Server): Host?
+
+ /**
+ * Collect the statistics about the scheduler component of this service.
+ */
+ public fun getSchedulerStats(): SchedulerStats
+
public companion object {
/**
* Construct a new [ComputeService] implementation.
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/Host.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/Host.kt
index bed15dfd..67b144d9 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/Host.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/Host.kt
@@ -23,6 +23,10 @@
package org.opendc.compute.service.driver
import org.opendc.compute.api.Server
+import org.opendc.compute.service.driver.telemetry.GuestCpuStats
+import org.opendc.compute.service.driver.telemetry.GuestSystemStats
+import org.opendc.compute.service.driver.telemetry.HostCpuStats
+import org.opendc.compute.service.driver.telemetry.HostSystemStats
import java.util.*
/**
@@ -55,6 +59,11 @@ public interface Host {
public val meta: Map<String, Any>
/**
+ * The [Server] instances known to the host.
+ */
+ public val instances: Set<Server>
+
+ /**
* Determine whether the specified [instance][server] can still fit on this host.
*/
public fun canFit(server: Server): Boolean
@@ -100,4 +109,30 @@ public interface Host {
* Remove a [HostListener] from this host.
*/
public fun removeListener(listener: HostListener)
+
+ /**
+ * Query the system statistics of the host.
+ */
+ public fun getSystemStats(): HostSystemStats
+
+ /**
+ * Query the system statistics of a [Server] that is located on this host.
+ *
+ * @param server The [Server] to obtain the system statistics of.
+ * @throws IllegalArgumentException if the server is not present on the host.
+ */
+ public fun getSystemStats(server: Server): GuestSystemStats
+
+ /**
+ * Query the CPU statistics of the host.
+ */
+ public fun getCpuStats(): HostCpuStats
+
+ /**
+ * Query the CPU statistics of a [Server] that is located on this host.
+ *
+ * @param server The [Server] to obtain the CPU statistics of.
+ * @throws IllegalArgumentException if the server is not present on the host.
+ */
+ public fun getCpuStats(server: Server): GuestCpuStats
}
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestCpuStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestCpuStats.kt
new file mode 100644
index 00000000..b5d63471
--- /dev/null
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestCpuStats.kt
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.service.driver.telemetry
+
+/**
+ * Statistics about the CPUs of a guest.
+ *
+ * @property activeTime The cumulative time (in seconds) that the CPUs of the guest were actively running.
+ * @property idleTime The cumulative time (in seconds) the CPUs of the guest were idle.
+ * @property stealTime The cumulative CPU time (in seconds) that the guest was ready to run, but not granted time by the host.
+ * @property lostTime The cumulative CPU time (in seconds) that was lost due to interference with other machines.
+ * @property capacity The available CPU capacity of the guest (in MHz).
+ * @property usage Amount of CPU resources (in MHz) actually used by the guest.
+ * @property utilization Utilization of the CPU resources (in %) relative to the total CPU capacity.
+ */
+public data class GuestCpuStats(
+ val activeTime: Long,
+ val idleTime: Long,
+ val stealTime: Long,
+ val lostTime: Long,
+ val capacity: Double,
+ val usage: Double,
+ val utilization: Double
+)
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt
new file mode 100644
index 00000000..b3958473
--- /dev/null
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/GuestSystemStats.kt
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.service.driver.telemetry
+
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * System-level statistics of a guest.
+ *
+ * @property uptime The cumulative uptime of the guest since last boot (in ms).
+ * @property downtime The cumulative downtime of the guest since last boot (in ms).
+ * @property bootTime The time at which the guest booted.
+ */
+public data class GuestSystemStats(
+ val uptime: Duration,
+ val downtime: Duration,
+ val bootTime: Instant
+)
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostCpuStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostCpuStats.kt
new file mode 100644
index 00000000..55e23c0e
--- /dev/null
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostCpuStats.kt
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.service.driver.telemetry
+
+/**
+ * Statistics about the CPUs of a host.
+ *
+ * @property activeTime The cumulative time (in seconds) that the CPUs of the host were actively running.
+ * @property idleTime The cumulative time (in seconds) the CPUs of the host were idle.
+ * @property stealTime The cumulative CPU time (in seconds) that virtual machines were ready to run, but were not able to.
+ * @property lostTime The cumulative CPU time (in seconds) that was lost due to interference between virtual machines.
+ * @property capacity The available CPU capacity of the host (in MHz).
+ * @property demand Amount of CPU resources (in MHz) the guests would use if there were no CPU contention or CPU
+ * limits.
+ * @property usage Amount of CPU resources (in MHz) actually used by the host.
+ * @property utilization Utilization of the CPU resources (in %) relative to the total CPU capacity.
+ */
+public data class HostCpuStats(
+ val activeTime: Long,
+ val idleTime: Long,
+ val stealTime: Long,
+ val lostTime: Long,
+ val capacity: Double,
+ val demand: Double,
+ val usage: Double,
+ val utilization: Double
+)
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt
new file mode 100644
index 00000000..1c07023f
--- /dev/null
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/driver/telemetry/HostSystemStats.kt
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.service.driver.telemetry
+
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * System-level statistics of a host.
+
+ * @property uptime The cumulative uptime of the host since last boot (in ms).
+ * @property downtime The cumulative downtime of the host since last boot (in ms).
+ * @property bootTime The time at which the server started.
+ * @property powerUsage Instantaneous power usage of the system (in W).
+ * @property energyUsage The cumulative energy usage of the system (in J).
+ * @property guestsTerminated The number of guests that are in a terminated state.
+ * @property guestsRunning The number of guests that are in a running state.
+ * @property guestsError The number of guests that are in an error state.
+ * @property guestsInvalid The number of guests that are in an unknown state.
+ */
+public data class HostSystemStats(
+ val uptime: Duration,
+ val downtime: Duration,
+ val bootTime: Instant,
+ val powerUsage: Double,
+ val energyUsage: Double,
+ val guestsTerminated: Int,
+ val guestsRunning: Int,
+ val guestsError: Int,
+ val guestsInvalid: Int,
+)
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ClientServer.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ClientServer.kt
index f2929bf3..45775640 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ClientServer.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ClientServer.kt
@@ -27,6 +27,7 @@ import org.opendc.compute.api.Image
import org.opendc.compute.api.Server
import org.opendc.compute.api.ServerState
import org.opendc.compute.api.ServerWatcher
+import java.time.Instant
import java.util.*
/**
@@ -55,6 +56,9 @@ internal class ClientServer(private val delegate: Server) : Server, ServerWatche
override var state: ServerState = delegate.state
private set
+ override var launchedAt: Instant? = null
+ private set
+
override suspend fun start() {
delegate.start()
refresh()
@@ -95,6 +99,7 @@ internal class ClientServer(private val delegate: Server) : Server, ServerWatche
labels = delegate.labels
meta = delegate.meta
state = delegate.state
+ launchedAt = delegate.launchedAt
}
override fun onStateChanged(server: Server, newState: ServerState) {
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
index 144b6573..e8664e5c 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/ComputeServiceImpl.kt
@@ -36,8 +36,10 @@ import org.opendc.compute.service.driver.Host
import org.opendc.compute.service.driver.HostListener
import org.opendc.compute.service.driver.HostState
import org.opendc.compute.service.scheduler.ComputeScheduler
+import org.opendc.compute.service.telemetry.SchedulerStats
import java.time.Clock
import java.time.Duration
+import java.time.Instant
import java.util.*
import kotlin.coroutines.CoroutineContext
import kotlin.math.max
@@ -126,6 +128,9 @@ internal class ComputeServiceImpl(
private val _schedulingAttemptsSuccessAttr = Attributes.of(AttributeKey.stringKey("result"), "success")
private val _schedulingAttemptsFailureAttr = Attributes.of(AttributeKey.stringKey("result"), "failure")
private val _schedulingAttemptsErrorAttr = Attributes.of(AttributeKey.stringKey("result"), "error")
+ private var _attemptsSuccess = 0L
+ private var _attemptsFailure = 0L
+ private var _attemptsError = 0L
/**
* The response time of the service.
@@ -145,6 +150,8 @@ internal class ComputeServiceImpl(
.build()
private val _serversPendingAttr = Attributes.of(AttributeKey.stringKey("state"), "pending")
private val _serversActiveAttr = Attributes.of(AttributeKey.stringKey("state"), "active")
+ private var _serversPending = 0
+ private var _serversActive = 0
/**
* The [Pacer] to use for scheduling the scheduler cycles.
@@ -154,9 +161,6 @@ internal class ComputeServiceImpl(
override val hosts: Set<Host>
get() = hostToView.keys
- override val hostCount: Int
- get() = hostToView.size
-
init {
val upState = Attributes.of(AttributeKey.stringKey("state"), "up")
val downState = Attributes.of(AttributeKey.stringKey("state"), "down")
@@ -165,7 +169,7 @@ internal class ComputeServiceImpl(
.setDescription("Number of hosts registered with the scheduler")
.setUnit("1")
.buildWithCallback { result ->
- val total = hostCount
+ val total = hosts.size
val available = availableHosts.size.toLong()
result.record(available, upState)
@@ -322,17 +326,35 @@ internal class ComputeServiceImpl(
}
}
+ override fun lookupHost(server: Server): Host? {
+ val internal = requireNotNull(servers[server.uid]) { "Invalid server passed to lookupHost" }
+ return internal.host
+ }
+
override fun close() {
scope.cancel()
}
+ override fun getSchedulerStats(): SchedulerStats {
+ return SchedulerStats(
+ availableHosts.size,
+ hostToView.size - availableHosts.size,
+ _attemptsSuccess,
+ _attemptsFailure,
+ _attemptsError,
+ _serversPending,
+ _serversActive
+ )
+ }
+
internal fun schedule(server: InternalServer): SchedulingRequest {
logger.debug { "Enqueueing server ${server.uid} to be assigned to host." }
val now = clock.millis()
val request = SchedulingRequest(server, now)
- server.lastProvisioningTimestamp = now
+ server.launchedAt = Instant.ofEpochMilli(now)
queue.add(request)
+ _serversPending++
_servers.add(1, _serversPendingAttr)
requestSchedulingCycle()
return request
@@ -371,6 +393,7 @@ internal class ComputeServiceImpl(
if (request.isCancelled) {
queue.poll()
+ _serversPending--
_servers.add(-1, _serversPendingAttr)
continue
}
@@ -383,7 +406,9 @@ internal class ComputeServiceImpl(
if (server.flavor.memorySize > maxMemory || server.flavor.cpuCount > maxCores) {
// Remove the incoming image
queue.poll()
+ _serversPending--
_servers.add(-1, _serversPendingAttr)
+ _attemptsFailure++
_schedulingAttempts.add(1, _schedulingAttemptsFailureAttr)
logger.warn { "Failed to spawn $server: does not fit [${clock.instant()}]" }
@@ -399,6 +424,7 @@ internal class ComputeServiceImpl(
// Remove request from queue
queue.poll()
+ _serversPending--
_servers.add(-1, _serversPendingAttr)
_schedulingLatency.record(now - request.submitTime, server.attributes)
@@ -417,6 +443,8 @@ internal class ComputeServiceImpl(
activeServers[server] = host
_servers.add(1, _serversActiveAttr)
+ _serversActive++
+ _attemptsSuccess++
_schedulingAttempts.add(1, _schedulingAttemptsSuccessAttr)
} catch (e: Throwable) {
logger.error(e) { "Failed to deploy VM" }
@@ -425,6 +453,7 @@ internal class ComputeServiceImpl(
hv.provisionedCores -= server.flavor.cpuCount
hv.availableMemory += server.flavor.memorySize
+ _attemptsError++
_schedulingAttempts.add(1, _schedulingAttemptsErrorAttr)
}
}
@@ -481,6 +510,7 @@ internal class ComputeServiceImpl(
logger.info { "[${clock.instant()}] Server ${server.uid} ${server.name} ${server.flavor} finished." }
if (activeServers.remove(server) != null) {
+ _serversActive--
_servers.add(-1, _serversActiveAttr)
}
@@ -503,7 +533,8 @@ internal class ComputeServiceImpl(
*/
private fun collectProvisionTime(result: ObservableLongMeasurement) {
for ((_, server) in servers) {
- result.record(server.lastProvisioningTimestamp, server.attributes)
+ val launchedAt = server.launchedAt ?: continue
+ result.record(launchedAt.toEpochMilli(), server.attributes)
}
}
}
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt
index f1b92c66..d2a2d896 100644
--- a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/internal/InternalServer.kt
@@ -28,6 +28,7 @@ import io.opentelemetry.semconv.resource.attributes.ResourceAttributes
import mu.KotlinLogging
import org.opendc.compute.api.*
import org.opendc.compute.service.driver.Host
+import java.time.Instant
import java.util.UUID
/**
@@ -75,7 +76,7 @@ internal class InternalServer(
/**
* The most recent timestamp when the server entered a provisioning state.
*/
- @JvmField internal var lastProvisioningTimestamp: Long = Long.MIN_VALUE
+ override var launchedAt: Instant? = null
/**
* The current scheduling request.
diff --git a/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/telemetry/SchedulerStats.kt b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/telemetry/SchedulerStats.kt
new file mode 100644
index 00000000..4dc70286
--- /dev/null
+++ b/opendc-compute/opendc-compute-service/src/main/kotlin/org/opendc/compute/service/telemetry/SchedulerStats.kt
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.compute.service.telemetry
+
+import org.opendc.compute.service.ComputeService
+
+/**
+ * Statistics about the scheduling component of the [ComputeService].
+ *
+ * @property hostsAvailable The number of hosts currently available for scheduling.
+ * @property hostsUnavailable The number of hosts unavailable for scheduling.
+ * @property attemptsSuccess Scheduling attempts that resulted into an allocation onto a host.
+ * @property attemptsFailure The number of failed scheduling attempt due to insufficient capacity at the moment.
+ * @property attemptsError The number of scheduling attempts that failed due to system error.
+ * @property serversPending The number of servers that are pending to be scheduled.
+ * @property serversActive The number of servers that are currently managed by the service and running.
+ */
+public data class SchedulerStats(
+ val hostsAvailable: Int,
+ val hostsUnavailable: Int,
+ val attemptsSuccess: Long,
+ val attemptsFailure: Long,
+ val attemptsError: Long,
+ val serversPending: Int,
+ val serversActive: Int
+)
diff --git a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt
index 7b8d0fe2..eb106817 100644
--- a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt
+++ b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/ComputeServiceTest.kt
@@ -24,7 +24,6 @@ package org.opendc.compute.service
import io.mockk.*
import io.opentelemetry.api.metrics.MeterProvider
-import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.delay
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
@@ -48,10 +47,9 @@ import java.util.*
/**
* Test suite for the [ComputeService] interface.
*/
-@OptIn(ExperimentalCoroutinesApi::class)
internal class ComputeServiceTest {
- lateinit var scope: SimulationCoroutineScope
- lateinit var service: ComputeService
+ private lateinit var scope: SimulationCoroutineScope
+ private lateinit var service: ComputeService
@BeforeEach
fun setUp() {
@@ -128,14 +126,12 @@ internal class ComputeServiceTest {
every { host.model } returns HostModel(4 * 2600.0, 4, 2048)
every { host.state } returns HostState.UP
- assertEquals(0, service.hostCount)
assertEquals(emptySet<Host>(), service.hosts)
service.addHost(host)
verify(exactly = 1) { host.addListener(any()) }
- assertEquals(1, service.hostCount)
assertEquals(1, service.hosts.size)
service.removeHost(host)
@@ -150,7 +146,6 @@ internal class ComputeServiceTest {
every { host.model } returns HostModel(4 * 2600.0, 4, 2048)
every { host.state } returns HostState.DOWN
- assertEquals(0, service.hostCount)
assertEquals(emptySet<Host>(), service.hosts)
service.addHost(host)
diff --git a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/InternalServerTest.kt b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/InternalServerTest.kt
index dfd3bc67..4e5a37ae 100644
--- a/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/InternalServerTest.kt
+++ b/opendc-compute/opendc-compute-service/src/test/kotlin/org/opendc/compute/service/InternalServerTest.kt
@@ -23,7 +23,6 @@
package org.opendc.compute.service
import io.mockk.*
-import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.yield
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test
@@ -41,7 +40,6 @@ import java.util.*
/**
* Test suite for the [InternalServer] implementation.
*/
-@OptIn(ExperimentalCoroutinesApi::class)
class InternalServerTest {
@Test
fun testEquality() {
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
index 4eb6392e..323ae4fe 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
@@ -29,11 +29,14 @@ import io.opentelemetry.api.metrics.MeterProvider
import io.opentelemetry.api.metrics.ObservableDoubleMeasurement
import io.opentelemetry.api.metrics.ObservableLongMeasurement
import kotlinx.coroutines.*
-import mu.KotlinLogging
import org.opendc.compute.api.Flavor
import org.opendc.compute.api.Server
import org.opendc.compute.api.ServerState
import org.opendc.compute.service.driver.*
+import org.opendc.compute.service.driver.telemetry.GuestCpuStats
+import org.opendc.compute.service.driver.telemetry.GuestSystemStats
+import org.opendc.compute.service.driver.telemetry.HostCpuStats
+import org.opendc.compute.service.driver.telemetry.HostSystemStats
import org.opendc.compute.simulator.internal.Guest
import org.opendc.compute.simulator.internal.GuestListener
import org.opendc.simulator.compute.*
@@ -49,6 +52,8 @@ import org.opendc.simulator.compute.power.PowerDriver
import org.opendc.simulator.compute.power.SimplePowerDriver
import org.opendc.simulator.compute.workload.SimWorkload
import org.opendc.simulator.flow.FlowEngine
+import java.time.Duration
+import java.time.Instant
import java.util.*
import kotlin.coroutines.CoroutineContext
@@ -81,11 +86,6 @@ public class SimHost(
private val clock = engine.clock
/**
- * The logger instance of this server.
- */
- private val logger = KotlinLogging.logger {}
-
- /**
* The [Meter] to track metrics of the simulated host.
*/
private val meter = meterProvider.get("org.opendc.compute.simulator")
@@ -112,6 +112,9 @@ public class SimHost(
private val guests = HashMap<Server, Guest>()
private val _guests = mutableListOf<Guest>()
+ override val instances: Set<Server>
+ get() = guests.keys
+
override val state: HostState
get() = _state
private var _state: HostState = HostState.DOWN
@@ -249,6 +252,68 @@ public class SimHost(
machine.cancel()
}
+ override fun getSystemStats(): HostSystemStats {
+ updateUptime()
+
+ var terminated = 0
+ var running = 0
+ var error = 0
+ var invalid = 0
+
+ val guests = _guests.listIterator()
+ for (guest in guests) {
+ when (guest.state) {
+ ServerState.TERMINATED -> terminated++
+ ServerState.RUNNING -> running++
+ ServerState.ERROR -> error++
+ ServerState.DELETED -> {
+ // Remove guests that have been deleted
+ this.guests.remove(guest.server)
+ guests.remove()
+ }
+ else -> invalid++
+ }
+ }
+
+ return HostSystemStats(
+ Duration.ofMillis(_uptime),
+ Duration.ofMillis(_downtime),
+ Instant.ofEpochMilli(_bootTime),
+ machine.powerUsage,
+ machine.energyUsage,
+ terminated,
+ running,
+ error,
+ invalid
+ )
+ }
+
+ override fun getSystemStats(server: Server): GuestSystemStats {
+ val guest = requireNotNull(guests[server]) { "Unknown server ${server.uid} at host $uid" }
+ return guest.getSystemStats()
+ }
+
+ override fun getCpuStats(): HostCpuStats {
+ val counters = hypervisor.counters
+ counters.flush()
+
+ return HostCpuStats(
+ counters.cpuActiveTime / 1000L,
+ counters.cpuIdleTime / 1000L,
+ counters.cpuStealTime / 1000L,
+ counters.cpuLostTime / 1000L,
+ hypervisor.cpuCapacity,
+ hypervisor.cpuDemand,
+ hypervisor.cpuUsage,
+ hypervisor.cpuUsage / _cpuLimit
+ )
+ }
+
+ override fun getCpuStats(server: Server): GuestCpuStats {
+ val guest = requireNotNull(guests[server]) { "Unknown server ${server.uid} at host $uid" }
+ return guest.getCpuStats()
+ }
+
override fun hashCode(): Int = uid.hashCode()
override fun equals(other: Any?): Boolean {
@@ -417,13 +482,12 @@ public class SimHost(
* Helper function to track the CPU time of a machine.
*/
private fun collectCpuTime(result: ObservableLongMeasurement) {
- val counters = hypervisor.counters
- counters.flush()
+ val stats = getCpuStats()
- result.record(counters.cpuActiveTime / 1000L, _activeState)
- result.record(counters.cpuIdleTime / 1000L, _idleState)
- result.record(counters.cpuStealTime / 1000L, _stealState)
- result.record(counters.cpuLostTime / 1000L, _lostState)
+ result.record(stats.activeTime, _activeState)
+ result.record(stats.idleTime, _idleState)
+ result.record(stats.stealTime, _stealState)
+ result.record(stats.lostTime, _lostState)
val guests = _guests
for (i in guests.indices) {
@@ -450,7 +514,7 @@ public class SimHost(
val guests = _guests
for (i in guests.indices) {
- guests[i].updateUptime(duration)
+ guests[i].updateUptime()
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
index bb378ee3..0d4c550d 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
@@ -32,6 +32,8 @@ import kotlinx.coroutines.*
import mu.KotlinLogging
import org.opendc.compute.api.Server
import org.opendc.compute.api.ServerState
+import org.opendc.compute.service.driver.telemetry.GuestCpuStats
+import org.opendc.compute.service.driver.telemetry.GuestSystemStats
import org.opendc.compute.simulator.SimHost
import org.opendc.compute.simulator.SimWorkloadMapper
import org.opendc.simulator.compute.kernel.SimHypervisor
@@ -39,6 +41,8 @@ import org.opendc.simulator.compute.kernel.SimVirtualMachine
import org.opendc.simulator.compute.runWorkload
import org.opendc.simulator.compute.workload.SimWorkload
import java.time.Clock
+import java.time.Duration
+import java.time.Instant
import kotlin.coroutines.CoroutineContext
/**
@@ -146,6 +150,37 @@ internal class Guest(
}
/**
+ * Obtain the system statistics of this guest.
+ */
+ fun getSystemStats(): GuestSystemStats {
+ updateUptime()
+
+ return GuestSystemStats(
+ Duration.ofMillis(_uptime),
+ Duration.ofMillis(_downtime),
+ Instant.ofEpochMilli(_bootTime)
+ )
+ }
+
+ /**
+ * Obtain the CPU statistics of this guest.
+ */
+ fun getCpuStats(): GuestCpuStats {
+ val counters = machine.counters
+ counters.flush()
+
+ return GuestCpuStats(
+ counters.cpuActiveTime / 1000L,
+ counters.cpuIdleTime / 1000L,
+ counters.cpuStealTime / 1000L,
+ counters.cpuLostTime / 1000L,
+ machine.cpuCapacity,
+ machine.cpuUsage,
+ machine.cpuUsage / _cpuLimit
+ )
+ }
+
+ /**
* The [Job] representing the current active virtual machine instance or `null` if no virtual machine is active.
*/
private var job: Job? = null
@@ -209,6 +244,8 @@ internal class Guest(
* This method is invoked when the guest stopped.
*/
private fun onStop(target: ServerState) {
+ updateUptime()
+
state = target
listener.onStop(this)
}
@@ -224,10 +261,16 @@ internal class Guest(
.put(STATE_KEY, "down")
.build()
+ private var _lastReport = clock.millis()
+
/**
* Helper function to track the uptime and downtime of the guest.
*/
- fun updateUptime(duration: Long) {
+ fun updateUptime() {
+ val now = clock.millis()
+ val duration = now - _lastReport
+ _lastReport = now
+
if (state == ServerState.RUNNING) {
_uptime += duration
} else if (state == ServerState.ERROR) {
@@ -239,6 +282,8 @@ internal class Guest(
* Helper function to track the uptime of the guest.
*/
fun collectUptime(result: ObservableLongMeasurement) {
+ updateUptime()
+
result.record(_uptime, _upState)
result.record(_downtime, _downState)
}
diff --git a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
index f0325023..fd54ad1d 100644
--- a/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/test/kotlin/org/opendc/compute/simulator/SimHostTest.kt
@@ -22,8 +22,7 @@
package org.opendc.compute.simulator
-import io.opentelemetry.sdk.metrics.SdkMeterProvider
-import io.opentelemetry.sdk.resources.Resource
+import io.opentelemetry.api.metrics.MeterProvider
import kotlinx.coroutines.*
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.BeforeEach
@@ -42,13 +41,7 @@ import org.opendc.simulator.compute.workload.SimTraceFragment
import org.opendc.simulator.compute.workload.SimTraceWorkload
import org.opendc.simulator.core.runBlockingSimulation
import org.opendc.simulator.flow.FlowEngine
-import org.opendc.telemetry.compute.ComputeMetricExporter
-import org.opendc.telemetry.compute.HOST_ID
-import org.opendc.telemetry.compute.table.HostTableReader
-import org.opendc.telemetry.compute.table.ServerTableReader
-import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader
-import org.opendc.telemetry.sdk.toOtelClock
-import java.time.Duration
+import java.time.Instant
import java.util.*
import kotlin.coroutines.resume
@@ -73,45 +66,16 @@ internal class SimHostTest {
*/
@Test
fun testOvercommitted() = runBlockingSimulation {
- var idleTime = 0L
- var activeTime = 0L
- var stealTime = 0L
-
- val hostId = UUID.randomUUID()
- val hostResource = Resource.builder()
- .put(HOST_ID, hostId.toString())
- .build()
-
- // Setup metric reader
val duration = 5 * 60L
- val reader = CoroutineMetricReader(
- this,
- object : ComputeMetricExporter() {
- override fun record(reader: HostTableReader) {
- activeTime += reader.cpuActiveTime
- idleTime += reader.cpuIdleTime
- stealTime += reader.cpuStealTime
- }
- },
- exportInterval = Duration.ofSeconds(duration)
- )
-
- val meterProvider = SdkMeterProvider
- .builder()
- .setResource(hostResource)
- .setClock(clock.toOtelClock())
- .registerMetricReader(reader)
- .build()
-
val engine = FlowEngine(coroutineContext, clock)
- val virtDriver = SimHost(
- uid = hostId,
+ val host = SimHost(
+ uid = UUID.randomUUID(),
name = "test",
model = machineModel,
meta = emptyMap(),
coroutineContext,
engine,
- meterProvider,
+ MeterProvider.noop(),
SimFairShareHypervisorProvider()
)
val vmImageA = MockImage(
@@ -150,11 +114,11 @@ internal class SimHostTest {
val flavor = MockFlavor(2, 0)
coroutineScope {
- launch { virtDriver.spawn(MockServer(UUID.randomUUID(), "a", flavor, vmImageA)) }
- launch { virtDriver.spawn(MockServer(UUID.randomUUID(), "b", flavor, vmImageB)) }
+ launch { host.spawn(MockServer(UUID.randomUUID(), "a", flavor, vmImageA)) }
+ launch { host.spawn(MockServer(UUID.randomUUID(), "b", flavor, vmImageB)) }
suspendCancellableCoroutine<Unit> { cont ->
- virtDriver.addListener(object : HostListener {
+ host.addListener(object : HostListener {
private var finished = 0
override fun onStateChanged(host: Host, server: Server, newState: ServerState) {
@@ -168,13 +132,14 @@ internal class SimHostTest {
// Ensure last cycle is collected
delay(1000L * duration)
- virtDriver.close()
- meterProvider.close()
+ host.close()
+
+ val cpuStats = host.getCpuStats()
assertAll(
- { assertEquals(658, activeTime, "Active time does not match") },
- { assertEquals(2341, idleTime, "Idle time does not match") },
- { assertEquals(637, stealTime, "Steal time does not match") },
+ { assertEquals(658, cpuStats.activeTime, "Active time does not match") },
+ { assertEquals(2341, cpuStats.idleTime, "Idle time does not match") },
+ { assertEquals(637, cpuStats.stealTime, "Steal time does not match") },
{ assertEquals(1500001, clock.millis()) }
)
}
@@ -184,54 +149,16 @@ internal class SimHostTest {
*/
@Test
fun testFailure() = runBlockingSimulation {
- var activeTime = 0L
- var idleTime = 0L
- var uptime = 0L
- var downtime = 0L
- var guestUptime = 0L
- var guestDowntime = 0L
-
- val hostId = UUID.randomUUID()
- val hostResource = Resource.builder()
- .put(HOST_ID, hostId.toString())
- .build()
-
- // Setup metric reader
val duration = 5 * 60L
- val reader = CoroutineMetricReader(
- this,
- object : ComputeMetricExporter() {
- override fun record(reader: HostTableReader) {
- activeTime += reader.cpuActiveTime
- idleTime += reader.cpuIdleTime
- uptime += reader.uptime
- downtime += reader.downtime
- }
-
- override fun record(reader: ServerTableReader) {
- guestUptime += reader.uptime
- guestDowntime += reader.downtime
- }
- },
- exportInterval = Duration.ofSeconds(duration)
- )
-
- val meterProvider = SdkMeterProvider
- .builder()
- .setResource(hostResource)
- .setClock(clock.toOtelClock())
- .registerMetricReader(reader)
- .build()
-
val engine = FlowEngine(coroutineContext, clock)
val host = SimHost(
- uid = hostId,
+ uid = UUID.randomUUID(),
name = "test",
model = machineModel,
meta = emptyMap(),
coroutineContext,
engine,
- meterProvider,
+ MeterProvider.noop(),
SimFairShareHypervisorProvider()
)
val image = MockImage(
@@ -275,15 +202,17 @@ internal class SimHostTest {
// Ensure last cycle is collected
delay(1000L * duration)
- meterProvider.close()
+ val cpuStats = host.getCpuStats()
+ val sysStats = host.getSystemStats()
+ val guestSysStats = host.getSystemStats(server)
assertAll(
- { assertEquals(1775, idleTime, "Idle time does not match") },
- { assertEquals(624, activeTime, "Active time does not match") },
- { assertEquals(900001, uptime, "Uptime does not match") },
- { assertEquals(300000, downtime, "Downtime does not match") },
- { assertEquals(900000, guestUptime, "Guest uptime does not match") },
- { assertEquals(300000, guestDowntime, "Guest downtime does not match") },
+ { assertEquals(1775, cpuStats.idleTime, "Idle time does not match") },
+ { assertEquals(624, cpuStats.activeTime, "Active time does not match") },
+ { assertEquals(900001, sysStats.uptime.toMillis(), "Uptime does not match") },
+ { assertEquals(300000, sysStats.downtime.toMillis(), "Downtime does not match") },
+ { assertEquals(900001, guestSysStats.uptime.toMillis(), "Guest uptime does not match") },
+ { assertEquals(300000, guestSysStats.downtime.toMillis(), "Guest downtime does not match") },
)
}
@@ -332,6 +261,8 @@ internal class SimHostTest {
override val state: ServerState = ServerState.TERMINATED
+ override val launchedAt: Instant? = null
+
override suspend fun start() {}
override suspend fun stop() {}
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt
index 4b0b343f..21cfdad2 100644
--- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/ComputeServiceHelper.kt
@@ -26,6 +26,7 @@ import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import kotlinx.coroutines.yield
+import org.opendc.compute.api.Server
import org.opendc.compute.service.ComputeService
import org.opendc.compute.service.scheduler.ComputeScheduler
import org.opendc.compute.simulator.SimHost
@@ -81,9 +82,19 @@ public class ComputeServiceHelper(
}
/**
- * Converge a simulation of the [ComputeService] by replaying the workload trace given by [trace].
+ * Run a simulation of the [ComputeService] by replaying the workload trace given by [trace].
+ *
+ * @param trace The trace to simulate.
+ * @param seed The seed for the simulation.
+ * @param servers A list to which the created servers is added.
+ * @param submitImmediately A flag to indicate that the servers are scheduled immediately (so not at their start time).
*/
- public suspend fun run(trace: List<VirtualMachine>, seed: Long, submitImmediately: Boolean = false) {
+ public suspend fun run(
+ trace: List<VirtualMachine>,
+ seed: Long,
+ servers: MutableList<Server>? = null,
+ submitImmediately: Boolean = false
+ ) {
val random = Random(seed)
val injector = failureModel?.createInjector(context, clock, service, random)
val client = service.newClient()
@@ -129,12 +140,14 @@ public class ComputeServiceHelper(
meta = mapOf("workload" to workload)
)
+ servers?.add(server)
+
// Wait for the server reach its end time
val endTime = entry.stopTime.toEpochMilli()
delay(endTime + workloadOffset - clock.millis() + 5 * 60 * 1000)
- // Delete the server after reaching the end-time of the virtual machine
- server.delete()
+ // Stop the server after reaching the end-time of the virtual machine
+ server.stop()
}
}
}
diff --git a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/export/parquet/ParquetComputeMetricExporter.kt b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/export/parquet/ParquetComputeMonitor.kt
index a46885f4..6c515118 100644
--- a/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/export/parquet/ParquetComputeMetricExporter.kt
+++ b/opendc-compute/opendc-compute-workload/src/main/kotlin/org/opendc/compute/workload/export/parquet/ParquetComputeMonitor.kt
@@ -22,8 +22,6 @@
package org.opendc.compute.workload.export.parquet
-import io.opentelemetry.sdk.common.CompletableResultCode
-import org.opendc.telemetry.compute.ComputeMetricExporter
import org.opendc.telemetry.compute.ComputeMonitor
import org.opendc.telemetry.compute.table.HostTableReader
import org.opendc.telemetry.compute.table.ServerTableReader
@@ -33,7 +31,7 @@ import java.io.File
/**
* A [ComputeMonitor] that logs the events to a Parquet file.
*/
-public class ParquetComputeMetricExporter(base: File, partition: String, bufferSize: Int) : ComputeMetricExporter() {
+public class ParquetComputeMonitor(base: File, partition: String, bufferSize: Int) : ComputeMonitor, AutoCloseable {
private val serverWriter = ParquetServerDataWriter(
File(base, "server/$partition/data.parquet").also { it.parentFile.mkdirs() },
bufferSize
@@ -61,11 +59,9 @@ public class ParquetComputeMetricExporter(base: File, partition: String, bufferS
serviceWriter.write(reader)
}
- override fun shutdown(): CompletableResultCode {
+ override fun close() {
hostWriter.close()
serviceWriter.close()
serverWriter.close()
-
- return CompletableResultCode.ofSuccess()
}
}