summaryrefslogtreecommitdiff
path: root/opendc-compute/opendc-compute-simulator/src/main
diff options
context:
space:
mode:
author: Fabian Mastenbroek <mail.fabianm@gmail.com> 2022-05-06 17:47:44 +0200
committer: Fabian Mastenbroek <mail.fabianm@gmail.com> 2022-05-06 17:47:44 +0200
commit: a9657e4fa3b15e2c1c11884b5a250b0861bcc21d (patch)
tree: 6b25de3d7a1def150ab4977a45723c52167e7211 /opendc-compute/opendc-compute-simulator/src/main
parent: 48da4538707cd074969287724ca6f02823f2ff5a (diff)
parent: 8e3905273c7a3f2df4df5d5840e4088d99b0dffb (diff)
merge: Expose metrics directly to user (#80)
This pull request adds the ability to access the metrics of resources modeled by the OpenDC Compute, Workflow, FaaS, and TensorFlow services directly from their corresponding interfaces. Previously, users had to interact with OpenTelemetry to obtain these values, which is complex and adds significant overhead. With this pull request, users can access the metrics of all cloud resources modeled by OpenDC via methods such as `getSchedulerStats()`. **Breaking Changes** - `ComputeService.hostCount` has been removed in favour of `ComputeService.hosts.size`.
Diffstat (limited to 'opendc-compute/opendc-compute-simulator/src/main')
-rw-r--r-- opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt 90
-rw-r--r-- opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt 47
2 files changed, 123 insertions, 14 deletions
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
index 4eb6392e..323ae4fe 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/SimHost.kt
@@ -29,11 +29,14 @@ import io.opentelemetry.api.metrics.MeterProvider
import io.opentelemetry.api.metrics.ObservableDoubleMeasurement
import io.opentelemetry.api.metrics.ObservableLongMeasurement
import kotlinx.coroutines.*
-import mu.KotlinLogging
import org.opendc.compute.api.Flavor
import org.opendc.compute.api.Server
import org.opendc.compute.api.ServerState
import org.opendc.compute.service.driver.*
+import org.opendc.compute.service.driver.telemetry.GuestCpuStats
+import org.opendc.compute.service.driver.telemetry.GuestSystemStats
+import org.opendc.compute.service.driver.telemetry.HostCpuStats
+import org.opendc.compute.service.driver.telemetry.HostSystemStats
import org.opendc.compute.simulator.internal.Guest
import org.opendc.compute.simulator.internal.GuestListener
import org.opendc.simulator.compute.*
@@ -49,6 +52,8 @@ import org.opendc.simulator.compute.power.PowerDriver
import org.opendc.simulator.compute.power.SimplePowerDriver
import org.opendc.simulator.compute.workload.SimWorkload
import org.opendc.simulator.flow.FlowEngine
+import java.time.Duration
+import java.time.Instant
import java.util.*
import kotlin.coroutines.CoroutineContext
@@ -81,11 +86,6 @@ public class SimHost(
private val clock = engine.clock
/**
- * The logger instance of this server.
- */
- private val logger = KotlinLogging.logger {}
-
- /**
* The [Meter] to track metrics of the simulated host.
*/
private val meter = meterProvider.get("org.opendc.compute.simulator")
@@ -112,6 +112,9 @@ public class SimHost(
private val guests = HashMap<Server, Guest>()
private val _guests = mutableListOf<Guest>()
+ override val instances: Set<Server>
+ get() = guests.keys
+
override val state: HostState
get() = _state
private var _state: HostState = HostState.DOWN
@@ -249,6 +252,68 @@ public class SimHost(
machine.cancel()
}
+ override fun getSystemStats(): HostSystemStats {
+ updateUptime()
+
+ var terminated = 0
+ var running = 0
+ var error = 0
+ var invalid = 0
+
+ val guests = _guests.listIterator()
+ for (guest in guests) {
+ when (guest.state) {
+ ServerState.TERMINATED -> terminated++
+ ServerState.RUNNING -> running++
+ ServerState.ERROR -> error++
+ ServerState.DELETED -> {
+ // Remove guests that have been deleted
+ this.guests.remove(guest.server)
+ guests.remove()
+ }
+ else -> invalid++
+ }
+ }
+
+ return HostSystemStats(
+ Duration.ofMillis(_uptime),
+ Duration.ofMillis(_downtime),
+ Instant.ofEpochMilli(_bootTime),
+ machine.powerUsage,
+ machine.energyUsage,
+ terminated,
+ running,
+ error,
+ invalid
+ )
+ }
+
+ override fun getSystemStats(server: Server): GuestSystemStats {
+ val guest = requireNotNull(guests[server]) { "Unknown server ${server.uid} at host $uid" }
+ return guest.getSystemStats()
+ }
+
+ override fun getCpuStats(): HostCpuStats {
+ val counters = hypervisor.counters
+ counters.flush()
+
+ return HostCpuStats(
+ counters.cpuActiveTime / 1000L,
+ counters.cpuIdleTime / 1000L,
+ counters.cpuStealTime / 1000L,
+ counters.cpuLostTime / 1000L,
+ hypervisor.cpuCapacity,
+ hypervisor.cpuDemand,
+ hypervisor.cpuUsage,
+ hypervisor.cpuUsage / _cpuLimit
+ )
+ }
+
+ override fun getCpuStats(server: Server): GuestCpuStats {
+ val guest = requireNotNull(guests[server]) { "Unknown server ${server.uid} at host $uid" }
+ return guest.getCpuStats()
+ }
+
override fun hashCode(): Int = uid.hashCode()
override fun equals(other: Any?): Boolean {
@@ -417,13 +482,12 @@ public class SimHost(
* Helper function to track the CPU time of a machine.
*/
private fun collectCpuTime(result: ObservableLongMeasurement) {
- val counters = hypervisor.counters
- counters.flush()
+ val stats = getCpuStats()
- result.record(counters.cpuActiveTime / 1000L, _activeState)
- result.record(counters.cpuIdleTime / 1000L, _idleState)
- result.record(counters.cpuStealTime / 1000L, _stealState)
- result.record(counters.cpuLostTime / 1000L, _lostState)
+ result.record(stats.activeTime, _activeState)
+ result.record(stats.idleTime, _idleState)
+ result.record(stats.stealTime, _stealState)
+ result.record(stats.lostTime, _lostState)
val guests = _guests
for (i in guests.indices) {
@@ -450,7 +514,7 @@ public class SimHost(
val guests = _guests
for (i in guests.indices) {
- guests[i].updateUptime(duration)
+ guests[i].updateUptime()
}
}
diff --git a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
index bb378ee3..0d4c550d 100644
--- a/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
+++ b/opendc-compute/opendc-compute-simulator/src/main/kotlin/org/opendc/compute/simulator/internal/Guest.kt
@@ -32,6 +32,8 @@ import kotlinx.coroutines.*
import mu.KotlinLogging
import org.opendc.compute.api.Server
import org.opendc.compute.api.ServerState
+import org.opendc.compute.service.driver.telemetry.GuestCpuStats
+import org.opendc.compute.service.driver.telemetry.GuestSystemStats
import org.opendc.compute.simulator.SimHost
import org.opendc.compute.simulator.SimWorkloadMapper
import org.opendc.simulator.compute.kernel.SimHypervisor
@@ -39,6 +41,8 @@ import org.opendc.simulator.compute.kernel.SimVirtualMachine
import org.opendc.simulator.compute.runWorkload
import org.opendc.simulator.compute.workload.SimWorkload
import java.time.Clock
+import java.time.Duration
+import java.time.Instant
import kotlin.coroutines.CoroutineContext
/**
@@ -146,6 +150,37 @@ internal class Guest(
}
/**
+ * Obtain the system statistics of this guest.
+ */
+ fun getSystemStats(): GuestSystemStats {
+ updateUptime()
+
+ return GuestSystemStats(
+ Duration.ofMillis(_uptime),
+ Duration.ofMillis(_downtime),
+ Instant.ofEpochMilli(_bootTime)
+ )
+ }
+
+ /**
+ * Obtain the CPU statistics of this guest.
+ */
+ fun getCpuStats(): GuestCpuStats {
+ val counters = machine.counters
+ counters.flush()
+
+ return GuestCpuStats(
+ counters.cpuActiveTime / 1000L,
+ counters.cpuIdleTime / 1000L,
+ counters.cpuStealTime / 1000L,
+ counters.cpuLostTime / 1000L,
+ machine.cpuCapacity,
+ machine.cpuUsage,
+ machine.cpuUsage / _cpuLimit
+ )
+ }
+
+ /**
* The [Job] representing the current active virtual machine instance or `null` if no virtual machine is active.
*/
private var job: Job? = null
@@ -209,6 +244,8 @@ internal class Guest(
* This method is invoked when the guest stopped.
*/
private fun onStop(target: ServerState) {
+ updateUptime()
+
state = target
listener.onStop(this)
}
@@ -224,10 +261,16 @@ internal class Guest(
.put(STATE_KEY, "down")
.build()
+ private var _lastReport = clock.millis()
+
/**
* Helper function to track the uptime and downtime of the guest.
*/
- fun updateUptime(duration: Long) {
+ fun updateUptime() {
+ val now = clock.millis()
+ val duration = now - _lastReport
+ _lastReport = now
+
if (state == ServerState.RUNNING) {
_uptime += duration
} else if (state == ServerState.ERROR) {
@@ -239,6 +282,8 @@ internal class Guest(
* Helper function to track the uptime of the guest.
*/
fun collectUptime(result: ObservableLongMeasurement) {
+ updateUptime()
+
result.record(_uptime, _upState)
result.record(_downtime, _downState)
}