summary | refs | log | tree | commit | diff
path: root/opendc-experiments
diff options
context:
space:
mode:
author Fabian Mastenbroek <mail.fabianm@gmail.com> 2021-09-14 15:38:38 +0200
committer Fabian Mastenbroek <mail.fabianm@gmail.com> 2021-09-17 16:51:07 +0200
commit 8d899e29dbd757f6df320212d6e0d77ce8216ab9 (patch)
tree 0dc2cd6d7c3fee35c32552848b46532265594cea /opendc-experiments
parent 3ca64e0110adab65526a0ccfd5b252e9f047ab10 (diff)
refactor(telemetry): Standardize compute scheduler metrics
This change updates the OpenDC compute service implementation with multiple meters that follow the OpenTelemetry conventions.
Diffstat (limited to 'opendc-experiments')
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt 11
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/export/parquet/ParquetServiceDataWriter.kt 28
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt 52
3 files changed, 48 insertions, 43 deletions
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
index f7f9336e..3ec424f1 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt
@@ -153,11 +153,12 @@ abstract class Portfolio(name: String) : Experiment(name) {
val monitorResults = collectServiceMetrics(clock.millis(), simulator.producers[0])
logger.debug {
- "Finish " +
- "SUBMIT=${monitorResults.instanceCount} " +
- "FAIL=${monitorResults.failedInstanceCount} " +
- "QUEUE=${monitorResults.queuedInstanceCount} " +
- "RUNNING=${monitorResults.activeHostCount}"
+ "Scheduler " +
+ "Success=${monitorResults.attemptsSuccess} " +
+ "Failure=${monitorResults.attemptsFailure} " +
+ "Error=${monitorResults.attemptsError} " +
+ "Pending=${monitorResults.serversPending} " +
+ "Active=${monitorResults.serversActive}"
}
}
}
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/export/parquet/ParquetServiceDataWriter.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/export/parquet/ParquetServiceDataWriter.kt
index e1428fe7..29b48878 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/export/parquet/ParquetServiceDataWriter.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/export/parquet/ParquetServiceDataWriter.kt
@@ -36,13 +36,13 @@ public class ParquetServiceDataWriter(path: File, bufferSize: Int) :
override fun convert(builder: GenericRecordBuilder, data: ServiceData) {
builder["timestamp"] = data.timestamp
- builder["host_total_count"] = data.hostCount
- builder["host_available_count"] = data.activeHostCount
- builder["instance_total_count"] = data.instanceCount
- builder["instance_active_count"] = data.runningInstanceCount
- builder["instance_inactive_count"] = data.finishedInstanceCount
- builder["instance_waiting_count"] = data.queuedInstanceCount
- builder["instance_failed_count"] = data.failedInstanceCount
+ builder["hosts_up"] = data.hostsUp
+ builder["hosts_down"] = data.hostsDown
+ builder["servers_pending"] = data.serversPending
+ builder["servers_active"] = data.serversActive
+ builder["attempts_success"] = data.attemptsSuccess
+ builder["attempts_failure"] = data.attemptsFailure
+ builder["attempts_error"] = data.attemptsError
}
override fun toString(): String = "service-writer"
@@ -53,13 +53,13 @@ public class ParquetServiceDataWriter(path: File, bufferSize: Int) :
.namespace("org.opendc.telemetry.compute")
.fields()
.requiredLong("timestamp")
- .requiredInt("host_total_count")
- .requiredInt("host_available_count")
- .requiredInt("instance_total_count")
- .requiredInt("instance_active_count")
- .requiredInt("instance_inactive_count")
- .requiredInt("instance_waiting_count")
- .requiredInt("instance_failed_count")
+ .requiredInt("hosts_up")
+ .requiredInt("hosts_down")
+ .requiredInt("servers_pending")
+ .requiredInt("servers_active")
+ .requiredInt("attempts_success")
+ .requiredInt("attempts_failure")
+ .requiredInt("attempts_error")
.endRecord()
}
}
diff --git a/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt b/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
index f4cf3e5e..81405acf 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/test/kotlin/org/opendc/experiments/capelin/CapelinIntegrationTest.kt
@@ -104,19 +104,20 @@ class CapelinIntegrationTest {
val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0])
println(
- "Finish " +
- "SUBMIT=${serviceMetrics.instanceCount} " +
- "FAIL=${serviceMetrics.failedInstanceCount} " +
- "QUEUE=${serviceMetrics.queuedInstanceCount} " +
- "RUNNING=${serviceMetrics.runningInstanceCount}"
+ "Scheduler " +
+ "Success=${serviceMetrics.attemptsSuccess} " +
+ "Failure=${serviceMetrics.attemptsFailure} " +
+ "Error=${serviceMetrics.attemptsError} " +
+ "Pending=${serviceMetrics.serversPending} " +
+ "Active=${serviceMetrics.serversActive}"
)
// Note that these values have been verified beforehand
assertAll(
- { assertEquals(50, serviceMetrics.instanceCount, "The trace contains 50 VMs") },
- { assertEquals(0, serviceMetrics.runningInstanceCount, "All VMs should finish after a run") },
- { assertEquals(0, serviceMetrics.failedInstanceCount, "No VM should not be unscheduled") },
- { assertEquals(0, serviceMetrics.queuedInstanceCount, "No VM should not be in the queue") },
+ { assertEquals(50, serviceMetrics.attemptsSuccess, "The scheduler should schedule 50 VMs") },
+ { assertEquals(0, serviceMetrics.serversActive, "All VMs should finish after a run") },
+ { assertEquals(0, serviceMetrics.attemptsFailure, "No VM should be unscheduled") },
+ { assertEquals(0, serviceMetrics.serversPending, "No VM should not be in the queue") },
{ assertEquals(220346412191, monitor.totalWork) { "Incorrect requested burst" } },
{ assertEquals(206667852689, monitor.totalGrantedWork) { "Incorrect granted burst" } },
{ assertEquals(1151612221, monitor.totalOvercommittedWork) { "Incorrect overcommitted burst" } },
@@ -152,11 +153,12 @@ class CapelinIntegrationTest {
val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0])
println(
- "Finish " +
- "SUBMIT=${serviceMetrics.instanceCount} " +
- "FAIL=${serviceMetrics.failedInstanceCount} " +
- "QUEUE=${serviceMetrics.queuedInstanceCount} " +
- "RUNNING=${serviceMetrics.runningInstanceCount}"
+ "Scheduler " +
+ "Success=${serviceMetrics.attemptsSuccess} " +
+ "Failure=${serviceMetrics.attemptsFailure} " +
+ "Error=${serviceMetrics.attemptsError} " +
+ "Pending=${serviceMetrics.serversPending} " +
+ "Active=${serviceMetrics.serversActive}"
)
// Note that these values have been verified beforehand
@@ -202,11 +204,12 @@ class CapelinIntegrationTest {
val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0])
println(
- "Finish " +
- "SUBMIT=${serviceMetrics.instanceCount} " +
- "FAIL=${serviceMetrics.failedInstanceCount} " +
- "QUEUE=${serviceMetrics.queuedInstanceCount} " +
- "RUNNING=${serviceMetrics.runningInstanceCount}"
+ "Scheduler " +
+ "Success=${serviceMetrics.attemptsSuccess} " +
+ "Failure=${serviceMetrics.attemptsFailure} " +
+ "Error=${serviceMetrics.attemptsError} " +
+ "Pending=${serviceMetrics.serversPending} " +
+ "Active=${serviceMetrics.serversActive}"
)
// Note that these values have been verified beforehand
@@ -246,11 +249,12 @@ class CapelinIntegrationTest {
val serviceMetrics = collectServiceMetrics(clock.millis(), simulator.producers[0])
println(
- "Finish " +
- "SUBMIT=${serviceMetrics.instanceCount} " +
- "FAIL=${serviceMetrics.failedInstanceCount} " +
- "QUEUE=${serviceMetrics.queuedInstanceCount} " +
- "RUNNING=${serviceMetrics.runningInstanceCount}"
+ "Scheduler " +
+ "Success=${serviceMetrics.attemptsSuccess} " +
+ "Failure=${serviceMetrics.attemptsFailure} " +
+ "Error=${serviceMetrics.attemptsError} " +
+ "Pending=${serviceMetrics.serversPending} " +
+ "Active=${serviceMetrics.serversActive}"
)
// Note that these values have been verified beforehand