summaryrefslogtreecommitdiff
path: root/opendc-web/opendc-web-runner/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'opendc-web/opendc-web-runner/src/main')
-rw-r--r--opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt3
-rw-r--r--opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt44
-rw-r--r--opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt11
-rw-r--r--opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/ReportCollector.kt137
-rw-r--r--opendc-web/opendc-web-runner/src/main/resources/log4j2.xml2
5 files changed, 185 insertions, 12 deletions
diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt
index a517f3b4..a2378931 100644
--- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt
+++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt
@@ -24,6 +24,7 @@ package org.opendc.web.runner
import org.opendc.web.client.runner.OpenDCRunnerClient
import org.opendc.web.proto.runner.Job
+import org.opendc.web.proto.runner.Report
import org.opendc.web.runner.internal.JobManagerImpl
/**
@@ -58,6 +59,7 @@ public interface JobManager {
public fun fail(
id: Long,
runtime: Int,
+ report: Report? = null,
)
/**
@@ -67,6 +69,7 @@ public interface JobManager {
id: Long,
runtime: Int,
results: Map<String, Any>,
+ report: Report? = null,
)
public companion object {
diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt
index 33f8c94d..8d830acf 100644
--- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt
+++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt
@@ -42,8 +42,10 @@ import org.opendc.simulator.compute.models.MemoryUnit
import org.opendc.simulator.compute.power.PowerModels
import org.opendc.simulator.kotlin.runSimulation
import org.opendc.web.proto.runner.Job
+import org.opendc.web.proto.runner.Report
import org.opendc.web.proto.runner.Scenario
import org.opendc.web.proto.runner.Topology
+import org.opendc.web.runner.internal.ReportCollector
import org.opendc.web.runner.internal.WebComputeMonitor
import java.io.File
import java.time.Duration
@@ -159,6 +161,9 @@ public class OpenDCRunner(
TimeUnit.MILLISECONDS,
)
+ val reportCollector = ReportCollector()
+ reportCollector.attach()
+
try {
val topology = convertTopology(scenario.topology)
val jobs =
@@ -176,6 +181,16 @@ public class OpenDCRunner(
val duration = startTime.secondsSince()
logger.info { "Finished simulation for job $id (in $duration seconds)" }
+ reportCollector.detach()
+
+ // Calculate wait time if startedAt is available
+ val waitTime =
+ job.startedAt?.let { started ->
+ ChronoUnit.SECONDS.between(job.createdAt, started).toInt()
+ }
+
+ val report = reportCollector.collect(duration, waitTime, job.createdAt, job.startedAt)
+
manager.finish(
id,
duration,
@@ -196,18 +211,33 @@ public class OpenDCRunner(
"total_vms_finished" to results.map { it.totalVmsFinished },
"total_vms_failed" to results.map { it.totalVmsFailed },
),
+ report,
)
} catch (e: Exception) {
- // Check whether the job failed due to exceeding its time budget
- if (Thread.interrupted()) {
- logger.info { "Simulation job $id exceeded time limit (${startTime.secondsSince()} seconds)" }
- } else {
- logger.info(e) { "Simulation job $id failed" }
- }
+ reportCollector.detach()
+
+ val duration = startTime.secondsSince()
+
+ // Calculate wait time if startedAt is available
+ val waitTime =
+ job.startedAt?.let { started ->
+ ChronoUnit.SECONDS.between(job.createdAt, started).toInt()
+ }
+
+ val errorInfo =
+ if (Thread.interrupted()) {
+ logger.info { "Simulation job $id exceeded time limit ($duration seconds)" }
+ Report.ErrorInfo("Simulation exceeded time limit", "TIMEOUT", null)
+ } else {
+ logger.info(e) { "Simulation job $id failed" }
+ Report.ErrorInfo(e.message ?: "Unknown error", e.javaClass.simpleName, e.stackTraceToString())
+ }
+
+ val report = reportCollector.collect(duration, waitTime, job.createdAt, job.startedAt, errorInfo)
try {
heartbeat.cancel(true)
- manager.fail(id, startTime.secondsSince())
+ manager.fail(id, duration, report)
} catch (e: Throwable) {
logger.error(e) { "Failed to update job" }
}
diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt
index a0955978..2b3b302b 100644
--- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt
+++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt
@@ -25,6 +25,7 @@ package org.opendc.web.runner.internal
import org.opendc.web.client.runner.OpenDCRunnerClient
import org.opendc.web.proto.JobState
import org.opendc.web.proto.runner.Job
+import org.opendc.web.proto.runner.Report
import org.opendc.web.runner.JobManager
/**
@@ -37,7 +38,7 @@ internal class JobManagerImpl(private val client: OpenDCRunnerClient) : JobManag
override fun claim(id: Long): Boolean {
return try {
- client.jobs.update(id, Job.Update(JobState.CLAIMED, 0, null))
+ client.jobs.update(id, Job.Update(JobState.CLAIMED, 0, null, null))
true
} catch (e: IllegalStateException) {
false
@@ -48,22 +49,24 @@ internal class JobManagerImpl(private val client: OpenDCRunnerClient) : JobManag
id: Long,
runtime: Int,
): Boolean {
- val res = client.jobs.update(id, Job.Update(JobState.RUNNING, runtime, null))
+ val res = client.jobs.update(id, Job.Update(JobState.RUNNING, runtime, null, null))
return res?.state != JobState.FAILED
}
override fun fail(
id: Long,
runtime: Int,
+ report: Report?,
) {
- client.jobs.update(id, Job.Update(JobState.FAILED, runtime, null))
+ client.jobs.update(id, Job.Update(JobState.FAILED, runtime, null, report))
}
override fun finish(
id: Long,
runtime: Int,
results: Map<String, Any>,
+ report: Report?,
) {
- client.jobs.update(id, Job.Update(JobState.FINISHED, runtime, results))
+ client.jobs.update(id, Job.Update(JobState.FINISHED, runtime, results, report))
}
}
diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/ReportCollector.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/ReportCollector.kt
new file mode 100644
index 00000000..3462df86
--- /dev/null
+++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/ReportCollector.kt
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.web.runner.internal
+
+import org.apache.logging.log4j.Level
+import org.apache.logging.log4j.LogManager
+import org.apache.logging.log4j.core.LogEvent
+import org.apache.logging.log4j.core.Logger
+import org.apache.logging.log4j.core.appender.AbstractAppender
+import org.apache.logging.log4j.core.config.Property
+import org.opendc.web.proto.runner.Report
+import java.time.Instant
+
+/**
+ * A log collector that captures WARN and ERROR level messages during simulation execution.
+ */
+internal class ReportCollector : AbstractAppender(
+ "ReportCollector",
+ null,
+ null,
+ true,
+ Property.EMPTY_ARRAY,
+) {
+ private val logs = mutableListOf<LogEntry>()
+
+ init {
+ start()
+ }
+
+ override fun append(event: LogEvent) {
+ if (event.level == Level.WARN || event.level == Level.ERROR) {
+ logs.add(
+ LogEntry(
+ timestamp = Instant.ofEpochMilli(event.instant.epochMillisecond),
+ level = event.level.name(),
+ logger = event.loggerName,
+ message = event.message.formattedMessage,
+ ),
+ )
+ }
+ }
+
+ /**
+ * Attach this collector to the root logger.
+ */
+ fun attach() {
+ val rootLogger = LogManager.getRootLogger() as Logger
+ rootLogger.addAppender(this)
+ }
+
+ /**
+ * Detach this collector from the root logger.
+ */
+ fun detach() {
+ val rootLogger = LogManager.getRootLogger() as Logger
+ rootLogger.removeAppender(this)
+ }
+
+ /**
+ * Collect all captured logs and return them as a map structure.
+ *
+ * @param runtimeSeconds The runtime of the job in seconds (optional).
+ * @param waitTimeSeconds The time the job spent waiting in the queue in seconds (optional).
+ * @param createdAt The time the job was created (optional).
+ * @param startedAt The time the job started running (optional).
+ */
+ fun collect(
+ runtimeSeconds: Int? = null,
+ waitTimeSeconds: Int? = null,
+ createdAt: Instant? = null,
+ startedAt: Instant? = null,
+ error: Report.ErrorInfo? = null,
+ ): Report {
+ val logEntries =
+ logs.map {
+ Report.LogEntry(
+ it.timestamp.toString(),
+ it.level,
+ it.logger,
+ it.message,
+ )
+ }
+
+ val summary =
+ Report.Summary(
+ logs.count { it.level == "WARN" },
+ logs.count { it.level == "ERROR" },
+ runtimeSeconds,
+ waitTimeSeconds,
+ )
+
+ return Report(
+ createdAt?.toString(),
+ startedAt?.toString(),
+ logEntries,
+ summary,
+ error,
+ )
+ }
+
+ /**
+ * Clear all collected logs.
+ */
+ fun clear() {
+ logs.clear()
+ }
+
+ /**
+ * Represents a single log entry.
+ */
+ private data class LogEntry(
+ val timestamp: Instant,
+ val level: String,
+ val logger: String,
+ val message: String,
+ )
+}
diff --git a/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml b/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml
index ad99cc00..72c39b5c 100644
--- a/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml
+++ b/opendc-web/opendc-web-runner/src/main/resources/log4j2.xml
@@ -32,7 +32,7 @@
<Sentry name="Sentry" />
</Appenders>
<Loggers>
- <Logger name="org.opendc" level="warn" additivity="false">
+ <Logger name="org.opendc" level="warn" additivity="true">
<AppenderRef ref="Console"/>
<AppenderRef ref="Sentry"/>
</Logger>