From 5b8dfc78496452bd23fab59e3ead84a8941da779 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Thu, 6 Oct 2022 22:42:31 +0200 Subject: feat(web/server): Add support for accounting simulation time This change updates the Quarkus-based web server to add support for tracking and limiting the simulation minutes used by the user in order to prevent misuse of shared resources. --- .../kotlin/org/opendc/web/runner/JobManager.kt | 10 +++++--- .../kotlin/org/opendc/web/runner/OpenDCRunner.kt | 27 ++++++++++++++++++---- .../opendc/web/runner/internal/JobManagerImpl.kt | 15 ++++++------ 3 files changed, 37 insertions(+), 15 deletions(-) (limited to 'opendc-web/opendc-web-runner/src') diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt index 50aa03d8..d6c06889 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/JobManager.kt @@ -42,18 +42,22 @@ public interface JobManager { /** * Update the heartbeat of the specified job. + * + * @param id The identifier of the job. + * @param runtime The total runtime of the job. + * @return `true` if the job can continue, `false` if the job has been cancelled. */ - public fun heartbeat(id: Long) + public fun heartbeat(id: Long, runtime: Int): Boolean /** * Mark the job as failed. */ - public fun fail(id: Long) + public fun fail(id: Long, runtime: Int) /** * Persist the specified results for the specified job. */ - public fun finish(id: Long, results: Map) + public fun finish(id: Long, runtime: Int, results: Map) public companion object { /** diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt index 226bad47..d4996198 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/OpenDCRunner.kt @@ -48,6 +48,8 @@ import org.opendc.web.proto.runner.Topology import org.opendc.web.runner.internal.WebComputeMonitor import java.io.File import java.time.Duration +import java.time.Instant +import java.time.temporal.ChronoUnit import java.util.Random import java.util.UUID import java.util.concurrent.Executors @@ -144,9 +146,15 @@ public class OpenDCRunner( override fun compute() { val id = job.id val scenario = job.scenario + val startTime = Instant.now() + val currentThread = Thread.currentThread() val heartbeat = scheduler.scheduleWithFixedDelay( - { manager.heartbeat(id) }, + { + if (!manager.heartbeat(id, startTime.secondsSince())) { + currentThread.interrupt() + } + }, 0, heartbeatInterval.toMillis(), TimeUnit.MILLISECONDS @@ -163,12 +171,14 @@ public class OpenDCRunner( } val results = invokeAll(jobs).map { it.rawResult } - logger.info { "Finished simulation for job $id" } - heartbeat.cancel(true) + val duration = startTime.secondsSince() + logger.info { "Finished simulation for job $id (in $duration seconds)" } + manager.finish( id, + duration, mapOf( "total_requested_burst" to results.map { it.totalActiveTime + it.totalIdleTime }, "total_granted_burst" to results.map { it.totalActiveTime }, @@ -190,19 +200,26 @@ public class OpenDCRunner( } catch (e: Exception) { // Check whether the job failed due to exceeding its time budget if (Thread.interrupted()) { - logger.info { "Simulation job $id exceeded time limit" } + logger.info { "Simulation job $id exceeded time limit (${startTime.secondsSince()} seconds)" } } else { logger.info(e) { "Simulation job $id failed" } } try { heartbeat.cancel(true) - manager.fail(id) + manager.fail(id, startTime.secondsSince()) } catch (e: Throwable) { logger.error(e) { "Failed to update job" } } } } + + /** + * Calculate the seconds since the specified instant. + */ + private fun Instant.secondsSince(): Int { + return ChronoUnit.SECONDS.between(this, Instant.now()).toInt() + } } /** diff --git a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt index 39a6851c..5b1b7132 100644 --- a/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt +++ b/opendc-web/opendc-web-runner/src/main/kotlin/org/opendc/web/runner/internal/JobManagerImpl.kt @@ -37,22 +37,23 @@ internal class JobManagerImpl(private val client: OpenDCRunnerClient) : JobManag override fun claim(id: Long): Boolean { return try { - client.jobs.update(id, Job.Update(JobState.CLAIMED)) + client.jobs.update(id, Job.Update(JobState.CLAIMED, 0)) true } catch (e: IllegalStateException) { false } } - override fun heartbeat(id: Long) { - client.jobs.update(id, Job.Update(JobState.RUNNING)) + override fun heartbeat(id: Long, runtime: Int): Boolean { + val res = client.jobs.update(id, Job.Update(JobState.RUNNING, runtime)) + return res?.state != JobState.FAILED } - override fun fail(id: Long) { - client.jobs.update(id, Job.Update(JobState.FAILED)) + override fun fail(id: Long, runtime: Int) { + client.jobs.update(id, Job.Update(JobState.FAILED, runtime)) } - override fun finish(id: Long, results: Map) { - client.jobs.update(id, Job.Update(JobState.FINISHED, results)) + override fun finish(id: Long, runtime: Int, results: Map) { + client.jobs.update(id, Job.Update(JobState.FINISHED, runtime)) } } -- cgit v1.2.3