diff options
| author | Dante Niewenhuis <d.niewenhuis@hotmail.com> | 2024-09-12 15:32:47 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-12 15:32:47 +0200 |
| commit | 5047e4a25a0814f96852882f02c4017e1d5f81e7 (patch) | |
| tree | 348f064fd8e03a2a64fc5b30406e992586b4aac0 /opendc-compute/opendc-compute-service/src/main/java/org/opendc | |
| parent | ad8051faa1f0a6e7f78384e9e0607e847848c033 (diff) | |
Added max number of failures (#254)
* Added a max failure for tasks. If tasks fail more times, they get cancelled
* Added maxNumFailures to the frontend
* Updated tests
Diffstat (limited to 'opendc-compute/opendc-compute-service/src/main/java/org/opendc')
2 files changed, 40 insertions, 10 deletions
diff --git a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java index a64f6a4e..ad01ee57 100644 --- a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java +++ b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java @@ -82,6 +82,8 @@ public final class ComputeService implements AutoCloseable { */ private final SplittableRandom random = new SplittableRandom(0); + private final int maxNumFailures; + /** * A flag to indicate that the service is closed. */ @@ -162,6 +164,7 @@ public final class ComputeService implements AutoCloseable { serviceTask.setState(newState); + // TODO: move the removal of tasks when max Failures are reached to here if (newState == TaskState.TERMINATED || newState == TaskState.DELETED || newState == TaskState.ERROR) { LOGGER.info("task {} {} {} finished", task.getUid(), task.getName(), task.getFlavor()); @@ -196,10 +199,11 @@ public final class ComputeService implements AutoCloseable { /** * Construct a {@link ComputeService} instance. */ - ComputeService(Dispatcher dispatcher, ComputeScheduler scheduler, Duration quantum) { + ComputeService(Dispatcher dispatcher, ComputeScheduler scheduler, Duration quantum, int maxNumFailures) { this.clock = dispatcher.getTimeSource(); this.scheduler = scheduler; this.pacer = new Pacer(dispatcher, quantum.toMillis(), (time) -> doSchedule()); + this.maxNumFailures = maxNumFailures; } /** @@ -365,8 +369,16 @@ public final class ComputeService implements AutoCloseable { } final ServiceTask task = request.task; - // Check if all dependencies are met - // otherwise continue + + // Remove task from scheduling if it has failed too many times + if (task.getNumFailures() > maxNumFailures) { + LOGGER.warn("Failed to spawn {}: Task has failed more than the allowed {} times", task, maxNumFailures); + + taskQueue.poll(); + tasksPending--; + task.setState(TaskState.TERMINATED); + continue; + } final ServiceFlavor flavor = task.getFlavor(); final HostView hv = scheduler.select(request.task); @@ -425,6 +437,7 @@ public final class ComputeService implements AutoCloseable { private final Dispatcher dispatcher; private final ComputeScheduler computeScheduler; private Duration quantum = Duration.ofMinutes(5); + private int maxNumFailures = 10; Builder(Dispatcher dispatcher, ComputeScheduler computeScheduler) { this.dispatcher = dispatcher; @@ -439,11 +452,16 @@ public final class ComputeService implements AutoCloseable { return this; } + public Builder withMaxNumFailures(int maxNumFailures) { + this.maxNumFailures = maxNumFailures; + return this; + } + /** * Build a {@link ComputeService}. */ public ComputeService build() { - return new ComputeService(dispatcher, computeScheduler, quantum); + return new ComputeService(dispatcher, computeScheduler, quantum, maxNumFailures); } } diff --git a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java index e981921a..f0e2a82e 100644 --- a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java +++ b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java @@ -60,6 +60,8 @@ public final class ServiceTask implements Task { Host host = null; private ComputeService.SchedulingRequest request = null; + private int numFailures = 0; + ServiceTask( ComputeService service, UUID uid, @@ -232,14 +234,19 @@ public final class ServiceTask implements Task { return "Task[uid=" + uid + ",name=" + name + ",state=" + state + "]"; } - void setState(TaskState state) { - if (this.state != state) { - for (TaskWatcher watcher : watchers) { - watcher.onStateChanged(this, state); - } + void setState(TaskState newState) { + if (this.state == newState) { + return; + } + + for (TaskWatcher watcher : watchers) { + watcher.onStateChanged(this, newState); + } + if (newState == TaskState.ERROR) { + this.numFailures++; } - this.state = state; + this.state = newState; } /** @@ -252,4 +259,9 @@ public final class ServiceTask implements Task { request.isCancelled = true; } } + + @Override + public int getNumFailures() { + return this.numFailures; + } } |
