summaryrefslogtreecommitdiff
path: root/opendc-compute/opendc-compute-service/src/main/java/org/opendc
diff options
context:
space:
mode:
authorDante Niewenhuis <d.niewenhuis@hotmail.com>2024-09-12 15:32:47 +0200
committerGitHub <noreply@github.com>2024-09-12 15:32:47 +0200
commit5047e4a25a0814f96852882f02c4017e1d5f81e7 (patch)
tree348f064fd8e03a2a64fc5b30406e992586b4aac0 /opendc-compute/opendc-compute-service/src/main/java/org/opendc
parentad8051faa1f0a6e7f78384e9e0607e847848c033 (diff)
Added max number of failures (#254)
* Added a max failure for tasks. If tasks fail more times, they get cancelled * Added maxNumFailures to the frontend * Updated tests
Diffstat (limited to 'opendc-compute/opendc-compute-service/src/main/java/org/opendc')
-rw-r--r--opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java26
-rw-r--r--opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java24
2 files changed, 40 insertions, 10 deletions
diff --git a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java
index a64f6a4e..ad01ee57 100644
--- a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java
+++ b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ComputeService.java
@@ -82,6 +82,8 @@ public final class ComputeService implements AutoCloseable {
*/
private final SplittableRandom random = new SplittableRandom(0);
+ private final int maxNumFailures;
+
/**
* A flag to indicate that the service is closed.
*/
@@ -162,6 +164,7 @@ public final class ComputeService implements AutoCloseable {
serviceTask.setState(newState);
+ // TODO: move the removal of tasks when max Failures are reached to here
if (newState == TaskState.TERMINATED || newState == TaskState.DELETED || newState == TaskState.ERROR) {
LOGGER.info("task {} {} {} finished", task.getUid(), task.getName(), task.getFlavor());
@@ -196,10 +199,11 @@ public final class ComputeService implements AutoCloseable {
/**
* Construct a {@link ComputeService} instance.
*/
- ComputeService(Dispatcher dispatcher, ComputeScheduler scheduler, Duration quantum) {
+ ComputeService(Dispatcher dispatcher, ComputeScheduler scheduler, Duration quantum, int maxNumFailures) {
this.clock = dispatcher.getTimeSource();
this.scheduler = scheduler;
this.pacer = new Pacer(dispatcher, quantum.toMillis(), (time) -> doSchedule());
+ this.maxNumFailures = maxNumFailures;
}
/**
@@ -365,8 +369,16 @@ public final class ComputeService implements AutoCloseable {
}
final ServiceTask task = request.task;
- // Check if all dependencies are met
- // otherwise continue
+
+ // Remove task from scheduling if it has failed too many times
+ if (task.getNumFailures() > maxNumFailures) {
+ LOGGER.warn("Failed to spawn {}: Task has failed more than the allowed {} times", task, maxNumFailures);
+
+ taskQueue.poll();
+ tasksPending--;
+ task.setState(TaskState.TERMINATED);
+ continue;
+ }
final ServiceFlavor flavor = task.getFlavor();
final HostView hv = scheduler.select(request.task);
@@ -425,6 +437,7 @@ public final class ComputeService implements AutoCloseable {
private final Dispatcher dispatcher;
private final ComputeScheduler computeScheduler;
private Duration quantum = Duration.ofMinutes(5);
+ private int maxNumFailures = 10;
Builder(Dispatcher dispatcher, ComputeScheduler computeScheduler) {
this.dispatcher = dispatcher;
@@ -439,11 +452,16 @@ public final class ComputeService implements AutoCloseable {
return this;
}
+ public Builder withMaxNumFailures(int maxNumFailures) {
+ this.maxNumFailures = maxNumFailures;
+ return this;
+ }
+
/**
* Build a {@link ComputeService}.
*/
public ComputeService build() {
- return new ComputeService(dispatcher, computeScheduler, quantum);
+ return new ComputeService(dispatcher, computeScheduler, quantum, maxNumFailures);
}
}
diff --git a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java
index e981921a..f0e2a82e 100644
--- a/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java
+++ b/opendc-compute/opendc-compute-service/src/main/java/org/opendc/compute/service/ServiceTask.java
@@ -60,6 +60,8 @@ public final class ServiceTask implements Task {
Host host = null;
private ComputeService.SchedulingRequest request = null;
+ private int numFailures = 0;
+
ServiceTask(
ComputeService service,
UUID uid,
@@ -232,14 +234,19 @@ public final class ServiceTask implements Task {
return "Task[uid=" + uid + ",name=" + name + ",state=" + state + "]";
}
- void setState(TaskState state) {
- if (this.state != state) {
- for (TaskWatcher watcher : watchers) {
- watcher.onStateChanged(this, state);
- }
+ void setState(TaskState newState) {
+ if (this.state == newState) {
+ return;
+ }
+
+ for (TaskWatcher watcher : watchers) {
+ watcher.onStateChanged(this, newState);
+ }
+ if (newState == TaskState.ERROR) {
+ this.numFailures++;
}
- this.state = state;
+ this.state = newState;
}
/**
@@ -252,4 +259,9 @@ public final class ServiceTask implements Task {
request.isCancelled = true;
}
}
+
+ @Override
+ public int getNumFailures() {
+ return this.numFailures;
+ }
}