summaryrefslogtreecommitdiff
path: root/opendc-experiments/opendc-experiments-tf20
diff options
context:
space:
mode:
Diffstat (limited to 'opendc-experiments/opendc-experiments-tf20')
-rw-r--r--opendc-experiments/opendc-experiments-tf20/build.gradle.kts1
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt39
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt31
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt94
-rw-r--r--opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt28
5 files changed, 193 insertions, 0 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
index 3ee5462a..64483bd4 100644
--- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
@@ -35,6 +35,7 @@ dependencies {
implementation(projects.opendcSimulator.opendcSimulatorCompute)
implementation(projects.opendcTelemetry.opendcTelemetrySdk)
implementation(projects.opendcFormat)
+ implementation(projects.opendcUtils)
implementation(libs.kotlin.logging)
implementation(libs.parquet)
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt
new file mode 100644
index 00000000..d6360873
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.tf20.network
+
+/**
+ * A communication message between TensorFlow worker and master nodes.
+ *
+ * @property from The source node.
+ * @property to The destination node.
+ * @property type The type of message sent.
+ * @property dataSize message data size.
+ */
+public data class Message(
+ val from: NetworkNode,
+ val to: NetworkNode,
+ val type: MessageType,
+ val dataSize: Long,
+ val iterations: Int
+)
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt
new file mode 100644
index 00000000..8be16261
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.tf20.network
+
+/**
+ * Enumeration of the types of messages exchanged between worker and master nodes during TensorFlow execution.
+ */
+public enum class MessageType {
+ REQUEST,
+ WEIGHTS
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt
new file mode 100644
index 00000000..75b11423
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.tf20.network
+
+import kotlinx.coroutines.channels.Channel
+import org.opendc.utils.TimerScheduler
+import java.time.Clock
+import kotlin.coroutines.CoroutineContext
+
+/**
+ * The network controller represents a simple network model between the worker and master nodes during
+ * TensorFlow execution.
+ */
+public class NetworkController(context: CoroutineContext, clock: Clock) : AutoCloseable {
+ /**
+ * The scheduler for the message.
+ */
+ private val scheduler = TimerScheduler<Message>(context, clock)
+
+ /**
+ * The outbound communication channels.
+ */
+ private val channels = mutableMapOf<NetworkNode, Channel<Message>>()
+
+ /**
+ * A map of the bandwidth between the different nodes.
+ */
+ private val bandwidthMatrix: MutableMap<Pair<NetworkNode, NetworkNode>, Long> = mutableMapOf()
+
+ /**
+ * A counter representing the amount of messages sent via the controller.
+ */
+ private var messageCounter = 0
+
+ /**
+ * Add the specified link to this controller.
+ */
+ public fun addLink(node: NetworkNode): Channel<Message> {
+ val channel = Channel<Message>(Channel.UNLIMITED)
+ channels[node] = channel
+ return channel
+ }
+
+ /**
+ * Add a connection between two links.
+ */
+ public fun addConnection(node1: NetworkNode, node2: NetworkNode, bandwidth: Long) {
+ bandwidthMatrix[Pair(node1, node2)] = bandwidth
+ }
+
+ /**
+ * Route the specified [message].
+ */
+ public fun send(message: Message) {
+ val from = message.from
+ val to = message.to
+ val bandwidth = bandwidthMatrix[Pair(from, to)] ?: bandwidthMatrix[Pair(to, from)] ?: 1
+ val size = message.dataSize / 1_000_000
+ val delayTime = size / bandwidth + (0..5).random()
+
+ messageCounter++
+
+ val target = channels[to] ?: return // Drop if destination not found
+
+ scheduler.startSingleTimer(message, delayTime) { target.offer(message) }
+ }
+
+ /**
+ * Stop the network controller.
+ */
+ override fun close() {
+ scheduler.close()
+ }
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt
new file mode 100644
index 00000000..46fb5ce9
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.tf20.network
+
+/**
+ * A node represents a machine with which other nodes can communicate.
+ */
+public data class NetworkNode(val hostname: String)