diff options
| author | Wenchen Lai <wenchen.lai@hotmail.com> | 2021-05-07 21:07:40 +0200 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2021-05-09 19:48:25 +0200 |
| commit | 052f2b4a80c737e0a3fc38c5facdec5472931aeb (patch) | |
| tree | 10dec8009f12a511c1aacd69a40f1668c86de859 /opendc-experiments/opendc-experiments-tf20/src | |
| parent | 62c0a5bd210972e824979eb38c75e3b543aae194 (diff) | |
exp: Add simple network model for TensorFlow experiments
Diffstat (limited to 'opendc-experiments/opendc-experiments-tf20/src')
4 files changed, 192 insertions, 0 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt new file mode 100644 index 00000000..d6360873 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.network + +/** + * A communication message between TensorFlow worker and master nodes. + * + * @property from The source node. + * @property to The destination node. + * @property type The type of message sent. + * @property dataSize message data size. + */ +public data class Message( + val from: NetworkNode, + val to: NetworkNode, + val type: MessageType, + val dataSize: Long, + val iterations: Int +) diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt new file mode 100644 index 00000000..8be16261 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.network + +/** + * Enumeration of the types of messages exchanged between worker and master nodes during TensorFlow execution. + */ +public enum class MessageType { + REQUEST, + WEIGHTS +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt new file mode 100644 index 00000000..75b11423 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.network + +import kotlinx.coroutines.channels.Channel +import org.opendc.utils.TimerScheduler +import java.time.Clock +import kotlin.coroutines.CoroutineContext + +/** + * The network controller represents a simple network model between the worker and master nodes during + * TensorFlow execution. + */ +public class NetworkController(context: CoroutineContext, clock: Clock) : AutoCloseable { + /** + * The scheduler for the message. + */ + private val scheduler = TimerScheduler<Message>(context, clock) + + /** + * The outbound communication channels. + */ + private val channels = mutableMapOf<NetworkNode, Channel<Message>>() + + /** + * A map of the bandwidth between the different nodes. + */ + private val bandwidthMatrix: MutableMap<Pair<NetworkNode, NetworkNode>, Long> = mutableMapOf() + + /** + * A counter representing the amount of messages sent via the controller. + */ + private var messageCounter = 0 + + /** + * Add the specified link to this controller. + */ + public fun addLink(node: NetworkNode): Channel<Message> { + val channel = Channel<Message>(Channel.UNLIMITED) + channels[node] = channel + return channel + } + + /** + * Add a connection between two links. + */ + public fun addConnection(node1: NetworkNode, node2: NetworkNode, bandwidth: Long) { + bandwidthMatrix[Pair(node1, node2)] = bandwidth + } + + /** + * Route the specified [message]. + */ + public fun send(message: Message) { + val from = message.from + val to = message.to + val bandwidth = bandwidthMatrix[Pair(from, to)] ?: bandwidthMatrix[Pair(to, from)] ?: 1 + val size = message.dataSize / 1_000_000 + val delayTime = size / bandwidth + (0..5).random() + + messageCounter++ + + val target = channels[to] ?: return // Drop if destination not found + + scheduler.startSingleTimer(message, delayTime) { target.offer(message) } + } + + /** + * Stop the network controller. + */ + override fun close() { + scheduler.close() + } +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt new file mode 100644 index 00000000..46fb5ce9 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkNode.kt @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.network + +/** + * A node represents a machine with which other nodes can communicate. + */ +public data class NetworkNode(val hostname: String) |
