diff options
Diffstat (limited to 'opendc-experiments/opendc-experiments-tf20')
29 files changed, 2113 insertions, 0 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/README.md b/opendc-experiments/opendc-experiments-tf20/README.md new file mode 100644 index 00000000..3b3d00f8 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/README.md @@ -0,0 +1,10 @@ +TensorFlow with OpenDC +================= + +This module contains a reproduction of the experiments of Wenchen Lai's MSc Computer Science thesis: +Modeling and Simulation of the Google TensorFlow Ecosystem [1] + +The implementations in this module are still experimental and will be moved into OpenDC library space +once the implementations stabilize. + +[1] https://atlarge-research.com/pdfs/lai2020thesis.pdf diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts new file mode 100644 index 00000000..64483bd4 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +description = "Experiments with the OpenDC TensorFlow model" + +/* Build configuration */ +plugins { + `experiment-conventions` + `testing-conventions` +} + +dependencies { + api(platform(projects.opendcPlatform)) + api(projects.opendcHarness.opendcHarnessApi) + implementation(projects.opendcSimulator.opendcSimulatorCore) + implementation(projects.opendcSimulator.opendcSimulatorCompute) + implementation(projects.opendcTelemetry.opendcTelemetrySdk) + implementation(projects.opendcFormat) + implementation(projects.opendcUtils) + + implementation(libs.kotlin.logging) + implementation(libs.parquet) + implementation(libs.hadoop.client) { + exclude(group = "org.slf4j", module = "slf4j-log4j12") + exclude(group = "log4j") + } +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt new file mode 100644 index 00000000..9ef5b621 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * 
copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.keras + +import org.opendc.experiments.tf20.keras.activations.Activation +import org.opendc.experiments.tf20.keras.layer.conv.Conv2D +import org.opendc.experiments.tf20.keras.layer.conv.ConvPadding +import org.opendc.experiments.tf20.keras.layer.core.ActivationLayer +import org.opendc.experiments.tf20.keras.layer.core.Input +import org.opendc.experiments.tf20.keras.layer.pool.Pool2D +import org.opendc.experiments.tf20.keras.layer.regularization.Dropout + +/** + * Construct an AlexNet model with the given batch size. 
+ *
+ * The stack consists of five convolutions ("conv1" to "conv5") interleaved with three pooling layers, three
+ * further [Conv2D] layers ("fc6", "fc7" and "f8") with dropout after the first two, and a softmax activation.
+ *
+ * @param batchSize The leading dimension handed to the [Input] layer.
+ * @return The layers wrapped in a [Sequential] model.
+ */
+fun AlexNet(batchSize: Long): TrainableModel {
+    // Local builders: every call allocates fresh shape arrays, just like spelling the constructors out.
+    fun conv(
+        height: Long,
+        width: Long,
+        inChannels: Long,
+        outChannels: Long,
+        padding: ConvPadding,
+        name: String,
+        stride: Long = 1
+    ) = Conv2D(
+        longArrayOf(height, width, inChannels, outChannels), longArrayOf(1, stride, stride, 1),
+        padding = padding, name = name
+    )
+
+    fun pool(name: String) =
+        Pool2D(intArrayOf(1, 3, 3, 1), intArrayOf(1, 2, 2, 1), padding = ConvPadding.VALID, name = name)
+
+    fun dropout(name: String) = Dropout(0.5f, name = name)
+
+    return Sequential(
+        Input(batchSize, 227, 227, 3, name = "Input"),
+        conv(11, 11, 3, 96, ConvPadding.VALID, "conv1", stride = 4),
+        pool("pool1"),
+        conv(5, 5, 96, 256, ConvPadding.SAME, "conv2"),
+        pool("pool2"),
+        conv(3, 3, 256, 384, ConvPadding.SAME, "conv3"),
+        conv(3, 3, 384, 384, ConvPadding.SAME, "conv4"),
+        conv(3, 3, 384, 256, ConvPadding.SAME, "conv5"),
+        pool("pool5"),
+        conv(6, 6, 256, 4096, ConvPadding.VALID, "fc6"),
+        dropout("dropout6"),
+        conv(1, 1, 4096, 4096, ConvPadding.SAME, "fc7"),
+        dropout("dropout7"),
+        // NOTE(review): "f8" breaks the "fc6"/"fc7" naming pattern - possibly meant to be "fc8"; confirm before renaming.
+        conv(1, 1, 4096, 1000, ConvPadding.SAME, "f8"),
+        ActivationLayer(Activation.Softmax, name = "softmax")
+    )
+}
+
+/**
+ * Construct a VGG16 model with the given batch size.
+ *
+ * The stack consists of five groups of 3x3 convolutions ("conv1-1" to "conv5-3"), each group followed by a pooling
+ * layer, three further [Conv2D] layers ("fc6", "fc7" and "f8") with dropout after the first two, and a softmax
+ * activation.
+ *
+ * @param batchSize The leading dimension handed to the [Input] layer (128 when omitted).
+ * @return The layers wrapped in a [Sequential] model.
+ */
+fun VGG16(batchSize: Long = 128): TrainableModel {
+    // Local builders: every call allocates fresh shape arrays, just like spelling the constructors out.
+    fun conv(height: Long, width: Long, inChannels: Long, outChannels: Long, padding: ConvPadding, name: String) =
+        Conv2D(
+            longArrayOf(height, width, inChannels, outChannels), longArrayOf(1, 1, 1, 1),
+            padding = padding, name = name
+        )
+
+    fun conv3x3(inChannels: Long, outChannels: Long, name: String) =
+        conv(3, 3, inChannels, outChannels, ConvPadding.SAME, name)
+
+    fun pool(name: String) =
+        Pool2D(intArrayOf(1, 2, 2, 1), intArrayOf(1, 2, 2, 1), padding = ConvPadding.VALID, name = name)
+
+    fun dropout(name: String) = Dropout(0.5f, name = name)
+
+    return Sequential(
+        Input(batchSize, 224, 224, 3, name = "Input"),
+        conv3x3(3, 64, "conv1-1"),
+        conv3x3(64, 64, "conv1-2"),
+        pool("pool1"),
+        conv3x3(64, 128, "conv2-1"),
+        conv3x3(128, 128, "conv2-2"),
+        pool("pool2"),
+        conv3x3(128, 256, "conv3-1"),
+        conv3x3(256, 256, "conv3-2"),
+        conv3x3(256, 256, "conv3-3"),
+        pool("pool3"),
+        conv3x3(256, 512, "conv4-1"),
+        conv3x3(512, 512, "conv4-2"),
+        conv3x3(512, 512, "conv4-3"),
+        pool("pool4"),
+        conv3x3(512, 512, "conv5-1"),
+        conv3x3(512, 512, "conv5-2"),
+        conv3x3(512, 512, "conv5-3"),
+        pool("pool5"),
+        conv(7, 7, 512, 4096, ConvPadding.VALID, "fc6"),
+        dropout("dropout6"),
+        conv(1, 1, 4096, 4096, ConvPadding.SAME, "fc7"),
+        dropout("dropout7"),
+        // NOTE(review): "f8" breaks the "fc6"/"fc7" naming pattern - possibly meant to be "fc8"; confirm before renaming.
+        conv(1, 1, 4096, 1000, ConvPadding.SAME, "f8"),
+        ActivationLayer(Activation.Softmax, name = "softmax")
+    )
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt
new file mode 100644
index 00000000..9a48aced
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/TensorFlowExperiment.kt
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.tf20
+
+import io.opentelemetry.api.metrics.MeterProvider
+import io.opentelemetry.sdk.metrics.SdkMeterProvider
+import org.opendc.experiments.tf20.core.SimTFDevice
+import org.opendc.experiments.tf20.distribute.*
+import org.opendc.experiments.tf20.keras.AlexNet
+import org.opendc.experiments.tf20.util.MLEnvironmentReader
+import org.opendc.harness.dsl.Experiment
+import org.opendc.harness.dsl.anyOf
+import org.opendc.simulator.compute.power.LinearPowerModel
+import org.opendc.simulator.core.runBlockingSimulation
+import org.opendc.telemetry.sdk.toOtelClock
+
+/**
+ * Experiments with the TensorFlow simulation model.
+ *
+ * Every run trains an [AlexNet] model on the first device described by the environment file, using a
+ * [OneDeviceStrategy].
+ */
+public class TensorFlowExperiment : Experiment(name = "tf20") {
+    /**
+     * The environment file to use, given as a classpath resource name.
+     */
+    private val environmentFile by anyOf("/kth.json")
+
+    /**
+     * The batch size used.
+     */
+    private val batchSize by anyOf(16, 32, 64, 128)
+
+    /**
+     * Run a single repetition of the experiment inside a blocking simulation.
+     *
+     * @param repeat The index of the repetition (not used by this experiment).
+     * @throws IllegalStateException if the environment file cannot be found on the classpath.
+     */
+    override fun doRun(repeat: Int): Unit = runBlockingSimulation {
+        val meterProvider: MeterProvider = SdkMeterProvider
+            .builder()
+            .setClock(clock.toOtelClock())
+            .build()
+        val meter = meterProvider.get("opendc-tf20")
+
+        // getResourceAsStream yields null for a missing resource; fail early with a message naming the file
+        // instead of letting the reader trip over the null stream.
+        val environment = checkNotNull(TensorFlowExperiment::class.java.getResourceAsStream(environmentFile)) {
+            "Environment file $environmentFile not found on the classpath"
+        }
+        val def = MLEnvironmentReader(environment).read().first()
+        val device = SimTFDevice(
+            def.uid, def.meta["gpu"] as Boolean, coroutineContext, clock, meter, def.model.cpus[0],
+            def.model.memory[0], LinearPowerModel(250.0, 60.0)
+        )
+
+        // The device owns a simulated machine and a coroutine scope; release them even when training fails.
+        try {
+            val strategy = OneDeviceStrategy(device)
+
+            val model = AlexNet(batchSize.toLong())
+            model.use {
+                it.compile(strategy)
+
+                // NOTE(review): 9088 is presumably the number of training samples, which would make this the
+                // number of batches rather than epochs - confirm.
+                it.fit(epochs = 9088 / batchSize, batchSize = batchSize)
+            }
+        } finally {
+            device.close()
+        }
+    }
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
new file mode 100644
index 00000000..f4c18ff1
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.core + +import io.opentelemetry.api.metrics.Meter +import io.opentelemetry.api.metrics.common.Labels +import kotlinx.coroutines.* +import org.opendc.simulator.compute.SimBareMetalMachine +import org.opendc.simulator.compute.SimMachine +import org.opendc.simulator.compute.SimMachineContext +import org.opendc.simulator.compute.SimMachineModel +import org.opendc.simulator.compute.cpufreq.PerformanceScalingGovernor +import org.opendc.simulator.compute.cpufreq.SimpleScalingDriver +import org.opendc.simulator.compute.model.MemoryUnit +import org.opendc.simulator.compute.model.ProcessingUnit +import org.opendc.simulator.compute.power.PowerModel +import org.opendc.simulator.compute.workload.SimWorkload +import org.opendc.simulator.resources.SimResourceCommand +import org.opendc.simulator.resources.SimResourceConsumer +import org.opendc.simulator.resources.SimResourceContext +import org.opendc.simulator.resources.SimResourceEvent +import java.time.Clock +import java.util.* +import kotlin.coroutines.Continuation +import kotlin.coroutines.CoroutineContext +import kotlin.coroutines.resume + +/** + * A [TFDevice] implementation using simulated components. 
+ * + * Computation requested through [compute] is placed in a queue and carried out one piece of work at a time by a + * [SimBareMetalMachine] that is started when the device is constructed; [load] is modelled as a plain delay. + * + * @param context The coroutine context from which the device's scope is derived (a new [Job] is added to it). + * @param pu The processing unit that, together with [memory], forms the model of the simulated machine. + * @param memory The memory unit whose speed determines the duration of [load]. + * @param powerModel The power model handed to the [SimpleScalingDriver] of the machine. + */ +public class SimTFDevice( + override val uid: UUID, + override val isGpu: Boolean, + context: CoroutineContext, + clock: Clock, + meter: Meter, + private val pu: ProcessingUnit, + private val memory: MemoryUnit, + powerModel: PowerModel +) : TFDevice { + /** + * The scope in which the device runs. It is given its own [Job], which [close] cancels. + */ + private val scope = CoroutineScope(context + Job()) + + /** + * The [SimMachine] representing the device. + */ + private val machine = SimBareMetalMachine( + scope.coroutineContext, clock, SimMachineModel(listOf(pu), listOf(memory)), + PerformanceScalingGovernor(), SimpleScalingDriver(powerModel) + ) + + /** + * The usage of the device, recorded from the speed of the resource context on every run event. + */ + private val _usage = meter.doubleValueRecorderBuilder("device.usage") + .setDescription("The amount of device resources used") + .setUnit("MHz") + .build() + .bind(Labels.of("device", uid.toString())) + + /** + * The power draw of the device, recorded from the machine on every run event. + */ + private val _power = meter.doubleValueRecorderBuilder("device.power") + .setDescription("The power draw of the device") + .setUnit("W") + .build() + .bind(Labels.of("device", uid.toString())) + + /** + * The workload that will be run by the device. It acts as its own resource consumer. + */ + private val workload = object : SimWorkload, SimResourceConsumer { + /** + * The resource context to interrupt the workload with. It stays null until the start event is received. + */ + var ctx: SimResourceContext? = null + + /** + * The capacity of the device. Updated in [onEvent]; it is not read anywhere in this class. + */ + private var capacity: Double = 0.0 + + /** + * The queue of work to run. + */ + val queue = ArrayDeque<Work>() + + /** + * A flag to indicate that the workload is idle, i.e. that there is no active work. + */ + val isIdle + get() = activeWork == null + + /** + * The active work of the workload, or null when nothing is being processed. + */ + private var activeWork: Work? 
= null + + override fun onStart(ctx: SimMachineContext) {} + + override fun getConsumer(ctx: SimMachineContext, cpu: ProcessingUnit): SimResourceConsumer = this + + /** Deducts the progress made (outstanding flops minus the remaining work of [ctx]) from the active work, then either continues it, starts the next queued work, or idles when the queue is empty. */ override fun onNext(ctx: SimResourceContext): SimResourceCommand { + val activeWork = activeWork + if (activeWork != null) { + if (activeWork.consume(activeWork.flops - ctx.remainingWork)) { + this.activeWork = null + } else { + return SimResourceCommand.Consume(activeWork.flops, ctx.capacity) + } + } + + val queue = queue + val head = queue.poll() + return if (head != null) { + this.activeWork = head + SimResourceCommand.Consume(head.flops, ctx.capacity) + } else { + SimResourceCommand.Idle() + } + } + + /** Remembers the resource context and capacity on start, interrupts the context on capacity changes and records usage and power draw on run events; other events are ignored. */ override fun onEvent(ctx: SimResourceContext, event: SimResourceEvent) { + when (event) { + SimResourceEvent.Start -> { + this.ctx = ctx + this.capacity = ctx.capacity + } + SimResourceEvent.Capacity -> { + this.capacity = ctx.capacity + ctx.interrupt() + } + SimResourceEvent.Run -> { + _usage.record(ctx.speed) + _power.record(machine.powerDraw) + } + else -> {} + } + } + } + + init { + scope.launch { + machine.run(workload) + } + } + + /** Suspends for `dataSize / memory.speed * 1000`, truncated to a Long and passed to [delay]. */ override suspend fun load(dataSize: Long) { + val duration = dataSize / memory.speed * 1000 + delay(duration.toLong()) + } + + /** Queues the work and suspends until [Work.consume] resumes the continuation; an idle workload is interrupted, presumably so that it is asked for its next command. NOTE(review): no cancellation handler is registered, so work of a cancelled caller appears to stay queued - confirm. */ override suspend fun compute(flops: Double) = suspendCancellableCoroutine<Unit> { cont -> + workload.queue.add(Work(flops, cont)) + if (workload.isIdle) { + workload.ctx?.interrupt() + } + } + + /** Closes the machine and cancels the device scope. NOTE(review): continuations of work that is still queued are not resumed here - confirm this cannot leave callers suspended. */ override fun close() { + machine.close() + scope.cancel() + } + + /** A piece of work with [flops] operations outstanding whose requester is suspended in [cont]. */ private data class Work(var flops: Double, val cont: Continuation<Unit>) { + /** Subtracts [flops] from the outstanding amount; once nothing remains, resumes [cont] and returns `true`, otherwise returns `false`. */ fun consume(flops: Double): Boolean { + this.flops -= flops + + if (this.flops <= 0) { + cont.resume(Unit) + return true + } + + return false + } + } +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt new file mode 100644 index 
00000000..bbc34ed9 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDevice.kt @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.core + +import java.util.* + +/** + * A compute device on which tensor operations are performed. + * + * A device is [AutoCloseable]; both operations are suspending functions. + */ +public interface TFDevice : AutoCloseable { + /** + * The unique identifier of the device. + */ + public val uid: UUID + + /** + * A flag to indicate whether the device is a GPU. + */ + public val isGpu: Boolean + + /** + * Transfer the specified amount of data from memory. + * + * @param dataSize The amount of data to transfer. NOTE(review): the unit is not stated in this interface - confirm. + */ + public suspend fun load(dataSize: Long) + + /** + * Perform [flops] amount of computation on the device. + * + * @param flops The amount of computation to perform. 
+ */ + public suspend fun compute(flops: Double) +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt new file mode 100644 index 00000000..8caa7ec9 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.tf20.distribute + +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.launch +import org.opendc.experiments.tf20.core.TFDevice + +/** + * A distribution [Strategy] that supports synchronous distributed training on multiple GPUs on one machine. + * + * It creates one replica per GPU device. 
Each variable in the model is mirrored across all the replicas.
+ *
+ * @param devices The devices over which the forward work of every batch is divided.
+ */
+public class MirroredStrategy(val devices: List<TFDevice>) : Strategy {
+    /**
+     * Start one computation per device and suspend until all of them have finished.
+     *
+     * Every device is asked to compute `forward * batchSize / devices.size + backward`.
+     */
+    override suspend fun run(forward: Double, backward: Double, batchSize: Int) = coroutineScope {
+        // The amount is the same for every device, so it is evaluated once up front.
+        val flopsPerDevice = forward * batchSize / devices.size + backward
+
+        devices.forEach { target ->
+            launch { target.compute(flopsPerDevice) }
+        }
+    }
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt
new file mode 100644
index 00000000..271fab98
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE. 
+ */
+
+package org.opendc.experiments.tf20.distribute
+
+import org.opendc.experiments.tf20.core.TFDevice
+
+/**
+ * A distribution [Strategy] that runs every batch, forward and backward work alike, on one [TFDevice].
+ *
+ * @param device The device that performs all computation.
+ */
+public class OneDeviceStrategy(val device: TFDevice) : Strategy {
+    /**
+     * Ask the device to compute `forward * batchSize + backward` and suspend until it has done so.
+     */
+    override suspend fun run(forward: Double, backward: Double, batchSize: Int) {
+        val totalFlops = forward * batchSize + backward
+        device.compute(totalFlops)
+    }
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt
new file mode 100644
index 00000000..5839c0df
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE. 
+ */ + +package org.opendc.experiments.tf20.distribute + +/** + * A strategy for distributing TensorFlow state and computation over multiple devices. + */ +public interface Strategy { + /** + * Run the specified batch using the given strategy. + * + * @param forward The amount of computation of the forward pass, as produced by the model's `forward()`. + * @param backward The amount of computation of the backward pass, as produced by the model's `backward()`. + * @param batchSize The number of samples in the batch; the strategies in this module multiply [forward] by it. + */ + public suspend fun run(forward: Double, backward: Double, batchSize: Int) +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/Sequential.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/Sequential.kt new file mode 100644 index 00000000..411ddb59 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/Sequential.kt @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+package org.opendc.experiments.tf20.keras
+
+import org.opendc.experiments.tf20.keras.layer.Layer
+import org.opendc.experiments.tf20.keras.layer.core.Input
+import org.opendc.experiments.tf20.keras.shape.TensorShape
+
+/**
+ * Sequential model groups a linear stack of layers into a TensorFlow TrainableModel.
+ *
+ * @param [layers] The layers to describe the model design.
+ */
+public class Sequential(vararg layers: Layer) : TrainableModel(*layers) {
+    /**
+     * Build every layer in order, threading tensor shapes through the stack.
+     *
+     * The input layer is built from its own packed dimensions; each following layer that is not an [Input]
+     * receives the output shape of its predecessor as its input shape.
+     */
+    override fun buildLayers() {
+        val inputShape = TensorShape(*inputLayer.packedDims)
+        inputLayer.inputTensor = inputShape
+        inputLayer.build(inputShape)
+
+        var shape: TensorShape = inputLayer.getOutputShape(inputShape)
+        inputLayer.outputTensor = shape
+
+        for (layer in layers) {
+            // Input layers carry no computation of their own here and are skipped, wherever they occur.
+            if (layer is Input) continue
+
+            layer.inputTensor = shape
+            layer.build(shape)
+
+            shape = layer.getOutputShape(shape)
+            layer.outputTensor = shape
+        }
+    }
+
+    /**
+     * Sum the forward cost reported by every layer.
+     */
+    override fun forward(): Double {
+        // sumOf supersedes the deprecated sumByDouble and yields the same Double sum.
+        return layers.sumOf { it.forward() }
+    }
+
+    /**
+     * Sum the backward cost reported by every layer.
+     */
+    override fun backward(): Double {
+        return layers.sumOf { it.backward() }
+    }
+}
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt
new file mode 100644
index 00000000..2cac6cbc
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the 
/**
 * A model groups layers into an object with training and inference features.
 *
 * Every layer registered with the model must have a unique name; construction fails otherwise.
 *
 * @param layers The layers that describe the model design, in execution order.
 */
public abstract class TrainableModel(vararg layers: Layer) : AutoCloseable {
    /**
     * The layers to describe the model design. Main part of the internal state of the model.
     */
    public val layers: List<Layer> = listOf(*layers)

    /**
     * First layer that is responsible for the input shape of the Neural Network.
     *
     * Accessing this property fails if the model has no layers or if the first layer is not an [Input].
     */
    public val inputLayer: Input
        get() = layers[0] as Input

    /**
     * Returns the dimensions declared by the [inputLayer].
     */
    public val inputDimensions: LongArray
        get() = inputLayer.packedDims

    /**
     * Layers indexed by name.
     */
    protected val layersByName: MutableMap<String, Layer> = mutableMapOf()

    /**
     * A flag to indicate that the model is compiled.
     */
    public var isCompiled: Boolean = false
        private set

    /**
     * The strategy that is being used. Only initialized once [compile] has been called.
     */
    private lateinit var strategy: Strategy

    /**
     * Common method for building the initial part of the model static graph.
     */
    protected abstract fun buildLayers()

    /**
     * Perform a forward propagation.
     */
    protected abstract fun forward(): Double

    /**
     * Perform a backward propagation.
     */
    protected abstract fun backward(): Double

    init {
        for (layer in layers) {
            // Report which name collided; layers that rely on a default name otherwise fail with an empty message.
            require(layer.name !in layersByName) {
                "Duplicate layer name '${layer.name}': layer names must be unique within a model"
            }

            layersByName[layer.name] = layer
            layer.parentModel = this
        }
    }

    /**
     * Configures the model for training.
     *
     * @param strategy The distribution strategy for training.
     * @throws IllegalStateException if the model has already been compiled.
     */
    public fun compile(strategy: Strategy) {
        check(!isCompiled) { "Model is already compiled." }

        buildLayers()

        this.strategy = strategy
        this.isCompiled = true
    }

    /**
     * Train the model for a fixed number of [epochs] (iterations over a dataset).
     *
     * The forward and backward costs are computed once and reused for every epoch.
     *
     * @param [epochs] Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided.
     * @param [batchSize] Number of samples per gradient update.
     * @throws IllegalStateException if the model has not been compiled yet.
     */
    public suspend fun fit(epochs: Int = 5, batchSize: Int = 32) {
        check(isCompiled) { "Model not yet compiled." }

        val forwardFlops = forward()
        val backwardFlops = backward()

        repeat(epochs) {
            strategy.run(forwardFlops, backwardFlops, batchSize)
        }
    }

    /**
     * Release the resources held by the model. The base implementation holds none.
     */
    override fun close() {
    }

    override fun toString(): String {
        return "TrainableModel ${super.toString()}"
    }
}
/**
 * Neural network hyper-parameter: the activation function of a node defines the output of that node given an input
 * or set of inputs.
 *
 * This enumeration only names the functions; the formulas below document the function each entry stands for.
 */
public enum class Activation {
    /**
     * Linear unit. Returns unmodified input.
     *
     * NOTE: Equivalent to applying no activation at all (the classic linear perceptron).
     */
    Linear,

    /**
     * Sigmoid activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * sigmoid(x) = 1 / (1 + exp(-x))
     * ```
     *
     * For small values (<-5), `sigmoid` returns a value close to zero, and for large values (>5)
     * the result of the function gets close to 1.
     *
     * NOTE: Sigmoid is equivalent to a 2-element Softmax, where the second element is
     * assumed to be zero. The sigmoid function always returns a value between 0 and 1.
     */
    Sigmoid,

    /**
     * Hyperbolic tangent activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))
     * ```
     */
    Tanh,

    /**
     * Rectified linear unit (ReLU).
     *
     * With default values, this returns the standard ReLU activation:
     * `max(x, 0)`, the element-wise maximum of 0 and the input tensor.
     */
    Relu,

    /**
     * Computes Rectified Linear 6:
     * ```
     * min(max(features, 0), 6)
     * ```
     * @see <a href="http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf">
     *     Convolutional Deep Belief Networks on CIFAR-10. A. Krizhevsky</a>
     */
    Relu6,

    /**
     * Exponential Linear Unit.
     *
     * The exponential linear unit (ELU) with `alpha > 0` is:
     * `x` if `x > 0` and `alpha * (exp(x) - 1)` if `x < 0`
     *
     * For this implementation alpha is equal to 1.0.
     *
     * The ELU hyperparameter `alpha` controls the value to which an
     * ELU saturates for negative net inputs. ELUs diminish the
     * vanishing gradient effect.
     *
     * ELUs have negative values, which pushes the mean of the activations closer to zero.
     *
     * Mean activations that are closer to zero enable faster learning as they
     * bring the gradient closer to the natural gradient.
     *
     * ELUs saturate to a negative value when the argument gets smaller.
     * Saturation means a small derivative which decreases the variation
     * and the information that is propagated to the next layer.
     *
     * @see <a href="https://arxiv.org/abs/1511.07289">Fast and Accurate Deep Network Learning by Exponential Linear Units
     * (ELUs) (Clevert et al, 2016)</a>
     */
    Elu,

    /**
     * Scaled Exponential Linear Unit (SELU).
     *
     * The Scaled Exponential Linear Unit (SELU) activation function is defined as:
     * ```
     * if x > 0: return scale * x
     * if x < 0: return scale * alpha * (exp(x) - 1)
     * ```
     * where `alpha` and `scale` are pre-defined constants (`alpha=1.67326324` and `scale=1.05070098`).
     *
     * Basically, the SELU activation function multiplies `scale` (> 1) with the
     * output of the `tf.keras.activations.elu` function to ensure a slope larger
     * than one for positive inputs.
     *
     * @see <a href="https://arxiv.org/abs/1706.02515">Klambauer et al., 2017</a>
     */
    Selu,

    /**
     * Softmax converts a real vector to a vector of categorical probabilities.
     * The elements of the output vector are in range (0, 1) and sum to 1.
     *
     * Softmax is often used as the activation for the last
     * layer of a classification network because the result could be interpreted as
     * a probability distribution.
     */
    Softmax,

    /**
     * Log-softmax activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * log_softmax(x) = log(softmax(x))
     * ```
     */
    LogSoftmax,

    /**
     * Exponential activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * exp(x)
     * ```
     */
    Exponential,

    /**
     * Softplus activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * softplus(x) = log(exp(x) + 1)
     * ```
     */
    SoftPlus,

    /**
     * Softsign activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * softsign(x) = x / (abs(x) + 1)
     * ```
     */
    SoftSign,

    /**
     * Hard sigmoid activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * if x < -2.5: return 0
     * if x > 2.5: return 1
     * if -2.5 <= x <= 2.5: return 0.2 * x + 0.5
     * ```
     * A faster approximation of the sigmoid activation.
     */
    HardSigmoid,

    /**
     * Swish activation function.
     *
     * Transforms input 'x' according to the formula:
     * ```
     * swish(x) = x * sigmoid(x)
     * ```
     *
     * It is a smooth, non-monotonic function that consistently matches
     * or outperforms ReLU on deep networks; it is unbounded above and
     * bounded below.
     *
     * @see <a href="https://arxiv.org/abs/1710.05941">Ramachandran et al., 2017</a>
     */
    Swish;
}
/**
 * Abstract class from which all layers inherit.
 *
 * A layer carries the tensor shapes assigned to it while the model is built and reports a cost for its forward and
 * backward pass.
 *
 * @param name The name of the layer.
 */
public abstract class Layer(public val name: String) {
    /**
     * TrainableModel in which the layer exists, or `null` while the layer is not attached to a model.
     */
    internal var parentModel: TrainableModel? = null

    /**
     * The input shape of the layer.
     *
     * Declared `lateinit`: reading it before it has been assigned fails.
     */
    public lateinit var inputTensor: TensorShape
        internal set

    /**
     * The output shape of the layer.
     *
     * Declared `lateinit`: reading it before it has been assigned fails.
     */
    public lateinit var outputTensor: TensorShape
        internal set

    /**
     * Build the layer for the specified [inputShape].
     *
     * @param [inputShape] Input shape, result of [getOutputShape] call from previous layer.
     */
    public abstract fun build(inputShape: TensorShape)

    /**
     * Compute output shape of this layer, based on [inputShape] and [Layer] type.
     *
     * @param [inputShape] The shape of the tensor fed into this layer.
     * @return The shape of the tensor produced by this layer.
     */
    public abstract fun getOutputShape(inputShape: TensorShape): TensorShape

    /**
     * Perform a forward propagation.
     *
     * @return The cost of the forward pass of this layer. NOTE(review): the unit is defined by the concrete layer
     * implementations, not by this class - confirm before mixing values from different layers.
     */
    public abstract fun forward(): Double

    /**
     * Perform a backward propagation.
     *
     * @return The cost of the backward pass of this layer, in the same unit as [forward].
     */
    public abstract fun backward(): Double
}
/**
 * 2D convolution layer (e.g. spatial convolution over images).
 *
 * This layer creates a convolution kernel that is convolved (actually cross-correlated)
 * with the layer input to produce a tensor of outputs.
 * Finally, the `activation` is applied to the outputs as well.
 *
 * Shapes are indexed as `[batch, height, width, channels]`.
 *
 * @property filter The kernel dimensions as `[height, width, channel_in, channel_out]`.
 * @property strides The strides of the convolution as `[1, stride_h, stride_w, 1]`. The height and width strides must
 * be positive; the all-zero default is a placeholder that has to be overridden.
 * @property activation The activation applied to the output. Only [Activation.Relu] contributes to the cost model.
 * @property padding The padding method.
 * @param name Custom layer name.
 */
public class Conv2D(
    public val filter: LongArray = LongArray(4), // [H, W, channel_in, channel_out]
    public val strides: LongArray = LongArray(4), // [1, stride_h, stride_w, 1]
    public val activation: Activation = Activation.Relu,
    public val padding: ConvPadding = ConvPadding.VALID,
    name: String = "",
) : Layer(name) {

    // Padding in front of the input along each axis, recorded by the last call to getOutputShape.
    private var padHeight: Double = 0.0
    private var padWidth: Double = 0.0

    override fun build(inputShape: TensorShape) {}

    /**
     * Compute the output shape for the given [inputShape] and record the padding that was applied.
     *
     * @throws IllegalStateException if the input channels do not match the filter or if a stride is not positive.
     */
    override fun getOutputShape(inputShape: TensorShape): TensorShape {
        check(filter[2] == inputShape[3]) { "Input channel ${filter[2]} and ${inputShape[3]} shall match" }
        // A zero stride would divide by zero below and silently yield a nonsensical shape.
        check(strides[1] > 0 && strides[2] > 0) { "Strides ${strides.contentToString()} must be positive" }

        val outHeight = outputExtent(inputShape[1], filter[0], strides[1])
        val outWidth = outputExtent(inputShape[2], filter[1], strides[2])

        padHeight = leadingPadding(inputShape[1], outHeight, filter[0], strides[1]).toDouble()
        padWidth = leadingPadding(inputShape[2], outWidth, filter[1], strides[2]).toDouble()

        return TensorShape(inputShape[0], outHeight, outWidth, filter[3])
    }

    /**
     * Output size along one spatial axis for the configured [padding].
     */
    private fun outputExtent(input: Long, kernel: Long, stride: Long): Long = when (padding) {
        ConvPadding.VALID -> ceil((input - kernel + 1).toDouble() / stride.toDouble()).toLong()
        ConvPadding.SAME -> ceil(input.toDouble() / stride.toDouble()).toLong()
    }

    /**
     * Padding in front of the input along one spatial axis: half of the total padding, which is never negative.
     */
    private fun leadingPadding(input: Long, output: Long, kernel: Long, stride: Long): Long = when (padding) {
        ConvPadding.VALID -> 0L
        // The total is clamped because it turns negative when the kernel is smaller than the stride.
        ConvPadding.SAME -> maxOf((output - 1) * stride + kernel - input, 0L) / 2
    }

    /**
     * Estimate the cost of a forward pass from the filter size and the input/output shapes.
     *
     * NOTE(review): the operation count is scaled by `4.0 / 1_000_000`; the original comment suggests this stands for
     * FLOAT_BYTES / MILLION - confirm the intended unit.
     */
    override fun forward(): Double {
        // Mul and add per output pixel: kernel_w x kernel_h x in_channel
        var flops: Long = (2 * filter[0] * filter[1] * filter[2])

        val output = outputTensor
        // Flops per output map.
        flops *= output[1] * output[2] * filter[3]

        // Flops across multiple input patches.
        flops *= inputTensor[0]

        // ReLU adds one operation per output element.
        if (activation == Activation.Relu) {
            flops += output[0] * output[1] * output[2] * output[3]
        }

        return flops * 4.0 / 1_000_000
    }

    /**
     * The backward pass is modelled with the same cost as the forward pass.
     */
    override fun backward(): Double = forward()

    override fun toString(): String {
        return "Conv2D[filter=${filter.contentToString()}, strides=${strides.contentToString()}, activation=$activation, padding=$padding]"
    }
}
/**
 * Enumeration of convolution padding types.
 */
public enum class ConvPadding {
    /**
     * Pad evenly to the left/right or up/down of the input such that the output has the same
     * height/width dimension as the input.
     *
     * NOTE(review): the output only matches the input size for a stride of 1; with larger strides it is presumably
     * the input size divided by the stride, rounded up - confirm against the layers that consume this value.
     */
    SAME,

    /**
     * No padding.
     */
    VALID
}
/**
 * A layer that applies an [Activation] function to the output of the preceding layer.
 *
 * The layer leaves the tensor shape untouched and reports no cost.
 *
 * @property activation The activation function this layer stands for.
 * @param name Custom layer name.
 */
public class ActivationLayer(
    public val activation: Activation = Activation.Relu,
    name: String = "",
) : Layer(name) {

    /** Nothing to build: the layer has no state that depends on the input shape. */
    override fun build(inputShape: TensorShape): Unit = Unit

    /** The output shape equals the [inputShape]. */
    override fun getOutputShape(inputShape: TensorShape): TensorShape {
        return inputShape
    }

    /** The forward pass is modelled as free. */
    override fun forward(): Double {
        return 0.0
    }

    /** The backward pass costs the same as the forward pass. */
    override fun backward(): Double {
        return forward()
    }

    override fun toString(): String = "ActivationLayer[activation=$activation]"
}
/**
 * The layer that declares the shape of the data fed into the model.
 *
 * @param dims The dimensions of the input data.
 * @param name Custom layer name.
 */
public class Input(vararg dims: Long, name: String) : Layer(name) {
    /**
     * Input data dimensions. Rank = 3 or 4 for most popular supported cases.
     */
    public val packedDims: LongArray = dims

    /** Nothing to build: the layer only carries the declared dimensions. */
    override fun build(inputShape: TensorShape): Unit = Unit

    /** The output shape equals the [inputShape]. */
    override fun getOutputShape(inputShape: TensorShape): TensorShape = inputShape

    /** The input layer performs no computation in the forward pass. */
    override fun forward(): Double {
        return 0.0
    }

    /** The input layer performs no computation in the backward pass. */
    override fun backward(): Double {
        return 0.0
    }

    override fun toString(): String = "Input[shape=${packedDims.contentToString()}]"
}
/**
 * Pooling layer for 2D inputs (e.g. images).
 *
 * Shapes are indexed as `[batch, height, width, channels]`.
 *
 * @property [poolSize] The size of the sliding window for each dimension of input tensor (pool batch, pool height, pool width, pool channels).
 * Usually, pool batch and pool channels are equal to 1.
 * @property [strides] Strides of the pooling operation for each dimension of input tensor. The height and width
 * strides must be positive.
 * @property [padding] The padding method, either [ConvPadding.VALID] or [ConvPadding.SAME].
 * @param [name] Custom layer name.
 */
public class Pool2D(
    public val poolSize: IntArray = intArrayOf(1, 2, 2, 1),
    public val strides: IntArray = intArrayOf(1, 2, 2, 1),
    public val padding: ConvPadding = ConvPadding.VALID,
    name: String
) : Layer(name) {

    // Padding in front of the input along each axis, recorded by the last call to getOutputShape.
    private var padHeight = 0L
    private var padWidth = 0L

    override fun build(inputShape: TensorShape) {
    }

    /**
     * Compute the output shape for the given [inputShape] and record the padding that was applied.
     *
     * @throws IllegalStateException if a height or width stride is not positive.
     */
    override fun getOutputShape(inputShape: TensorShape): TensorShape {
        // A zero stride would divide by zero below and silently yield a nonsensical shape.
        check(strides[1] > 0 && strides[2] > 0) { "Strides ${strides.contentToString()} must be positive" }

        val outHeight = outputExtent(inputShape[1], poolSize[1].toLong(), strides[1].toLong())
        val outWidth = outputExtent(inputShape[2], poolSize[2].toLong(), strides[2].toLong())

        padHeight = leadingPadding(inputShape[1], outHeight, poolSize[1].toLong(), strides[1].toLong())
        padWidth = leadingPadding(inputShape[2], outWidth, poolSize[2].toLong(), strides[2].toLong())

        return TensorShape(inputShape[0], outHeight, outWidth, inputShape[3])
    }

    /**
     * Output size along one spatial axis for the configured [padding].
     */
    private fun outputExtent(input: Long, window: Long, stride: Long): Long = when (padding) {
        ConvPadding.VALID -> ceil((input - window + 1).toDouble() / stride.toDouble()).toLong()
        ConvPadding.SAME -> ceil(input.toDouble() / stride.toDouble()).toLong()
    }

    /**
     * Padding in front of the input along one spatial axis: half of the total padding, which is never negative.
     */
    private fun leadingPadding(input: Long, output: Long, window: Long, stride: Long): Long = when (padding) {
        ConvPadding.VALID -> 0L
        // The total is clamped because it turns negative when the window is smaller than the stride.
        ConvPadding.SAME -> maxOf((output - 1) * stride + window - input, 0L) / 2
    }

    /**
     * Estimate the cost of a forward pass from the pool size and the input/output shapes.
     *
     * NOTE(review): the operation count is scaled by `4.0 / 1_000_000`; confirm the intended unit.
     */
    override fun forward(): Double {
        val output = outputTensor
        // Per output pixel: kernel_w x kernel_h x in_channel. Computed in Long to avoid Int overflow.
        var flops: Long = 2L * poolSize[1] * poolSize[2] * inputTensor[3]

        // Flops per output map.
        flops *= output[2] * output[1]

        // Flops across multiple input patches.
        flops *= inputTensor[0]

        return flops * 4.0 / 1_000_000
    }

    /**
     * The backward pass is modelled with the same cost as the forward pass.
     */
    override fun backward(): Double = forward()

    override fun toString(): String {
        return "MaxPool2D[poolSize=${poolSize.contentToString()}, strides=${strides.contentToString()}, padding=$padding]"
    }
}
/**
 * This layer applies dropout to the input.
 *
 * Dropout consists in randomly setting a fraction `rate` of input units to 0
 * at each update during training time, which helps prevent overfitting.
 * The units that are kept are scaled by `1 / (1 - rate)`, so that their
 * sum is unchanged at training time and inference time.
 *
 * @property keepProbability A probability between 0 and 1. NOTE(review): the name suggests the fraction of units
 * that is kept, whereas the earlier documentation described it as the fraction that is dropped - confirm which one is
 * intended. The value does not take part in the cost computed by this layer.
 * @param [name] Custom layer name.
 */
public class Dropout(
    public val keepProbability: Float = 0.1f,
    name: String
) : Layer(name) {
    /** Nothing to build: the layer has no state that depends on the input shape. */
    override fun build(inputShape: TensorShape): Unit = Unit

    /** The output shape equals the [inputShape]. */
    override fun getOutputShape(inputShape: TensorShape): TensorShape = inputShape

    /**
     * The cost of a forward pass: the product of the first four output dimensions, scaled by `4.0 / 1_000_000`.
     */
    override fun forward(): Double {
        val shape = outputTensor
        val elements = (0..3).fold(1L) { product, axis -> product * shape[axis] }
        return elements * 4.0 / 1_000_000
    }

    /** The backward pass costs the same as the forward pass. */
    override fun backward(): Double {
        return forward()
    }

    override fun toString(): String {
        return "Dropout[keepProbability=$keepProbability]"
    }
}
/**
 * Represents the shape of a tensor.
 *
 * A dimension of `-1` denotes an unknown size and is rendered as `None` by [toString].
 *
 * @param dims The sizes of the tensor dimensions.
 */
public class TensorShape(vararg dims: Long) {
    /**
     * The dimensions of the tensor represented as [LongArray].
     */
    private val _dims: LongArray = dims

    /**
     * Return amount of elements in Tensor with the given shape.
     *
     * The absolute value of every dimension is used, so an unknown dimension (`-1`) counts as 1.
     */
    public val numElements: Long
        get() = _dims.fold(1L) { product, dim -> product * abs(dim) }

    /**
     * Returns the rank of this shape.
     */
    public val rank: Int
        get() = _dims.size

    /**
     * Returns the value of a dimension
     *
     * @param i The index at which to retrieve a dimension.
     * @return The size of dimension i
     */
    public operator fun get(i: Int): Long = _dims[i]

    /**
     * Test whether dimension i in this shape is known
     *
     * @param i Target dimension to test
     * @return `true` if dimension i is known, `false` if it is unknown (equal to -1)
     */
    private fun isKnown(i: Int): Boolean = _dims[i] != -1L

    /**
     * Get the size of a target dimension. Equivalent to [get].
     *
     * @param i Target dimension.
     * @return The size of dimension i
     */
    public fun size(i: Int): Long = this[i]

    /**
     * Clone the [TensorShape] and return a new instance.
     */
    public fun clone(): TensorShape = TensorShape(*_dims)

    /**
     * Create a string representation of this [TensorShape], e.g. `[1, None, 28, 3]`.
     */
    override fun toString(): String {
        // Decide per dimension; a textual replace of "-1" would also corrupt values such as -10.
        return _dims.indices.joinToString(separator = ", ", prefix = "[", postfix = "]") { i ->
            if (isKnown(i)) _dims[i].toString() else "None"
        }
    }

    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (javaClass != other?.javaClass) return false

        other as TensorShape

        return _dims.contentEquals(other._dims)
    }

    override fun hashCode(): Int = _dims.contentHashCode()
}
package org.opendc.experiments.tf20.network

/**
 * A communication message between TensorFlow worker and master nodes.
 *
 * @property from The source node.
 * @property to The destination node.
 * @property type The type of message sent.
 * @property dataSize The size of the message data. NOTE(review): the unit is not stated in this
 * declaration — confirm against the code that creates messages.
 * @property iterations An iteration count carried with the message. NOTE(review): its exact meaning
 * is not visible in this declaration — confirm against the code that creates messages.
 */
public data class Message(
    val from: NetworkNode,
    val to: NetworkNode,
    val type: MessageType,
    val dataSize: Long,
    val iterations: Int
)
package org.opendc.experiments.tf20.network

/**
 * Enumeration of the types of messages exchanged between worker and master nodes during TensorFlow execution.
 */
public enum class MessageType {
    // NOTE(review): presumably a request sent from one node to another — confirm against the sender.
    REQUEST,

    // NOTE(review): presumably a message that carries model weights — confirm against the sender.
    WEIGHTS
}
package org.opendc.experiments.tf20.network

import kotlinx.coroutines.channels.Channel
import org.opendc.utils.TimerScheduler
import java.time.Clock
import kotlin.coroutines.CoroutineContext
import kotlin.random.Random

/**
 * The network controller represents a simple network model between the worker and master nodes during
 * TensorFlow execution.
 *
 * Nodes register an inbound channel through [addLink]; [send] delivers a message to the channel of its
 * destination after a delay derived from the message size, the bandwidth registered through
 * [addConnection] and a small random jitter.
 *
 * @param context The coroutine context passed to the timer scheduler.
 * @param clock The clock passed to the timer scheduler.
 * @param random The source of the delivery jitter. Defaults to [Random.Default]; pass a seeded instance
 * to obtain reproducible delays.
 */
public class NetworkController(
    context: CoroutineContext,
    clock: Clock,
    private val random: Random = Random.Default
) : AutoCloseable {
    /**
     * The scheduler that delays the delivery of messages.
     */
    private val scheduler = TimerScheduler<Message>(context, clock)

    /**
     * The channel on which each registered node receives its messages.
     */
    private val channels = mutableMapOf<NetworkNode, Channel<Message>>()

    /**
     * A map of the bandwidth between the different nodes.
     */
    private val bandwidthMatrix: MutableMap<Pair<NetworkNode, NetworkNode>, Long> = mutableMapOf()

    /**
     * A counter of the messages passed to [send], including those dropped because the destination is
     * not registered. It is only incremented, never read, inside this class.
     */
    private var messageCounter = 0

    /**
     * Register the specified [node] with this controller.
     *
     * Registering a node again replaces its previous channel.
     *
     * @param node The node to register.
     * @return The unbounded channel on which messages addressed to [node] are delivered.
     */
    public fun addLink(node: NetworkNode): Channel<Message> {
        val channel = Channel<Message>(Channel.UNLIMITED)
        channels[node] = channel
        return channel
    }

    /**
     * Add a connection between two nodes.
     *
     * The connection is looked up in both directions by [send], so it only has to be added once per pair.
     *
     * @param node1 One end of the connection.
     * @param node2 The other end of the connection.
     * @param bandwidth The bandwidth of the connection. NOTE(review): the unit is not stated in this
     * class — confirm against the callers.
     * @throws IllegalArgumentException if [bandwidth] is not positive.
     */
    public fun addConnection(node1: NetworkNode, node2: NetworkNode, bandwidth: Long) {
        // send() divides by the bandwidth: zero would throw an ArithmeticException there and a negative
        // value would produce a negative delay, so reject both at registration time.
        require(bandwidth > 0) { "Bandwidth must be positive, but was $bandwidth" }
        bandwidthMatrix[Pair(node1, node2)] = bandwidth
    }

    /**
     * Route the specified [message].
     *
     * The message is offered to the channel of its destination after a delay of
     * `(dataSize / 1_000_000) / bandwidth` (integer division) plus a random jitter between 0 and 5
     * inclusive. A bandwidth of 1 is assumed when no connection between the two nodes was added.
     * The message is silently dropped when the destination node is not registered.
     *
     * @param message The message to deliver.
     */
    public fun send(message: Message) {
        val from = message.from
        val to = message.to
        val bandwidth = bandwidthMatrix[Pair(from, to)] ?: bandwidthMatrix[Pair(to, from)] ?: 1
        val size = message.dataSize / 1_000_000
        val delayTime = size / bandwidth + (0..5).random(random)

        messageCounter++

        val target = channels[to] ?: return // Drop if destination not found

        // NOTE(review): the message itself is the timer key. If TimerScheduler replaces a pending timer
        // that has an equal key, two equal messages in flight at the same time would collapse into one —
        // confirm against TimerScheduler.
        scheduler.startSingleTimer(message, delayTime) { target.offer(message) }
    }

    /**
     * Stop the network controller by closing its scheduler.
     */
    override fun close() {
        scheduler.close()
    }
}
package org.opendc.experiments.tf20.network

/**
 * A node represents a machine with which other nodes can communicate.
 *
 * Being a data class, two nodes are equal when their host names are equal.
 *
 * @property hostname The host name of the node.
 */
public data class NetworkNode(val hostname: String)
package org.opendc.experiments.tf20.util

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import org.opendc.format.environment.EnvironmentReader
import org.opendc.format.environment.MachineDef
import org.opendc.simulator.compute.SimMachineModel
import org.opendc.simulator.compute.model.MemoryUnit
import org.opendc.simulator.compute.model.ProcessingNode
import org.opendc.simulator.compute.model.ProcessingUnit
import org.opendc.simulator.compute.power.LinearPowerModel
import java.io.InputStream
import java.util.*

/**
 * An [EnvironmentReader] for the TensorFlow experiments.
 *
 * The setup description is parsed from the input stream when the reader is constructed. [read] then turns
 * every machine of every rack into a [MachineDef], resolving the integer identifiers of the description
 * to the hard-coded processing and memory models listed in [read].
 *
 * @param input The stream to parse the setup description from.
 * @param mapper The Jackson mapper used to parse the description.
 */
public class MLEnvironmentReader(input: InputStream, mapper: ObjectMapper = jacksonObjectMapper()) : EnvironmentReader {

    /**
     * The parsed setup description.
     */
    private val setup: Setup = mapper.readValue(input)

    /**
     * Build the machine definitions of the setup.
     *
     * Machines are numbered in the order in which they are encountered: the n-th machine gets the UUID
     * `UUID(0, n)` and the name `node-n`, counting from zero.
     *
     * For each machine:
     *  - the `gpu` metadata flag starts as `true` and becomes `false` as soon as one of its processing
     *    identifiers is 3 or 4;
     *  - the bounds of the power model are those of the last processing identifier in the list, or
     *    350.0/200.0 when the list is empty.
     *
     * @return One [MachineDef] per machine in the setup.
     * @throws IllegalArgumentException if a processing or memory identifier is not recognized.
     */
    override fun read(): List<MachineDef> {
        var counter = 0
        return setup.rooms.flatMap { room ->
            room.objects.flatMap { roomObject ->
                when (roomObject) {
                    is RoomObject.Rack -> {
                        roomObject.machines.map { machine ->
                            var isGpuFlag = true
                            var maxPower = 350.0
                            var minPower = 200.0
                            val cores = machine.cpus.flatMap { id ->
                                when (id) {
                                    1 -> {
                                        // ref: https://www.guru3d.com/articles-pages/nvidia-geforce-gtx-titan-x-review,8.html#:~:text=GeForce%20GTX%20Titan%20X%20%2D%20On,power%20supply%20unit%20as%20minimum.
                                        maxPower = 334.0
                                        minPower = 90.0
                                        val node = ProcessingNode("NVidia", "TITAN X", "Pascal", 4992)
                                        List(node.coreCount) { ProcessingUnit(node, it, 824.0) }
                                    }
                                    2 -> {
                                        // ref: https://www.microway.com/hpc-tech-tips/nvidia-tesla-p100-pci-e-16gb-gpu-accelerator-pascal-gp100-close/
                                        maxPower = 250.0
                                        minPower = 125.0
                                        val node = ProcessingNode("NVIDIA", "Tesla P100", "Pascal", 3584)
                                        List(node.coreCount) { ProcessingUnit(node, it, 1190.0) }
                                    }
                                    3 -> {
                                        // ref: https://www.anandtech.com/show/10923/openpower-saga-tyans-1u-power8-gt75/7
                                        // NOTE(review): this reference is a POWER8 article although the
                                        // branch models an Intel CPU — confirm which id it belongs to.
                                        minPower = 84.0
                                        maxPower = 135.0
                                        val node = ProcessingNode("Intel", "E5-2690v3 Haswell24", "amd64", 24)
                                        isGpuFlag = false
                                        List(node.coreCount) { ProcessingUnit(node, it, 3498.0) }
                                    }
                                    4 -> {
                                        minPower = 130.0
                                        maxPower = 190.0
                                        val node = ProcessingNode("IBM", "POWER8", "RISC", 10)
                                        isGpuFlag = false
                                        List(node.coreCount) { ProcessingUnit(node, it, 143000.0) } // 28600.0 3690
                                    }
                                    else -> throw IllegalArgumentException("The cpu id $id is not recognized")
                                }
                            }
                            // NOTE(review): the bundled setups pair cpu id 3 (Intel) with memory id 3 and
                            // cpu id 4 (IBM) with memory id 4, yet memory 3 is labelled "IBM" and memory 4
                            // "Intel". The vendor labels may be swapped — confirm before relying on them.
                            val memories = machine.memories.map { id ->
                                when (id) {
                                    1 -> MemoryUnit("NVidia", "GDDR5X", 480.0, 24L)
                                    2 -> MemoryUnit("NVidia", "GDDR5X", 720.0, 16L)
                                    3 -> MemoryUnit("IBM", "GDDR5X", 115.0, 160L)
                                    4 -> MemoryUnit("Intel", "GDDR5X", 68.0, 512L)
                                    else -> throw IllegalArgumentException("The memory id $id is not recognized")
                                }
                            }

                            MachineDef(
                                UUID(0, counter.toLong()),
                                "node-${counter++}",
                                mapOf("gpu" to isGpuFlag),
                                SimMachineModel(cores, memories),
                                LinearPowerModel(maxPower, minPower)
                            )
                        }
                    }
                }
            }
        }
    }

    /**
     * Does nothing: the setup was already parsed in full when the reader was constructed.
     */
    override fun close() {}
}
package org.opendc.experiments.tf20.util

import com.fasterxml.jackson.annotation.JsonSubTypes
import com.fasterxml.jackson.annotation.JsonTypeInfo

/**
 * A datacenter setup.
 *
 * @property name The name of the setup.
 * @property rooms The rooms in the datacenter.
 */
internal data class Setup(val name: String, val rooms: List<Room>)

/**
 * A room in a datacenter.
 *
 * @property type The type of room in the datacenter.
 * @property objects The objects in the room.
 */
internal data class Room(val type: String, val objects: List<RoomObject>)

/**
 * An object in a [Room].
 *
 * The concrete subtype is selected by Jackson from the `type` property of the serialized object; the
 * only registered subtype is `RACK`, which maps to [Rack].
 *
 * @property type The type of the room object.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type")
@JsonSubTypes(value = [JsonSubTypes.Type(name = "RACK", value = RoomObject.Rack::class)])
internal sealed class RoomObject(val type: String) {
    /**
     * A rack in a server room.
     *
     * @property machines The machines in the rack.
     */
    internal data class Rack(val machines: List<Machine>) : RoomObject("RACK")
}

/**
 * A machine in the setup that consists of the specified processing units and memories, both represented
 * as integer identifiers.
 *
 * @property cpus The Processing Units(CPUs/GPUs) in the machine represented as integer identifiers.
 * @property memories The memories in the machine represented as integer identifiers.
 */
internal data class Machine(val cpus: List<Int>, val memories: List<Int>)
"memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]} + ] + }, + { + "type": "RACK", + "machines": [ + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]} + ] + }, + { + "type": "RACK", + "machines": [ + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]}, + { "cpus": [2], "memories": [2]} + ] + }, + { + "type": "RACK", + "machines": [ + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]} + ] + }, + { + "type": "RACK", + "machines": [ + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": [4], "memories": [4]}, + { "cpus": 
[4], "memories": [4]} + ] + } + + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/resources/kth.json b/opendc-experiments/opendc-experiments-tf20/src/main/resources/kth.json new file mode 100644 index 00000000..50eecb47 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/main/resources/kth.json @@ -0,0 +1,33 @@ +{ + "name": "KTH Environment Setup", + "rooms": [ + { + "type": "SERVER", + "objects": [ + { + "type": "RACK", + "machines": [ + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [1], "memories": [1]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]}, + {"cpus": [3], "memories": [3]} + ] + } + ] + } + ] +} diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt new file mode 100644 index 00000000..28a2a319 --- /dev/null +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * 
package org.opendc.experiments.tf20.core

import io.opentelemetry.api.metrics.MeterProvider
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.launch
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import org.opendc.simulator.compute.model.MemoryUnit
import org.opendc.simulator.compute.model.ProcessingNode
import org.opendc.simulator.compute.model.ProcessingUnit
import org.opendc.simulator.compute.power.LinearPowerModel
import org.opendc.simulator.core.runBlockingSimulation
import java.util.*

/**
 * Test suite for the [SimTFDevice] class.
 */
internal class SimTFDeviceTest {
    /**
     * Smoke test: load data into a simulated GPU device, run two computations that are launched
     * together, and check the simulated clock after each step.
     */
    @Test
    fun testSmoke() = runBlockingSimulation {
        // A no-op meter provider: the test does not inspect any metrics.
        val meterProvider: MeterProvider = MeterProvider.noop()
        val meter = meterProvider.get("opendc-tf20")

        val puNode = ProcessingNode("NVIDIA", "Tesla V100", "unknown", 1)
        val pu = ProcessingUnit(puNode, 0, 960 * 1230.0)
        val memory = MemoryUnit("NVIDIA", "Tesla V100", 877.0, 32_000)

        val device = SimTFDevice(UUID.randomUUID(), isGpu = true, coroutineContext, clock, meter, pu, memory, LinearPowerModel(250.0, 100.0))

        // Load data into GPU memory.
        // NOTE(review): the original comment said "1 GiB" while the argument is 1000 — confirm the
        // unit that load() expects.
        device.load(1000)
        assertEquals(1140, clock.millis())

        // Both computations are launched in the same scope; coroutineScope returns only after both
        // have completed, so the clock is checked once all the work is done.
        coroutineScope {
            launch { device.compute(1e6) }
            launch { device.compute(2e6) }
        }
        assertEquals(3681, clock.millis())
    }
}
