diff options
| author | Dante Niewenhuis <d.niewenhuis@hotmail.com> | 2024-03-05 13:23:57 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-05 13:23:57 +0100 |
| commit | 5864cbcbfe2eb8c36ca05c3a39c7e5916aeecaec (patch) | |
| tree | 5b2773b8dc21c2e1b526fb70f829c376dd80532a /opendc-experiments/opendc-experiments-tf20/src | |
| parent | d28002a3c151d198298574312f32f1cb43f3a660 (diff) | |
Updated package versions, updated web server tests. (#207)
* Updated all package versions, including Kotlin. Updated all web-server tests so that they run.
* Changed the Java version of the tests. OpenDC now only supports Java 19.
* small update
* test update
* new update
* updated docker version to 19
* updated docker version to 19
Diffstat (limited to 'opendc-experiments/opendc-experiments-tf20/src')
21 files changed, 317 insertions, 284 deletions
diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt index be166bd5..78a63df8 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/Models.kt @@ -35,7 +35,7 @@ import org.opendc.experiments.tf20.keras.layer.regularization.Dropout /** * Construct an AlexNet model with the given batch size. */ -fun AlexNet(batchSize: Long): TrainableModel { +fun getAlexNet(batchSize: Long): TrainableModel { return Sequential( Input(batchSize, 227, 227, 3, name = "Input"), Conv2D(longArrayOf(11, 11, 3, 96), longArrayOf(1, 4, 4, 1), padding = ConvPadding.VALID, name = "conv1"), @@ -51,14 +51,14 @@ fun AlexNet(batchSize: Long): TrainableModel { Conv2D(longArrayOf(1, 1, 4096, 4096), longArrayOf(1, 1, 1, 1), padding = ConvPadding.SAME, name = "fc7"), Dropout(0.5f, name = "dropout7"), Conv2D(longArrayOf(1, 1, 4096, 1000), longArrayOf(1, 1, 1, 1), padding = ConvPadding.SAME, name = "f8"), - ActivationLayer(Activation.Softmax, name = "softmax") + ActivationLayer(Activation.Softmax, name = "softmax"), ) } /** * Construct an VGG16 model with the given batch size. 
*/ -fun VGG16(batchSize: Long = 128): TrainableModel { +fun getVGG16(batchSize: Long = 128): TrainableModel { return Sequential( Input(batchSize, 224, 224, 3, name = "Input"), Conv2D(longArrayOf(3, 3, 3, 64), longArrayOf(1, 1, 1, 1), padding = ConvPadding.SAME, name = "conv1-1"), @@ -84,6 +84,6 @@ fun VGG16(batchSize: Long = 128): TrainableModel { Conv2D(longArrayOf(1, 1, 4096, 4096), longArrayOf(1, 1, 1, 1), padding = ConvPadding.SAME, name = "fc7"), Dropout(0.5f, name = "dropout7"), Conv2D(longArrayOf(1, 1, 4096, 1000), longArrayOf(1, 1, 1, 1), padding = ConvPadding.SAME, name = "f8"), - ActivationLayer(Activation.Softmax, name = "softmax") + ActivationLayer(Activation.Softmax, name = "softmax"), ) } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt index a1fc3fba..b14e499c 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/SimTFDevice.kt @@ -54,102 +54,107 @@ public class SimTFDevice( dispatcher: Dispatcher, pu: ProcessingUnit, private val memory: MemoryUnit, - powerModel: CpuPowerModel + powerModel: CpuPowerModel, ) : TFDevice { /** * The [SimMachine] representing the device. */ - private val machine = SimBareMetalMachine.create( - FlowEngine.create(dispatcher).newGraph(), - MachineModel(listOf(pu), listOf(memory)), - SimPsuFactories.simple(powerModel) - ) + private val machine = + SimBareMetalMachine.create( + FlowEngine.create(dispatcher).newGraph(), + MachineModel(listOf(pu), listOf(memory)), + SimPsuFactories.simple(powerModel), + ) /** * The workload that will be run by the device. */ - private val workload = object : SimWorkload, FlowStageLogic { - /** - * The [FlowStage] of the workload. 
- */ - var stage: FlowStage? = null - - /** - * The output of the workload. - */ - private var output: OutPort? = null - - /** - * The queue of work to run. - */ - val queue = ArrayDeque<Work>() - - /** - * A flag to indicate that the workload is idle. - */ - val isIdle - get() = activeWork == null - - /** - * The active work of the workload. - */ - private var activeWork: Work? = null - - /** - * The timestamp of the last pull. - */ - private var lastPull: Long = 0L - - override fun onStart(ctx: SimMachineContext) { - val stage = ctx.graph.newStage(this) - this.stage = stage - output = stage.getOutlet("out") - lastPull = ctx.graph.engine.clock.millis() - - ctx.graph.connect(output, ctx.cpus[0].input) - } - - override fun onStop(ctx: SimMachineContext) { - stage?.close() - stage = null - output = null - } - - override fun setOffset(now: Long) {} + private val workload = + object : SimWorkload, FlowStageLogic { + /** + * The [FlowStage] of the workload. + */ + var stage: FlowStage? = null + + /** + * The output of the workload. + */ + private var output: OutPort? = null + + /** + * The queue of work to run. + */ + val queue = ArrayDeque<Work>() + + /** + * A flag to indicate that the workload is idle. + */ + val isIdle + get() = activeWork == null + + /** + * The active work of the workload. + */ + private var activeWork: Work? = null + + /** + * The timestamp of the last pull. 
+ */ + private var lastPull: Long = 0L + + override fun onStart(ctx: SimMachineContext) { + val stage = ctx.graph.newStage(this) + this.stage = stage + output = stage.getOutlet("out") + lastPull = ctx.graph.engine.clock.millis() + + ctx.graph.connect(output, ctx.cpus[0].input) + } - override fun snapshot(): SimWorkload = throw UnsupportedOperationException() + override fun onStop(ctx: SimMachineContext) { + stage?.close() + stage = null + output = null + } - override fun onUpdate(ctx: FlowStage, now: Long): Long { - val output = output ?: return Long.MAX_VALUE - val lastPull = lastPull - this.lastPull = now - val delta = (now - lastPull).coerceAtLeast(0) - val consumedWork = output.rate * delta / 1000.0 + override fun setOffset(now: Long) {} + + override fun snapshot(): SimWorkload = throw UnsupportedOperationException() + + override fun onUpdate( + ctx: FlowStage, + now: Long, + ): Long { + val output = output ?: return Long.MAX_VALUE + val lastPull = lastPull + this.lastPull = now + val delta = (now - lastPull).coerceAtLeast(0) + val consumedWork = output.rate * delta / 1000.0 + + val activeWork = activeWork + if (activeWork != null) { + if (activeWork.consume(consumedWork)) { + this.activeWork = null + } else { + val duration = ceil(activeWork.flops / output.capacity * 1000).toLong() + output.push(output.capacity) + return now + duration + } + } - val activeWork = activeWork - if (activeWork != null) { - if (activeWork.consume(consumedWork)) { - this.activeWork = null - } else { - val duration = ceil(activeWork.flops / output.capacity * 1000).toLong() + val queue = queue + val head = queue.poll() + return if (head != null) { + this.activeWork = head + val duration = (head.flops / output.capacity * 1000).roundToLong() output.push(output.capacity) - return now + duration + now + duration + } else { + output.push(0.0f) + Long.MAX_VALUE } } - - val queue = queue - val head = queue.poll() - return if (head != null) { - this.activeWork = head - val duration = 
(head.flops / output.capacity * 1000).roundToLong() - output.push(output.capacity) - now + duration - } else { - output.push(0.0f) - Long.MAX_VALUE - } } - } init { machine.startWorkload(workload, emptyMap()) {} @@ -160,12 +165,13 @@ public class SimTFDevice( delay(duration.toLong()) } - override suspend fun compute(flops: Double) = suspendCancellableCoroutine<Unit> { cont -> - workload.queue.add(Work(flops, cont)) - if (workload.isIdle) { - workload.stage?.invalidate() + override suspend fun compute(flops: Double) = + suspendCancellableCoroutine<Unit> { cont -> + workload.queue.add(Work(flops, cont)) + if (workload.isIdle) { + workload.stage?.invalidate() + } } - } override fun getDeviceStats(): TFDeviceStats { return TFDeviceStats(machine.cpuUsage, machine.psu.powerDraw, machine.psu.energyUsage) diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt index 3fea44da..c40982f8 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/core/TFDeviceStats.kt @@ -32,5 +32,5 @@ package org.opendc.experiments.tf20.core data class TFDeviceStats( val resourceUsage: Double, val powerDraw: Double, - val energyUsage: Double + val energyUsage: Double, ) diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt index 8caa7ec9..69d180a9 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt +++ 
b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/MirroredStrategy.kt @@ -32,7 +32,11 @@ import org.opendc.experiments.tf20.core.TFDevice * It creates one replica per GPU device. Each variable in the model is mirrored across all the replicas. */ public class MirroredStrategy(val devices: List<TFDevice>) : Strategy { - override suspend fun run(forward: Double, backward: Double, batchSize: Int) = coroutineScope { + override suspend fun run( + forward: Double, + backward: Double, + batchSize: Int, + ) = coroutineScope { for (device in devices) { launch { device.compute(forward * batchSize / devices.size + backward) } } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt index 271fab98..05235b12 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/OneDeviceStrategy.kt @@ -28,7 +28,11 @@ import org.opendc.experiments.tf20.core.TFDevice * A distribution [Strategy] that places all variables and computation on a single specified device. 
*/ public class OneDeviceStrategy(val device: TFDevice) : Strategy { - override suspend fun run(forward: Double, backward: Double, batchSize: Int) { + override suspend fun run( + forward: Double, + backward: Double, + batchSize: Int, + ) { device.compute(forward * batchSize + backward) } } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt index 3e755b56..d5da628a 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/distribute/Strategy.kt @@ -29,5 +29,9 @@ public interface Strategy { /** * Converge the specified batch using the given strategy. */ - public suspend fun run(forward: Double, backward: Double, batchSize: Int) + public suspend fun run( + forward: Double, + backward: Double, + batchSize: Int, + ) } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt index 2cac6cbc..2d621d16 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/TrainableModel.kt @@ -110,7 +110,10 @@ public abstract class TrainableModel(vararg layers: Layer) : AutoCloseable { * @param [epochs] Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided. * @param [batchSize] Number of samples per gradient update. 
*/ - public suspend fun fit(epochs: Int = 5, batchSize: Int = 32) { + public suspend fun fit( + epochs: Int = 5, + batchSize: Int = 32, + ) { check(isCompiled) { "Model not yet compiled." } val forwardFlops = forward() diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/activations/Activation.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/activations/Activation.kt index 403acfc0..cb3b778e 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/activations/Activation.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/activations/Activation.kt @@ -194,5 +194,5 @@ public enum class Activation { * * @see <a href="https://arxiv.org/abs/1710.05941">Ramachandran et al., 2017</a> */ - Swish; + Swish, } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/Conv2D.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/Conv2D.kt index 74124bbd..f89c47c6 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/Conv2D.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/Conv2D.kt @@ -35,13 +35,12 @@ import kotlin.math.ceil * Finally, if `activation` is applied to the outputs as well. 
*/ public class Conv2D( - public val filter: LongArray = LongArray(4), // [H, W, channel_in, channel_out] - public val strides: LongArray = LongArray(4), // [1, stride_h, stride_w, 1] + public val filter: LongArray = LongArray(4), + public val strides: LongArray = LongArray(4), public val activation: Activation = Activation.Relu, public val padding: ConvPadding = ConvPadding.VALID, - name: String = "" + name: String = "", ) : Layer(name) { - private var padHeight: Double = 0.0 private var padWidth: Double = 0.0 diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/ConvPadding.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/ConvPadding.kt index 03ae6282..a47c435a 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/ConvPadding.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/conv/ConvPadding.kt @@ -35,5 +35,5 @@ public enum class ConvPadding { /** * No padding. 
*/ - VALID + VALID, } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/core/ActivationLayer.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/core/ActivationLayer.kt index 60b0f754..000401b9 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/core/ActivationLayer.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/core/ActivationLayer.kt @@ -31,9 +31,8 @@ import org.opendc.experiments.tf20.keras.shape.TensorShape */ public class ActivationLayer( public val activation: Activation = Activation.Relu, - name: String = "" + name: String = "", ) : Layer(name) { - override fun build(inputShape: TensorShape) { // Intentionally left empty } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/pool/Pool2D.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/pool/Pool2D.kt index 3c6b15bb..a9a54938 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/pool/Pool2D.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/pool/Pool2D.kt @@ -40,9 +40,8 @@ public class Pool2D( public val poolSize: IntArray = intArrayOf(1, 2, 2, 1), public val strides: IntArray = intArrayOf(1, 2, 2, 1), public val padding: ConvPadding = ConvPadding.VALID, - name: String + name: String, ) : Layer(name) { - private var padHeight = 0L private var padWidth = 0L diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/regularization/Dropout.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/regularization/Dropout.kt index ff5f7711..8198f98c 100644 --- 
a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/regularization/Dropout.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/layer/regularization/Dropout.kt @@ -38,7 +38,7 @@ import org.opendc.experiments.tf20.keras.shape.TensorShape */ public class Dropout( public val keepProbability: Float = 0.1f, - name: String + name: String, ) : Layer(name) { override fun build(inputShape: TensorShape) {} diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/shape/TensorShape.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/shape/TensorShape.kt index 7affcb63..67e00e24 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/shape/TensorShape.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/keras/shape/TensorShape.kt @@ -33,7 +33,7 @@ public class TensorShape(vararg dims: Long) { /** * The dimensions of the tensor represented as [LongArray]. */ - private val _dims: LongArray = dims + private val localDims: LongArray = dims /** * Return amount of elements in Tensor with the given shape. @@ -42,7 +42,7 @@ public class TensorShape(vararg dims: Long) { get() { var prod = 1L for (i in 0 until rank) { - prod *= abs(_dims[i]) + prod *= abs(localDims[i]) } return prod } @@ -51,7 +51,7 @@ public class TensorShape(vararg dims: Long) { * Returns the rank of this shape. 
*/ public val rank: Int - get() = _dims.size + get() = localDims.size /** * Returns the value of a dimension @@ -60,7 +60,7 @@ public class TensorShape(vararg dims: Long) { * @return The size of dimension i */ public operator fun get(i: Int): Long { - return _dims[i] + return localDims[i] } /** @@ -70,7 +70,7 @@ public class TensorShape(vararg dims: Long) { * @return Whether dimension i is unknown (equal to -1) */ private fun isKnown(i: Int): Boolean { - return _dims[i] != -1L + return localDims[i] != -1L } /** @@ -80,21 +80,21 @@ public class TensorShape(vararg dims: Long) { * @return The size of dimension i */ public fun size(i: Int): Long { - return _dims[i] + return localDims[i] } /** * Clone the [TensorShape] and return a new instance. */ public fun clone(): TensorShape { - return TensorShape(*_dims) + return TensorShape(*localDims) } /** * Create a string representation of this [TensorShape]. */ override fun toString(): String { - return _dims.contentToString().replace("-1", "None") + return localDims.contentToString().replace("-1", "None") } override fun equals(other: Any?): Boolean { @@ -103,12 +103,12 @@ public class TensorShape(vararg dims: Long) { other as TensorShape - if (!_dims.contentEquals(other._dims)) return false + if (!localDims.contentEquals(other.localDims)) return false return true } override fun hashCode(): Int { - return _dims.contentHashCode() + return localDims.contentHashCode() } } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt index d6360873..fddcc779 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/Message.kt @@ -35,5 +35,5 @@ public data class Message( val to: NetworkNode, val type: 
MessageType, val dataSize: Long, - val iterations: Int + val iterations: Int, ) diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt index 8be16261..d7130137 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/MessageType.kt @@ -27,5 +27,5 @@ package org.opendc.experiments.tf20.network */ public enum class MessageType { REQUEST, - WEIGHTS + WEIGHTS, } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt index 5b408fb3..a4e79b4e 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/network/NetworkController.kt @@ -63,7 +63,11 @@ public class NetworkController(dispatcher: Dispatcher) : AutoCloseable { /** * Add a connection between two links. 
*/ - public fun addConnection(node1: NetworkNode, node2: NetworkNode, bandwidth: Long) { + public fun addConnection( + node1: NetworkNode, + node2: NetworkNode, + bandwidth: Long, + ) { bandwidthMatrix[Pair(node1, node2)] = bandwidth } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MLEnvironmentReader.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MLEnvironmentReader.kt index 2a7578b3..077bcc04 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MLEnvironmentReader.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MLEnvironmentReader.kt @@ -53,56 +53,58 @@ public class MLEnvironmentReader { var isGpuFlag = true var maxPower = 350.0 var minPower = 200.0 - val cores = machine.cpus.flatMap { id -> - when (id) { - 1 -> { - // ref: https://www.guru3d.com/articles-pages/nvidia-geforce-gtx-titan-x-review,8.html#:~:text=GeForce%20GTX%20Titan%20X%20%2D%20On,power%20supply%20unit%20as%20minimum. - maxPower = 334.0 - minPower = 90.0 - val node = ProcessingNode("NVidia", "TITAN X", "Pascal", 4992) - List(node.coreCount) { ProcessingUnit(node, it, 824.0) } + val cores = + machine.cpus.flatMap { id -> + when (id) { + 1 -> { + // ref: https://www.guru3d.com/articles-pages/nvidia-geforce-gtx-titan-x-review,8.html#:~:text=GeForce%20GTX%20Titan%20X%20%2D%20On,power%20supply%20unit%20as%20minimum. 
+ maxPower = 334.0 + minPower = 90.0 + val node = ProcessingNode("NVidia", "TITAN X", "Pascal", 4992) + List(node.coreCount) { ProcessingUnit(node, it, 824.0) } + } + 2 -> { + // ref: https://www.microway.com/hpc-tech-tips/nvidia-tesla-p100-pci-e-16gb-gpu-accelerator-pascal-gp100-close/ + maxPower = 250.0 + minPower = 125.0 + val node = ProcessingNode("NVIDIA", "Tesla P100", "Pascal", 3584) + List(node.coreCount) { ProcessingUnit(node, it, 1190.0) } + } + 3 -> { + // ref: https://www.anandtech.com/show/10923/openpower-saga-tyans-1u-power8-gt75/7 + minPower = 84.0 + maxPower = 135.0 + val node = ProcessingNode("Intel", "E5-2690v3 Haswell24", "amd64", 24) + isGpuFlag = false + List(node.coreCount) { ProcessingUnit(node, it, 3498.0) } + } + 4 -> { + minPower = 130.0 + maxPower = 190.0 + val node = ProcessingNode("IBM", "POWER8", "RISC", 10) + isGpuFlag = false + List(node.coreCount) { ProcessingUnit(node, it, 143000.0) } // 28600.0 3690 + } + else -> throw IllegalArgumentException("The cpu id $id is not recognized") } - 2 -> { - // ref: https://www.microway.com/hpc-tech-tips/nvidia-tesla-p100-pci-e-16gb-gpu-accelerator-pascal-gp100-close/ - maxPower = 250.0 - minPower = 125.0 - val node = ProcessingNode("NVIDIA", "Tesla P100", "Pascal", 3584) - List(node.coreCount) { ProcessingUnit(node, it, 1190.0) } - } - 3 -> { - // ref: https://www.anandtech.com/show/10923/openpower-saga-tyans-1u-power8-gt75/7 - minPower = 84.0 - maxPower = 135.0 - val node = ProcessingNode("Intel", "E5-2690v3 Haswell24", "amd64", 24) - isGpuFlag = false - List(node.coreCount) { ProcessingUnit(node, it, 3498.0) } - } - 4 -> { - minPower = 130.0 - maxPower = 190.0 - val node = ProcessingNode("IBM", "POWER8", "RISC", 10) - isGpuFlag = false - List(node.coreCount) { ProcessingUnit(node, it, 143000.0) } // 28600.0 3690 - } - else -> throw IllegalArgumentException("The cpu id $id is not recognized") } - } - val memories = machine.memories.map { id -> - when (id) { - 1 -> MemoryUnit("NVidia", "GDDR5X", 
480.0, 24L) - 2 -> MemoryUnit("NVidia", "GDDR5X", 720.0, 16L) - 3 -> MemoryUnit("IBM", "GDDR5X", 115.0, 160L) - 4 -> MemoryUnit("Inter", "GDDR5X", 68.0, 512L) - else -> throw IllegalArgumentException("The cpu id $id is not recognized") + val memories = + machine.memories.map { id -> + when (id) { + 1 -> MemoryUnit("NVidia", "GDDR5X", 480.0, 24L) + 2 -> MemoryUnit("NVidia", "GDDR5X", 720.0, 16L) + 3 -> MemoryUnit("IBM", "GDDR5X", 115.0, 160L) + 4 -> MemoryUnit("Inter", "GDDR5X", 68.0, 512L) + else -> throw IllegalArgumentException("The cpu id $id is not recognized") + } } - } MachineDef( UUID(0, counter.toLong()), "node-${counter++}", mapOf("gpu" to isGpuFlag), MachineModel(cores, memories), - CpuPowerModels.linear(maxPower, minPower) + CpuPowerModels.linear(maxPower, minPower), ) } } diff --git a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MachineDef.kt b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MachineDef.kt index 6b72e155..7ff91797 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MachineDef.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/main/kotlin/org/opendc/experiments/tf20/util/MachineDef.kt @@ -34,5 +34,5 @@ public data class MachineDef( val name: String, val meta: Map<String, Any>, val model: MachineModel, - val powerModel: CpuPowerModel + val powerModel: CpuPowerModel, ) diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt index 899aafc0..e3814175 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/TensorFlowTest.kt @@ -41,114 +41,121 @@ class TensorFlowTest { * Smoke 
test that tests the capabilities of the TensorFlow application model in OpenDC. */ @Test - fun testSmokeAlexNet() = runSimulation { - val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json")) - val def = MLEnvironmentReader().readEnvironment(envInput).first() - - val device = SimTFDevice( - def.uid, - def.meta["gpu"] as Boolean, - dispatcher, - def.model.cpus[0], - def.model.memory[0], - CpuPowerModels.linear(250.0, 60.0) - ) - val strategy = OneDeviceStrategy(device) - val batchSize = 32 - val model = AlexNet(batchSize.toLong()) - model.use { - it.compile(strategy) - - it.fit(epochs = 9088 / batchSize, batchSize = batchSize) + fun testSmokeAlexNet() = + runSimulation { + val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json")) + val def = MLEnvironmentReader().readEnvironment(envInput).first() + + val device = + SimTFDevice( + def.uid, + def.meta["gpu"] as Boolean, + dispatcher, + def.model.cpus[0], + def.model.memory[0], + CpuPowerModels.linear(250.0, 60.0), + ) + val strategy = OneDeviceStrategy(device) + val batchSize = 32 + val model = getAlexNet(batchSize.toLong()) + model.use { + it.compile(strategy) + + it.fit(epochs = 9088 / batchSize, batchSize = batchSize) + } + + device.close() + + val stats = device.getDeviceStats() + assertAll( + { assertEquals(3309694252, timeSource.millis()) }, + { assertEquals(8.27423563E8, stats.energyUsage) }, + ) } - device.close() - - val stats = device.getDeviceStats() - assertAll( - { assertEquals(3309694252, timeSource.millis()) }, - { assertEquals(8.27423563E8, stats.energyUsage) } - ) - } - /** * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC. 
*/ @Test - fun testSmokeVGG() = runSimulation { - val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json")) - val def = MLEnvironmentReader().readEnvironment(envInput).first() - - val device = SimTFDevice( - def.uid, - def.meta["gpu"] as Boolean, - dispatcher, - def.model.cpus[0], - def.model.memory[0], - CpuPowerModels.linear(250.0, 60.0) - ) - val strategy = OneDeviceStrategy(device) - val batchSize = 128 - val model = VGG16(batchSize.toLong()) - model.use { - it.compile(strategy) - - it.fit(epochs = 9088 / batchSize, batchSize = batchSize) + fun testSmokeVGG() = + runSimulation { + val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json")) + val def = MLEnvironmentReader().readEnvironment(envInput).first() + + val device = + SimTFDevice( + def.uid, + def.meta["gpu"] as Boolean, + dispatcher, + def.model.cpus[0], + def.model.memory[0], + CpuPowerModels.linear(250.0, 60.0), + ) + val strategy = OneDeviceStrategy(device) + val batchSize = 128 + val model = getVGG16(batchSize.toLong()) + model.use { + it.compile(strategy) + + it.fit(epochs = 9088 / batchSize, batchSize = batchSize) + } + + device.close() + + val stats = device.getDeviceStats() + assertAll( + { assertEquals(176230328513, timeSource.millis()) }, + { assertEquals(4.405758212825E10, stats.energyUsage) }, + ) } - device.close() - - val stats = device.getDeviceStats() - assertAll( - { assertEquals(176230328513, timeSource.millis()) }, - { assertEquals(4.405758212825E10, stats.energyUsage) } - ) - } - /** * Smoke test that tests the capabilities of the TensorFlow application model in OpenDC. 
*/ @Test - fun testSmokeDistribute() = runSimulation { - val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json")) - val def = MLEnvironmentReader().readEnvironment(envInput).first() - - val deviceA = SimTFDevice( - def.uid, - def.meta["gpu"] as Boolean, - dispatcher, - def.model.cpus[0], - def.model.memory[0], - CpuPowerModels.linear(250.0, 60.0) - ) - - val deviceB = SimTFDevice( - UUID.randomUUID(), - def.meta["gpu"] as Boolean, - dispatcher, - def.model.cpus[0], - def.model.memory[0], - CpuPowerModels.linear(250.0, 60.0) - ) - - val strategy = MirroredStrategy(listOf(deviceA, deviceB)) - val batchSize = 32 - val model = AlexNet(batchSize.toLong()) - model.use { - it.compile(strategy) - - it.fit(epochs = 9088 / batchSize, batchSize = batchSize) + fun testSmokeDistribute() = + runSimulation { + val envInput = checkNotNull(TensorFlowTest::class.java.getResourceAsStream("/kth.json")) + val def = MLEnvironmentReader().readEnvironment(envInput).first() + + val deviceA = + SimTFDevice( + def.uid, + def.meta["gpu"] as Boolean, + dispatcher, + def.model.cpus[0], + def.model.memory[0], + CpuPowerModels.linear(250.0, 60.0), + ) + + val deviceB = + SimTFDevice( + UUID.randomUUID(), + def.meta["gpu"] as Boolean, + dispatcher, + def.model.cpus[0], + def.model.memory[0], + CpuPowerModels.linear(250.0, 60.0), + ) + + val strategy = MirroredStrategy(listOf(deviceA, deviceB)) + val batchSize = 32 + val model = getAlexNet(batchSize.toLong()) + model.use { + it.compile(strategy) + + it.fit(epochs = 9088 / batchSize, batchSize = batchSize) + } + + deviceA.close() + deviceB.close() + + val statsA = deviceA.getDeviceStats() + val statsB = deviceB.getDeviceStats() + assertAll( + { assertEquals(1704994000, timeSource.millis()) }, + { assertEquals(4.262485E8, statsA.energyUsage) }, + { assertEquals(4.262485E8, statsB.energyUsage) }, + ) } - - deviceA.close() - deviceB.close() - - val statsA = deviceA.getDeviceStats() - val statsB = deviceB.getDeviceStats() 
- assertAll( - { assertEquals(1704994000, timeSource.millis()) }, - { assertEquals(4.262485E8, statsA.energyUsage) }, - { assertEquals(4.262485E8, statsB.energyUsage) } - ) - } } diff --git a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt index 549c6f3e..76473868 100644 --- a/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt +++ b/opendc-experiments/opendc-experiments-tf20/src/test/kotlin/org/opendc/experiments/tf20/core/SimTFDeviceTest.kt @@ -39,36 +39,38 @@ import java.util.UUID */ internal class SimTFDeviceTest { @Test - fun testSmoke() = runSimulation { - val puNode = ProcessingNode("NVIDIA", "Tesla V100", "unknown", 1) - val pu = ProcessingUnit(puNode, 0, 960 * 1230.0) - val memory = MemoryUnit("NVIDIA", "Tesla V100", 877.0, 32_000) + fun testSmoke() = + runSimulation { + val puNode = ProcessingNode("NVIDIA", "Tesla V100", "unknown", 1) + val pu = ProcessingUnit(puNode, 0, 960 * 1230.0) + val memory = MemoryUnit("NVIDIA", "Tesla V100", 877.0, 32_000) - val device = SimTFDevice( - UUID.randomUUID(), - isGpu = true, - dispatcher, - pu, - memory, - CpuPowerModels.linear(250.0, 100.0) - ) + val device = + SimTFDevice( + UUID.randomUUID(), + isGpu = true, + dispatcher, + pu, + memory, + CpuPowerModels.linear(250.0, 100.0), + ) - // Load 1 GiB into GPU memory - device.load(1000) - assertEquals(1140, timeSource.millis()) + // Load 1 GiB into GPU memory + device.load(1000) + assertEquals(1140, timeSource.millis()) - coroutineScope { - launch { device.compute(1e6) } - launch { device.compute(2e6) } - } + coroutineScope { + launch { device.compute(1e6) } + launch { device.compute(2e6) } + } - device.close() + device.close() - val stats = device.getDeviceStats() + val stats = device.getDeviceStats() - assertAll( - { assertEquals(3681, 
timeSource.millis()) }, - { assertEquals(749.25, stats.energyUsage) } - ) - } + assertAll( + { assertEquals(3681, timeSource.millis()) }, + { assertEquals(749.25, stats.energyUsage) }, + ) + } } |
