From be34a55c2c2fe94a6883c6b97d2abe4c43288e8a Mon Sep 17 00:00:00 2001
From: Fabian Mastenbroek
Date: Wed, 23 Jun 2021 16:54:31 +0200
Subject: format: Remove performance interference from trace readers

This change updates the trace reader implementations to remove their
dependency on the performance interference model. In a future commit, we
will instead pass the performance interference model via the
host/hypervisor.
---
 .../experiments/capelin/ExperimentHelpers.kt | 49 +-
 .../org/opendc/experiments/capelin/Portfolio.kt | 34 +-
 .../capelin/env/ClusterEnvironmentReader.kt | 122 ++++
 .../capelin/trace/ParquetTraceReader.kt | 65 +++
 .../capelin/trace/RawParquetTraceReader.kt | 137 +++++
 .../capelin/trace/Sc20ParquetTraceReader.kt | 83 ---
 .../capelin/trace/Sc20RawParquetTraceReader.kt | 149 -----
 .../trace/Sc20StreamingParquetTraceReader.kt | 283 ----------
 .../capelin/trace/Sc20TraceConverter.kt | 621 ---------------------
 .../capelin/trace/StreamingParquetTraceReader.kt | 261 +++++++++
 .../experiments/capelin/trace/TraceConverter.kt | 620 ++++++++++++++++++++
 .../experiments/capelin/trace/VmPlacementReader.kt | 52 ++
 12 files changed, 1282 insertions(+), 1194 deletions(-)
 create mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/env/ClusterEnvironmentReader.kt
 create mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/ParquetTraceReader.kt
 create mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/RawParquetTraceReader.kt
 delete mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt
 delete mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt
 delete mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt
 delete mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt
 create mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/StreamingParquetTraceReader.kt
 create mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/TraceConverter.kt
 create mode 100644 opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/VmPlacementReader.kt

diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt
index 47f5f71e..06251dd3 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/ExperimentHelpers.kt
@@ -41,10 +41,8 @@ import org.opendc.compute.service.scheduler.ComputeScheduler
 import org.opendc.compute.simulator.SimHost
 import org.opendc.experiments.capelin.monitor.ExperimentMetricExporter
 import org.opendc.experiments.capelin.monitor.ExperimentMonitor
-import org.opendc.experiments.capelin.trace.Sc20StreamingParquetTraceReader
 import
org.opendc.format.environment.EnvironmentReader import org.opendc.format.trace.TraceReader -import org.opendc.simulator.compute.interference.PerformanceInterferenceModel import org.opendc.simulator.compute.kernel.SimFairShareHypervisorProvider import org.opendc.simulator.compute.workload.SimTraceWorkload import org.opendc.simulator.compute.workload.SimWorkload @@ -53,7 +51,6 @@ import org.opendc.simulator.failures.FaultInjector import org.opendc.simulator.resources.SimResourceInterpreter import org.opendc.telemetry.sdk.metrics.export.CoroutineMetricReader import org.opendc.telemetry.sdk.toOtelClock -import java.io.File import java.time.Clock import kotlin.coroutines.resume import kotlin.math.ln @@ -68,7 +65,7 @@ private val logger = KotlinLogging.logger {} /** * Construct the failure domain for the experiments. */ -public fun createFailureDomain( +fun createFailureDomain( coroutineScope: CoroutineScope, clock: Clock, seed: Int, @@ -100,7 +97,7 @@ public fun createFailureDomain( /** * Obtain the [FaultInjector] to use for the experiments. */ -public fun createFaultInjector( +fun createFaultInjector( coroutineScope: CoroutineScope, clock: Clock, random: Random, @@ -118,27 +115,10 @@ public fun createFaultInjector( ) } -/** - * Create the trace reader from which the VM workloads are read. - */ -public fun createTraceReader( - path: File, - performanceInterferenceModel: PerformanceInterferenceModel, - vms: List, - seed: Int -): Sc20StreamingParquetTraceReader { - return Sc20StreamingParquetTraceReader( - path, - performanceInterferenceModel, - vms, - Random(seed) - ) -} - /** * Construct the environment for a simulated compute service.. */ -public suspend fun withComputeService( +suspend fun withComputeService( clock: Clock, meterProvider: MeterProvider, environmentReader: EnvironmentReader, @@ -182,15 +162,13 @@ public suspend fun withComputeService( * Attach the specified monitor to the VM provisioner. */ @OptIn(ExperimentalCoroutinesApi::class) -public suspend fun withMonitor( +suspend fun withMonitor( monitor: ExperimentMonitor, clock: Clock, metricProducer: MetricProducer, scheduler: ComputeService, block: suspend CoroutineScope.() -> Unit ): Unit = coroutineScope { - val monitorJobs = mutableSetOf() - // Monitor host events for (host in scheduler.hosts) { monitor.reportHostStateChange(clock.millis(), host, HostState.UP) @@ -211,24 +189,23 @@ public suspend fun withMonitor( try { block(this) } finally { - monitorJobs.forEach(Job::cancel) reader.close() monitor.close() } } -public class ComputeMetrics { - public var submittedVms: Int = 0 - public var queuedVms: Int = 0 - public var runningVms: Int = 0 - public var unscheduledVms: Int = 0 - public var finishedVms: Int = 0 +class ComputeMetrics { + var submittedVms: Int = 0 + var queuedVms: Int = 0 + var runningVms: Int = 0 + var unscheduledVms: Int = 0 + var finishedVms: Int = 0 } /** * Collect the metrics of the compute service. */ -public fun collectMetrics(metricProducer: MetricProducer): ComputeMetrics { +fun collectMetrics(metricProducer: MetricProducer): ComputeMetrics { val metrics = metricProducer.collectAllMetrics().associateBy { it.name } val res = ComputeMetrics() try { @@ -247,7 +224,7 @@ public fun collectMetrics(metricProducer: MetricProducer): ComputeMetrics { /** * Process the trace. */ -public suspend fun processTrace( +suspend fun processTrace( clock: Clock, reader: TraceReader, scheduler: ComputeService, @@ -306,7 +283,7 @@ public suspend fun processTrace( /** * Create a [MeterProvider] instance for the experiment. 
*/ -public fun createMeterProvider(clock: Clock): MeterProvider { +fun createMeterProvider(clock: Clock): MeterProvider { val powerSelector = InstrumentSelector.builder() .setInstrumentNameRegex("power\\.usage") .setInstrumentType(InstrumentType.VALUE_RECORDER) diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt index b70eefb2..460da303 100644 --- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt +++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/Portfolio.kt @@ -32,29 +32,27 @@ import org.opendc.compute.service.scheduler.* import org.opendc.compute.service.scheduler.filters.ComputeCapabilitiesFilter import org.opendc.compute.service.scheduler.filters.ComputeFilter import org.opendc.compute.service.scheduler.weights.* +import org.opendc.experiments.capelin.env.ClusterEnvironmentReader import org.opendc.experiments.capelin.model.CompositeWorkload import org.opendc.experiments.capelin.model.OperationalPhenomena import org.opendc.experiments.capelin.model.Topology import org.opendc.experiments.capelin.model.Workload import org.opendc.experiments.capelin.monitor.ParquetExperimentMonitor -import org.opendc.experiments.capelin.trace.Sc20ParquetTraceReader -import org.opendc.experiments.capelin.trace.Sc20RawParquetTraceReader -import org.opendc.format.environment.sc20.Sc20ClusterEnvironmentReader -import org.opendc.format.trace.PerformanceInterferenceModelReader +import org.opendc.experiments.capelin.trace.ParquetTraceReader +import org.opendc.experiments.capelin.trace.RawParquetTraceReader import org.opendc.harness.dsl.Experiment import org.opendc.harness.dsl.anyOf import org.opendc.simulator.core.runBlockingSimulation import java.io.File import java.util.* import java.util.concurrent.ConcurrentHashMap -import kotlin.random.asKotlinRandom /** * A portfolio represents a collection of scenarios are tested for the work. * * @param name The name of the portfolio. */ -public abstract class Portfolio(name: String) : Experiment(name) { +abstract class Portfolio(name: String) : Experiment(name) { /** * The logger for this portfolio instance. */ @@ -70,35 +68,30 @@ public abstract class Portfolio(name: String) : Experiment(name) { */ private val vmPlacements by anyOf(emptyMap()) - /** - * The path to the performance interference model. - */ - private val performanceInterferenceModel by anyOf(null) - /** * The topology to test. */ - public abstract val topology: Topology + abstract val topology: Topology /** * The workload to test. */ - public abstract val workload: Workload + abstract val workload: Workload /** * The operational phenomenas to consider. */ - public abstract val operationalPhenomena: OperationalPhenomena + abstract val operationalPhenomena: OperationalPhenomena /** * The allocation policies to consider. */ - public abstract val allocationPolicy: String + abstract val allocationPolicy: String /** * A map of trace readers. */ - private val traceReaders = ConcurrentHashMap() + private val traceReaders = ConcurrentHashMap() /** * Perform a single trial for this portfolio. 
@@ -106,7 +99,7 @@ public abstract class Portfolio(name: String) : Experiment(name) { @OptIn(ExperimentalCoroutinesApi::class) override fun doRun(repeat: Int): Unit = runBlockingSimulation { val seeder = Random(repeat.toLong()) - val environment = Sc20ClusterEnvironmentReader(File(config.getString("env-path"), "${topology.name}.txt")) + val environment = ClusterEnvironmentReader(File(config.getString("env-path"), "${topology.name}.txt")) val chan = Channel(Channel.CONFLATED) val allocationPolicy = createComputeScheduler(seeder) @@ -122,14 +115,11 @@ public abstract class Portfolio(name: String) : Experiment(name) { val rawReaders = workloadNames.map { workloadName -> traceReaders.computeIfAbsent(workloadName) { logger.info { "Loading trace $workloadName" } - Sc20RawParquetTraceReader(File(config.getString("trace-path"), workloadName)) + RawParquetTraceReader(File(config.getString("trace-path"), workloadName)) } } - val performanceInterferenceModel = performanceInterferenceModel - ?.takeIf { operationalPhenomena.hasInterference } - ?.construct(seeder.asKotlinRandom()) ?: emptyMap() - val trace = Sc20ParquetTraceReader(rawReaders, performanceInterferenceModel, workload, seeder.nextInt()) + val trace = ParquetTraceReader(rawReaders, workload, seeder.nextInt()) val monitor = ParquetExperimentMonitor( File(config.getString("output-path")), diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/env/ClusterEnvironmentReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/env/ClusterEnvironmentReader.kt new file mode 100644 index 00000000..d73d14f5 --- /dev/null +++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/env/ClusterEnvironmentReader.kt @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+package org.opendc.experiments.capelin.env
+
+import org.opendc.format.environment.EnvironmentReader
+import org.opendc.format.environment.MachineDef
+import org.opendc.simulator.compute.model.MachineModel
+import org.opendc.simulator.compute.model.MemoryUnit
+import org.opendc.simulator.compute.model.ProcessingNode
+import org.opendc.simulator.compute.model.ProcessingUnit
+import org.opendc.simulator.compute.power.LinearPowerModel
+import java.io.File
+import java.io.FileInputStream
+import java.io.InputStream
+import java.util.*
+
+/**
+ * An [EnvironmentReader] for the internal environment format.
+ *
+ * @param input The input stream describing the physical cluster.
+ */
+class ClusterEnvironmentReader(private val input: InputStream) : EnvironmentReader {
+    /**
+     * Construct a [ClusterEnvironmentReader] for the specified [file].
+     */
+    constructor(file: File) : this(FileInputStream(file))
+
+    override fun read(): List<MachineDef> {
+        var clusterIdCol = 0
+        var speedCol = 0
+        var numberOfHostsCol = 0
+        var memoryPerHostCol = 0
+        var coresPerHostCol = 0
+
+        var clusterIdx = 0
+        var clusterId: String
+        var speed: Double
+        var numberOfHosts: Int
+        var memoryPerHost: Long
+        var coresPerHost: Int
+
+        val nodes = mutableListOf<MachineDef>()
+        val random = Random(0)
+
+        input.bufferedReader().use { reader ->
+            reader.lineSequence()
+                .filter { line ->
+                    // Ignore comments in the file
+                    !line.startsWith("#") && line.isNotBlank()
+                }
+                .forEachIndexed { idx, line ->
+                    val values = line.split(";")
+
+                    if (idx == 0) {
+                        val header = values.mapIndexed { col, name -> Pair(name.trim(), col) }.toMap()
+                        clusterIdCol = header["ClusterID"]!!
+                        speedCol = header["Speed"]!!
+                        numberOfHostsCol = header["numberOfHosts"]!!
+                        memoryPerHostCol = header["memoryCapacityPerHost"]!!
+                        coresPerHostCol = header["coreCountPerHost"]!!
+                        return@forEachIndexed
+                    }
+
+                    clusterIdx++
+                    clusterId = values[clusterIdCol].trim()
+                    speed = values[speedCol].trim().toDouble() * 1000.0
+                    numberOfHosts = values[numberOfHostsCol].trim().toInt()
+                    memoryPerHost = values[memoryPerHostCol].trim().toLong() * 1000L
+                    coresPerHost = values[coresPerHostCol].trim().toInt()
+
+                    val unknownProcessingNode = ProcessingNode("unknown", "unknown", "unknown", coresPerHost)
+                    val unknownMemoryUnit = MemoryUnit("unknown", "unknown", -1.0, memoryPerHost)
+
+                    repeat(numberOfHosts) {
+                        nodes.add(
+                            MachineDef(
+                                UUID(random.nextLong(), random.nextLong()),
+                                "node-$clusterId-$it",
+                                mapOf("cluster" to clusterId),
+                                MachineModel(
+                                    List(coresPerHost) { coreId ->
+                                        ProcessingUnit(unknownProcessingNode, coreId, speed)
+                                    },
+                                    listOf(unknownMemoryUnit)
+                                ),
+                                // For now we assume a simple linear load model with an idle draw of ~200W and a maximum
+                                // power draw of 350W.
+                                // Source: https://stackoverflow.com/questions/6128960
+                                LinearPowerModel(350.0, idlePower = 200.0)
+                            )
+                        )
+                    }
+                }
+        }
+
+        return nodes
+    }
+
+    override fun close() {
+        input.close()
+    }
+}
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/ParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/ParquetTraceReader.kt
new file mode 100644
index 00000000..2ebe65ea
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/ParquetTraceReader.kt
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.capelin.trace
+
+import org.opendc.experiments.capelin.model.CompositeWorkload
+import org.opendc.experiments.capelin.model.Workload
+import org.opendc.format.trace.TraceEntry
+import org.opendc.format.trace.TraceReader
+import org.opendc.simulator.compute.workload.SimWorkload
+
+/**
+ * A [TraceReader] for the internal VM workload trace format.
+ *
+ * @param rawReaders The raw trace readers to use.
+ * @param workload The workload to use.
+ * @param seed The seed to use for workload sampling.
+ */
+class ParquetTraceReader(
+    rawReaders: List<RawParquetTraceReader>,
+    workload: Workload,
+    seed: Int
+) : TraceReader<SimWorkload> {
+    /**
+     * The iterator over the actual trace.
+ */ + private val iterator: Iterator> = + rawReaders + .map { it.read() } + .run { + if (workload is CompositeWorkload) { + this.zip(workload.workloads) + } else { + this.zip(listOf(workload)) + } + } + .map { sampleWorkload(it.first, workload, it.second, seed) } + .flatten() + .iterator() + + override fun hasNext(): Boolean = iterator.hasNext() + + override fun next(): TraceEntry = iterator.next() + + override fun close() {} +} diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/RawParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/RawParquetTraceReader.kt new file mode 100644 index 00000000..94193780 --- /dev/null +++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/RawParquetTraceReader.kt @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2020 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.experiments.capelin.trace + +import org.apache.avro.generic.GenericData +import org.opendc.format.trace.TraceEntry +import org.opendc.format.trace.TraceReader +import org.opendc.format.util.LocalParquetReader +import org.opendc.simulator.compute.workload.SimTraceWorkload +import org.opendc.simulator.compute.workload.SimWorkload +import java.io.File +import java.util.UUID + +/** + * A [TraceReader] for the internal VM workload trace format. + * + * @param path The directory of the traces. + */ +class RawParquetTraceReader(private val path: File) { + /** + * Read the fragments into memory. + */ + private fun parseFragments(path: File): Map> { + val reader = LocalParquetReader(File(path, "trace.parquet")) + + val fragments = mutableMapOf>() + + return try { + while (true) { + val record = reader.read() ?: break + + val id = record["id"].toString() + val duration = record["duration"] as Long + val cores = record["cores"] as Int + val cpuUsage = record["cpuUsage"] as Double + + val fragment = SimTraceWorkload.Fragment( + duration, + cpuUsage, + cores + ) + + fragments.getOrPut(id) { mutableListOf() }.add(fragment) + } + + fragments + } finally { + reader.close() + } + } + + /** + * Read the metadata into a workload. 
+ */ + private fun parseMeta(path: File, fragments: Map>): List> { + val metaReader = LocalParquetReader(File(path, "meta.parquet")) + + var counter = 0 + val entries = mutableListOf>() + + return try { + while (true) { + val record = metaReader.read() ?: break + + val id = record["id"].toString() + if (!fragments.containsKey(id)) { + continue + } + + val submissionTime = record["submissionTime"] as Long + val endTime = record["endTime"] as Long + val maxCores = record["maxCores"] as Int + val requiredMemory = record["requiredMemory"] as Long + val uid = UUID.nameUUIDFromBytes("$id-${counter++}".toByteArray()) + + val vmFragments = fragments.getValue(id).asSequence() + val totalLoad = vmFragments.sumOf { it.usage } * 5 * 60 // avg MHz * duration = MFLOPs + val workload = SimTraceWorkload(vmFragments) + entries.add( + TraceEntry( + uid, id, submissionTime, workload, + mapOf( + "submit-time" to submissionTime, + "end-time" to endTime, + "total-load" to totalLoad, + "cores" to maxCores, + "required-memory" to requiredMemory, + "workload" to workload + ) + ) + ) + } + + entries + } catch (e: Exception) { + e.printStackTrace() + throw e + } finally { + metaReader.close() + } + } + + /** + * The entries in the trace. + */ + private val entries: List> + + init { + val fragments = parseFragments(path) + entries = parseMeta(path, fragments) + } + + /** + * Read the entries in the trace. + */ + fun read(): List> = entries +} diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt deleted file mode 100644 index 7f25137e..00000000 --- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -package org.opendc.experiments.capelin.trace - -import org.opendc.experiments.capelin.model.CompositeWorkload -import org.opendc.experiments.capelin.model.Workload -import org.opendc.format.trace.TraceEntry -import org.opendc.format.trace.TraceReader -import org.opendc.simulator.compute.interference.IMAGE_PERF_INTERFERENCE_MODEL -import org.opendc.simulator.compute.interference.PerformanceInterferenceModel -import org.opendc.simulator.compute.workload.SimWorkload -import java.util.TreeSet - -/** - * A [TraceReader] for the internal VM workload trace format. - * - * @param reader The internal trace reader to use. - * @param performanceInterferenceModel The performance model covering the workload in the VM trace. - * @param run The run to which this reader belongs. - */ -public class Sc20ParquetTraceReader( - rawReaders: List, - performanceInterferenceModel: Map, - workload: Workload, - seed: Int -) : TraceReader { - /** - * The iterator over the actual trace. - */ - private val iterator: Iterator> = - rawReaders - .map { it.read() } - .run { - if (workload is CompositeWorkload) { - this.zip(workload.workloads) - } else { - this.zip(listOf(workload)) - } - } - .map { sampleWorkload(it.first, workload, it.second, seed) } - .flatten() - .run { - // Apply performance interference model - if (performanceInterferenceModel.isEmpty()) - this - else { - map { entry -> - val id = entry.name - val relevantPerformanceInterferenceModelItems = - performanceInterferenceModel[id] ?: PerformanceInterferenceModel(TreeSet()) - - entry.copy(meta = entry.meta + mapOf(IMAGE_PERF_INTERFERENCE_MODEL to relevantPerformanceInterferenceModelItems)) - } - } - } - .iterator() - - override fun hasNext(): Boolean = iterator.hasNext() - - override fun next(): TraceEntry = iterator.next() - - override fun close() {} -} diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt deleted file mode 100644 index 54151c9f..00000000 --- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -package org.opendc.experiments.capelin.trace - -import mu.KotlinLogging -import org.apache.avro.generic.GenericData -import org.opendc.format.trace.TraceEntry -import org.opendc.format.trace.TraceReader -import org.opendc.format.util.LocalParquetReader -import org.opendc.simulator.compute.workload.SimTraceWorkload -import org.opendc.simulator.compute.workload.SimWorkload -import java.io.File -import java.util.UUID - -private val logger = KotlinLogging.logger {} - -/** - * A [TraceReader] for the internal VM workload trace format. - * - * @param path The directory of the traces. - */ -public class Sc20RawParquetTraceReader(private val path: File) { - /** - * Read the fragments into memory. - */ - private fun parseFragments(path: File): Map> { - val reader = LocalParquetReader(File(path, "trace.parquet")) - - val fragments = mutableMapOf>() - - return try { - while (true) { - val record = reader.read() ?: break - - val id = record["id"].toString() - val duration = record["duration"] as Long - val cores = record["cores"] as Int - val cpuUsage = record["cpuUsage"] as Double - - val fragment = SimTraceWorkload.Fragment( - duration, - cpuUsage, - cores - ) - - fragments.getOrPut(id) { mutableListOf() }.add(fragment) - } - - fragments - } finally { - reader.close() - } - } - - /** - * Read the metadata into a workload. - */ - private fun parseMeta(path: File, fragments: Map>): List> { - val metaReader = LocalParquetReader(File(path, "meta.parquet")) - - var counter = 0 - val entries = mutableListOf>() - - return try { - while (true) { - val record = metaReader.read() ?: break - - val id = record["id"].toString() - if (!fragments.containsKey(id)) { - continue - } - - val submissionTime = record["submissionTime"] as Long - val endTime = record["endTime"] as Long - val maxCores = record["maxCores"] as Int - val requiredMemory = record["requiredMemory"] as Long - val uid = UUID.nameUUIDFromBytes("$id-${counter++}".toByteArray()) - - val vmFragments = fragments.getValue(id).asSequence() - val totalLoad = vmFragments.sumOf { it.usage } * 5 * 60 // avg MHz * duration = MFLOPs - val workload = SimTraceWorkload(vmFragments) - entries.add( - TraceEntry( - uid, id, submissionTime, workload, - mapOf( - "submit-time" to submissionTime, - "end-time" to endTime, - "total-load" to totalLoad, - "cores" to maxCores, - "required-memory" to requiredMemory, - "workload" to workload - ) - ) - ) - } - - entries - } catch (e: Exception) { - e.printStackTrace() - throw e - } finally { - metaReader.close() - } - } - - /** - * The entries in the trace. - */ - private val entries: List> - - init { - val fragments = parseFragments(path) - entries = parseMeta(path, fragments) - } - - /** - * Read the entries in the trace. - */ - public fun read(): List> = entries - - /** - * Create a [TraceReader] instance. 
- */ - public fun createReader(): TraceReader { - return object : TraceReader, Iterator> by entries.iterator() { - override fun close() {} - } - } -} diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt deleted file mode 100644 index 6792c2ab..00000000 --- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.experiments.capelin.trace - -import mu.KotlinLogging -import org.apache.avro.generic.GenericData -import org.apache.parquet.avro.AvroParquetReader -import org.apache.parquet.filter2.compat.FilterCompat -import org.apache.parquet.filter2.predicate.FilterApi -import org.apache.parquet.filter2.predicate.Statistics -import org.apache.parquet.filter2.predicate.UserDefinedPredicate -import org.apache.parquet.io.api.Binary -import org.opendc.format.trace.TraceEntry -import org.opendc.format.trace.TraceReader -import org.opendc.format.util.LocalInputFile -import org.opendc.simulator.compute.interference.IMAGE_PERF_INTERFERENCE_MODEL -import org.opendc.simulator.compute.interference.PerformanceInterferenceModel -import org.opendc.simulator.compute.workload.SimTraceWorkload -import org.opendc.simulator.compute.workload.SimWorkload -import java.io.File -import java.io.Serializable -import java.util.SortedSet -import java.util.TreeSet -import java.util.UUID -import java.util.concurrent.ArrayBlockingQueue -import kotlin.concurrent.thread -import kotlin.random.Random - -private val logger = KotlinLogging.logger {} - -/** - * A [TraceReader] for the internal VM workload trace format that streams workloads on the fly. - * - * @param traceFile The directory of the traces. - * @param performanceInterferenceModel The performance model covering the workload in the VM trace. - */ -public class Sc20StreamingParquetTraceReader( - traceFile: File, - performanceInterferenceModel: PerformanceInterferenceModel? = null, - selectedVms: List = emptyList(), - random: Random -) : TraceReader { - /** - * The internal iterator to use for this reader. 
- */ - private val iterator: Iterator> - - /** - * The intermediate buffer to store the read records in. - */ - private val queue = ArrayBlockingQueue>(1024) - - /** - * An optional filter for filtering the selected VMs - */ - private val filter = - if (selectedVms.isEmpty()) - null - else - FilterCompat.get( - FilterApi.userDefined( - FilterApi.binaryColumn("id"), - SelectedVmFilter( - TreeSet(selectedVms) - ) - ) - ) - - /** - * A poisonous fragment. - */ - private val poison = Pair("\u0000", SimTraceWorkload.Fragment(0, 0.0, 0)) - - /** - * The thread to read the records in. - */ - private val readerThread = thread(start = true, name = "sc20-reader") { - val reader = AvroParquetReader - .builder(LocalInputFile(File(traceFile, "trace.parquet"))) - .disableCompatibility() - .withFilter(filter) - .build() - - try { - while (true) { - val record = reader.read() - - if (record == null) { - queue.put(poison) - break - } - - val id = record["id"].toString() - val duration = record["duration"] as Long - val cores = record["cores"] as Int - val cpuUsage = record["cpuUsage"] as Double - - val fragment = SimTraceWorkload.Fragment( - duration, - cpuUsage, - cores - ) - - queue.put(id to fragment) - } - } catch (e: InterruptedException) { - // Do not rethrow this - } finally { - reader.close() - } - } - - /** - * Fill the buffers with the VMs - */ - private fun pull(buffers: Map>>) { - if (!hasNext) { - return - } - - val fragments = mutableListOf>() - queue.drainTo(fragments) - - for ((id, fragment) in fragments) { - if (id == poison.first) { - hasNext = false - return - } - buffers[id]?.forEach { it.add(fragment) } - } - } - - /** - * A flag to indicate whether the reader has more entries. - */ - private var hasNext: Boolean = true - - /** - * Initialize the reader. 
- */ - init { - val takenIds = mutableSetOf() - val entries = mutableMapOf() - val buffers = mutableMapOf>>() - - val metaReader = AvroParquetReader - .builder(LocalInputFile(File(traceFile, "meta.parquet"))) - .disableCompatibility() - .withFilter(filter) - .build() - - while (true) { - val record = metaReader.read() ?: break - val id = record["id"].toString() - entries[id] = record - } - - metaReader.close() - - val selection = selectedVms.ifEmpty { entries.keys } - - // Create the entry iterator - iterator = selection.asSequence() - .mapNotNull { entries[it] } - .mapIndexed { index, record -> - val id = record["id"].toString() - val submissionTime = record["submissionTime"] as Long - val endTime = record["endTime"] as Long - val maxCores = record["maxCores"] as Int - val requiredMemory = record["requiredMemory"] as Long - val uid = UUID.nameUUIDFromBytes("$id-$index".toByteArray()) - - assert(uid !in takenIds) - takenIds += uid - - logger.info("Processing VM $id") - - val internalBuffer = mutableListOf() - val externalBuffer = mutableListOf() - buffers.getOrPut(id) { mutableListOf() }.add(externalBuffer) - val fragments = sequence { - var time = submissionTime - repeat@ while (true) { - if (externalBuffer.isEmpty()) { - if (hasNext) { - pull(buffers) - continue - } else { - break - } - } - - internalBuffer.addAll(externalBuffer) - externalBuffer.clear() - - for (fragment in internalBuffer) { - yield(fragment) - - time += fragment.duration - if (time >= endTime) { - break@repeat - } - } - - internalBuffer.clear() - } - - buffers.remove(id) - } - val relevantPerformanceInterferenceModelItems = - if (performanceInterferenceModel != null) - PerformanceInterferenceModel( - performanceInterferenceModel.items.filter { it.workloadNames.contains(id) }.toSortedSet(), - Random(random.nextInt()) - ) - else - null - val workload = SimTraceWorkload(fragments) - val meta = mapOf( - "cores" to maxCores, - "required-memory" to requiredMemory, - "workload" to workload - ) - - TraceEntry( - uid, id, submissionTime, workload, - if (performanceInterferenceModel != null) - meta + mapOf(IMAGE_PERF_INTERFERENCE_MODEL to relevantPerformanceInterferenceModelItems as Any) - else - meta - ) - } - .sortedBy { it.start } - .toList() - .iterator() - } - - override fun hasNext(): Boolean = iterator.hasNext() - - override fun next(): TraceEntry = iterator.next() - - override fun close() { - readerThread.interrupt() - } - - private class SelectedVmFilter(val selectedVms: SortedSet) : UserDefinedPredicate(), Serializable { - override fun keep(value: Binary?): Boolean = value != null && selectedVms.contains(value.toStringUsingUTF8()) - - override fun canDrop(statistics: Statistics): Boolean { - val min = statistics.min - val max = statistics.max - - return selectedVms.subSet(min.toStringUsingUTF8(), max.toStringUsingUTF8() + "\u0000").isEmpty() - } - - override fun inverseCanDrop(statistics: Statistics): Boolean { - val min = statistics.min - val max = statistics.max - - return selectedVms.subSet(min.toStringUsingUTF8(), max.toStringUsingUTF8() + "\u0000").isNotEmpty() - } - } -} diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt deleted file mode 100644 index d0031a66..00000000 --- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt +++ /dev/null @@ 
-1,621 +0,0 @@ -/* - * Copyright (c) 2020 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.experiments.capelin.trace - -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.arguments.argument -import com.github.ajalt.clikt.parameters.groups.OptionGroup -import com.github.ajalt.clikt.parameters.groups.groupChoice -import com.github.ajalt.clikt.parameters.options.convert -import com.github.ajalt.clikt.parameters.options.default -import com.github.ajalt.clikt.parameters.options.defaultLazy -import com.github.ajalt.clikt.parameters.options.option -import com.github.ajalt.clikt.parameters.options.required -import com.github.ajalt.clikt.parameters.options.split -import com.github.ajalt.clikt.parameters.types.file -import com.github.ajalt.clikt.parameters.types.long -import me.tongfei.progressbar.ProgressBar -import org.apache.avro.Schema -import org.apache.avro.SchemaBuilder -import org.apache.avro.generic.GenericData -import org.apache.parquet.avro.AvroParquetWriter -import org.apache.parquet.hadoop.ParquetWriter -import org.apache.parquet.hadoop.metadata.CompressionCodecName -import org.opendc.format.trace.sc20.Sc20VmPlacementReader -import org.opendc.format.util.LocalOutputFile -import java.io.BufferedReader -import java.io.File -import java.io.FileReader -import java.util.Random -import kotlin.math.max -import kotlin.math.min - -/** - * Represents the command for converting traces - */ -public class TraceConverterCli : CliktCommand(name = "trace-converter") { - /** - * The directory where the trace should be stored. - */ - private val outputPath by option("-O", "--output", help = "path to store the trace") - .file(canBeFile = false, mustExist = false) - .defaultLazy { File("output") } - - /** - * The directory where the input trace is located. - */ - private val inputPath by argument("input", help = "path to the input trace") - .file(canBeFile = false) - - /** - * The input type of the trace. 
- */ - private val type by option("-t", "--type", help = "input type of trace").groupChoice( - "solvinity" to SolvinityConversion(), - "bitbrains" to BitbrainsConversion(), - "azure" to AzureConversion() - ) - - override fun run() { - val metaSchema = SchemaBuilder - .record("meta") - .namespace("org.opendc.format.sc20") - .fields() - .name("id").type().stringType().noDefault() - .name("submissionTime").type().longType().noDefault() - .name("endTime").type().longType().noDefault() - .name("maxCores").type().intType().noDefault() - .name("requiredMemory").type().longType().noDefault() - .endRecord() - val schema = SchemaBuilder - .record("trace") - .namespace("org.opendc.format.sc20") - .fields() - .name("id").type().stringType().noDefault() - .name("time").type().longType().noDefault() - .name("duration").type().longType().noDefault() - .name("cores").type().intType().noDefault() - .name("cpuUsage").type().doubleType().noDefault() - .name("flops").type().longType().noDefault() - .endRecord() - - val metaParquet = File(outputPath, "meta.parquet") - val traceParquet = File(outputPath, "trace.parquet") - - if (metaParquet.exists()) { - metaParquet.delete() - } - if (traceParquet.exists()) { - traceParquet.delete() - } - - val metaWriter = AvroParquetWriter.builder(LocalOutputFile(metaParquet)) - .withSchema(metaSchema) - .withCompressionCodec(CompressionCodecName.SNAPPY) - .withPageSize(4 * 1024 * 1024) // For compression - .withRowGroupSize(16 * 1024 * 1024) // For write buffering (Page size) - .build() - - val writer = AvroParquetWriter.builder(LocalOutputFile(traceParquet)) - .withSchema(schema) - .withCompressionCodec(CompressionCodecName.SNAPPY) - .withPageSize(4 * 1024 * 1024) // For compression - .withRowGroupSize(16 * 1024 * 1024) // For write buffering (Page size) - .build() - - try { - val type = type ?: throw IllegalArgumentException("Invalid trace conversion") - val allFragments = type.read(inputPath, metaSchema, metaWriter) - allFragments.sortWith(compareBy { it.tick }.thenBy { it.id }) - - for (fragment in allFragments) { - val record = GenericData.Record(schema) - record.put("id", fragment.id) - record.put("time", fragment.tick) - record.put("duration", fragment.duration) - record.put("cores", fragment.cores) - record.put("cpuUsage", fragment.usage) - record.put("flops", fragment.flops) - - writer.write(record) - } - } finally { - writer.close() - metaWriter.close() - } - } -} - -/** - * The supported trace conversions. - */ -public sealed class TraceConversion(name: String) : OptionGroup(name) { - /** - * Read the fragments of the trace. 
- */ - public abstract fun read( - traceDirectory: File, - metaSchema: Schema, - metaWriter: ParquetWriter - ): MutableList -} - -public class SolvinityConversion : TraceConversion("Solvinity") { - private val clusters by option() - .split(",") - - private val vmPlacements by option("--vm-placements", help = "file containing the VM placements") - .file(canBeDir = false) - .convert { it.inputStream().buffered().use { Sc20VmPlacementReader(it).construct() } } - .required() - - override fun read( - traceDirectory: File, - metaSchema: Schema, - metaWriter: ParquetWriter - ): MutableList { - val clusters = clusters?.toSet() ?: emptySet() - val timestampCol = 0 - val cpuUsageCol = 1 - val coreCol = 12 - val provisionedMemoryCol = 20 - val traceInterval = 5 * 60 * 1000L - - // Identify start time of the entire trace - var minTimestamp = Long.MAX_VALUE - traceDirectory.walk() - .filterNot { it.isDirectory } - .filter { it.extension == "csv" || it.extension == "txt" } - .toList() - .forEach file@{ vmFile -> - BufferedReader(FileReader(vmFile)).use { reader -> - reader.lineSequence() - .chunked(128) - .forEach { lines -> - for (line in lines) { - // Ignore comments in the trace - if (line.startsWith("#") || line.isBlank()) { - continue - } - - val vmId = vmFile.name - - // Check if VM in topology - val clusterName = vmPlacements[vmId] - if (clusterName == null || !clusters.contains(clusterName)) { - continue - } - - val values = line.split("\t") - val timestamp = (values[timestampCol].trim().toLong() - 5 * 60) * 1000L - - if (timestamp < minTimestamp) { - minTimestamp = timestamp - } - return@file - } - } - } - } - - println("Start of trace at $minTimestamp") - - val allFragments = mutableListOf() - - val begin = 15 * 24 * 60 * 60 * 1000L - val end = 45 * 24 * 60 * 60 * 1000L - - traceDirectory.walk() - .filterNot { it.isDirectory } - .filter { it.extension == "csv" || it.extension == "txt" } - .toList() - .forEach { vmFile -> - println(vmFile) - - var vmId = "" - var maxCores = -1 - var requiredMemory = -1L - var cores: Int - var minTime = Long.MAX_VALUE - - val flopsFragments = sequence { - var last: Fragment? = null - - BufferedReader(FileReader(vmFile)).use { reader -> - reader.lineSequence() - .chunked(128) - .forEach { lines -> - for (line in lines) { - // Ignore comments in the trace - if (line.startsWith("#") || line.isBlank()) { - continue - } - - val values = line.split("\t") - - vmId = vmFile.name - - // Check if VM in topology - val clusterName = vmPlacements[vmId] - if (clusterName == null || !clusters.contains(clusterName)) { - continue - } - - val timestamp = - (values[timestampCol].trim().toLong() - 5 * 60) * 1000L - minTimestamp - if (begin > timestamp || timestamp > end) { - continue - } - - cores = values[coreCol].trim().toInt() - requiredMemory = max(requiredMemory, values[provisionedMemoryCol].trim().toLong()) - maxCores = max(maxCores, cores) - minTime = min(minTime, timestamp) - val cpuUsage = values[cpuUsageCol].trim().toDouble() // MHz - requiredMemory = max(requiredMemory, values[provisionedMemoryCol].trim().toLong()) - maxCores = max(maxCores, cores) - - val flops: Long = (cpuUsage * 5 * 60).toLong() - - last = if (last != null && last!!.flops == 0L && flops == 0L) { - val oldFragment = last!! - Fragment( - vmId, - oldFragment.tick, - oldFragment.flops + flops, - oldFragment.duration + traceInterval, - cpuUsage, - cores - ) - } else { - val fragment = - Fragment( - vmId, - timestamp, - flops, - traceInterval, - cpuUsage, - cores - ) - if (last != null) { - yield(last!!) 
- } - fragment - } - } - } - } - - if (last != null) { - yield(last!!) - } - } - - var maxTime = Long.MIN_VALUE - flopsFragments.filter { it.tick in begin until end }.forEach { fragment -> - allFragments.add(fragment) - maxTime = max(maxTime, fragment.tick) - } - - if (minTime in begin until end) { - val metaRecord = GenericData.Record(metaSchema) - metaRecord.put("id", vmId) - metaRecord.put("submissionTime", minTime) - metaRecord.put("endTime", maxTime) - metaRecord.put("maxCores", maxCores) - metaRecord.put("requiredMemory", requiredMemory) - metaWriter.write(metaRecord) - } - } - - return allFragments - } -} - -/** - * Conversion of the Bitbrains public trace. - */ -public class BitbrainsConversion : TraceConversion("Bitbrains") { - override fun read( - traceDirectory: File, - metaSchema: Schema, - metaWriter: ParquetWriter - ): MutableList { - val timestampCol = 0 - val cpuUsageCol = 3 - val coreCol = 1 - val provisionedMemoryCol = 5 - val traceInterval = 5 * 60 * 1000L - - val allFragments = mutableListOf() - - traceDirectory.walk() - .filterNot { it.isDirectory } - .filter { it.extension == "csv" || it.extension == "txt" } - .toList() - .forEach { vmFile -> - println(vmFile) - - var vmId = "" - var maxCores = -1 - var requiredMemory = -1L - var cores: Int - var minTime = Long.MAX_VALUE - - val flopsFragments = sequence { - var last: Fragment? = null - - BufferedReader(FileReader(vmFile)).use { reader -> - reader.lineSequence() - .drop(1) - .chunked(128) - .forEach { lines -> - for (line in lines) { - // Ignore comments in the trace - if (line.startsWith("#") || line.isBlank()) { - continue - } - - val values = line.split(";\t") - - vmId = vmFile.name - - val timestamp = (values[timestampCol].trim().toLong() - 5 * 60) * 1000L - - cores = values[coreCol].trim().toInt() - val provisionedMemory = values[provisionedMemoryCol].trim().toDouble() // KB - requiredMemory = max(requiredMemory, (provisionedMemory / 1000).toLong()) - maxCores = max(maxCores, cores) - minTime = min(minTime, timestamp) - val cpuUsage = values[cpuUsageCol].trim().toDouble() // MHz - - val flops: Long = (cpuUsage * 5 * 60).toLong() - - last = if (last != null && last!!.flops == 0L && flops == 0L) { - val oldFragment = last!! - Fragment( - vmId, - oldFragment.tick, - oldFragment.flops + flops, - oldFragment.duration + traceInterval, - cpuUsage, - cores - ) - } else { - val fragment = - Fragment( - vmId, - timestamp, - flops, - traceInterval, - cpuUsage, - cores - ) - if (last != null) { - yield(last!!) - } - fragment - } - } - } - } - - if (last != null) { - yield(last!!) - } - } - - var maxTime = Long.MIN_VALUE - flopsFragments.forEach { fragment -> - allFragments.add(fragment) - maxTime = max(maxTime, fragment.tick) - } - - val metaRecord = GenericData.Record(metaSchema) - metaRecord.put("id", vmId) - metaRecord.put("submissionTime", minTime) - metaRecord.put("endTime", maxTime) - metaRecord.put("maxCores", maxCores) - metaRecord.put("requiredMemory", requiredMemory) - metaWriter.write(metaRecord) - } - - return allFragments - } -} - -/** - * Conversion of the Azure public VM trace. 
- */
-public class AzureConversion : TraceConversion("Azure") {
-    private val seed by option(help = "seed for trace sampling")
-        .long()
-        .default(0)
-
-    override fun read(
-        traceDirectory: File,
-        metaSchema: Schema,
-        metaWriter: ParquetWriter<GenericData.Record>
-    ): MutableList<Fragment> {
-        val random = Random(seed)
-        val fraction = 0.01
-
-        // Read VM table
-        val vmIdTableCol = 0
-        val coreTableCol = 9
-        val provisionedMemoryTableCol = 10
-
-        var vmId: String
-        var cores: Int
-        var requiredMemory: Long
-
-        val vmIds = mutableSetOf<String>()
-        val vmIdToMetadata = mutableMapOf<String, VmInfo>()
-
-        BufferedReader(FileReader(File(traceDirectory, "vmtable.csv"))).use { reader ->
-            reader.lineSequence()
-                .chunked(1024)
-                .forEach { lines ->
-                    for (line in lines) {
-                        // Ignore comments in the trace
-                        if (line.startsWith("#") || line.isBlank()) {
-                            continue
-                        }
-                        // Sample only a fraction of the VMs
-                        if (random.nextDouble() > fraction) {
-                            continue
-                        }
-
-                        val values = line.split(",")
-
-                        // Exclude VMs with a large number of cores (not specified exactly)
-                        if (values[coreTableCol].contains(">")) {
-                            continue
-                        }
-
-                        vmId = values[vmIdTableCol].trim()
-                        cores = values[coreTableCol].trim().toInt()
-                        requiredMemory = values[provisionedMemoryTableCol].trim().toInt() * 1_000L // GB -> MB
-
-                        vmIds.add(vmId)
-                        vmIdToMetadata[vmId] = VmInfo(cores, requiredMemory, Long.MAX_VALUE, -1L)
-                    }
-                }
-        }
-
-        // Read VM metric reading files
-        val timestampCol = 0
-        val vmIdCol = 1
-        val cpuUsageCol = 4
-        val traceInterval = 5 * 60 * 1000L
-
-        val vmIdToFragments = mutableMapOf<String, MutableList<Fragment>>()
-        val vmIdToLastFragment = mutableMapOf<String, Fragment?>()
-        val allFragments = mutableListOf<Fragment>()
-
-        for (i in ProgressBar.wrap((1..195).toList(), "Reading Trace")) {
-            val readingsFile = File(File(traceDirectory, "readings"), "readings-$i.csv")
-            var timestamp: Long
-            var cpuUsage: Double
-
-            BufferedReader(FileReader(readingsFile)).use { reader ->
-                reader.lineSequence()
-                    .chunked(128)
-                    .forEach { lines ->
-                        for (line in lines) {
-                            // Ignore comments in the trace
-                            if (line.startsWith("#") || line.isBlank()) {
-                                continue
-                            }
-
-                            val values = line.split(",")
-                            vmId = values[vmIdCol].trim()
-
-                            // Ignore readings for VMs not in the sample
-                            if (!vmIds.contains(vmId)) {
-                                continue
-                            }
-
-                            timestamp = values[timestampCol].trim().toLong() * 1000L
-                            vmIdToMetadata[vmId]!!.minTime = min(vmIdToMetadata[vmId]!!.minTime, timestamp)
-                            cpuUsage = values[cpuUsageCol].trim().toDouble() * 3_000 // MHz
-                            vmIdToMetadata[vmId]!!.maxTime = max(vmIdToMetadata[vmId]!!.maxTime, timestamp)
-
-                            val flops: Long = (cpuUsage * 5 * 60).toLong()
-                            val lastFragment = vmIdToLastFragment[vmId]
-
-                            vmIdToLastFragment[vmId] =
-                                if (lastFragment != null && lastFragment.flops == 0L && flops == 0L) {
-                                    Fragment(
-                                        vmId,
-                                        lastFragment.tick,
-                                        lastFragment.flops + flops,
-                                        lastFragment.duration + traceInterval,
-                                        cpuUsage,
-                                        vmIdToMetadata[vmId]!!.cores
-                                    )
-                                } else {
-                                    val fragment =
-                                        Fragment(
-                                            vmId,
-                                            timestamp,
-                                            flops,
-                                            traceInterval,
-                                            cpuUsage,
-                                            vmIdToMetadata[vmId]!!.cores
-                                        )
-                                    if (lastFragment != null) {
-                                        if (vmIdToFragments[vmId] == null) {
-                                            vmIdToFragments[vmId] = mutableListOf()
-                                        }
-                                        vmIdToFragments[vmId]!!.add(lastFragment)
-                                        allFragments.add(lastFragment)
-                                    }
-                                    fragment
-                                }
-                        }
-                    }
-            }
-        }
-
-        for (entry in vmIdToLastFragment) {
-            if (entry.value != null) {
-                if (vmIdToFragments[entry.key] == null) {
-                    vmIdToFragments[entry.key] = mutableListOf()
-                }
-                vmIdToFragments[entry.key]!!.add(entry.value!!)
-            }
-        }
-
-        println("Read ${vmIdToLastFragment.size} VMs")
-
-        for (entry in vmIdToMetadata) {
-            val metaRecord = GenericData.Record(metaSchema)
-            metaRecord.put("id", entry.key)
-            metaRecord.put("submissionTime", entry.value.minTime)
-            metaRecord.put("endTime", entry.value.maxTime)
-            println("${entry.value.minTime} - ${entry.value.maxTime}")
-            metaRecord.put("maxCores", entry.value.cores)
-            metaRecord.put("requiredMemory", entry.value.requiredMemory)
-            metaWriter.write(metaRecord)
-        }
-
-        return allFragments
-    }
-}
-
-public data class Fragment(
-    public val id: String,
-    public val tick: Long,
-    public val flops: Long,
-    public val duration: Long,
-    public val usage: Double,
-    public val cores: Int
-)
-
-public class VmInfo(public val cores: Int, public val requiredMemory: Long, public var minTime: Long, public var maxTime: Long)
-
-/**
- * A script to convert a trace in text format into a Parquet trace.
- */
-public fun main(args: Array<String>): Unit = TraceConverterCli().main(args)
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/StreamingParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/StreamingParquetTraceReader.kt
new file mode 100644
index 00000000..a3b45f47
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/StreamingParquetTraceReader.kt
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.capelin.trace
+
+import mu.KotlinLogging
+import org.apache.avro.generic.GenericData
+import org.apache.parquet.avro.AvroParquetReader
+import org.apache.parquet.filter2.compat.FilterCompat
+import org.apache.parquet.filter2.predicate.FilterApi
+import org.apache.parquet.filter2.predicate.Statistics
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate
+import org.apache.parquet.io.api.Binary
+import org.opendc.format.trace.TraceEntry
+import org.opendc.format.trace.TraceReader
+import org.opendc.format.util.LocalInputFile
+import org.opendc.simulator.compute.workload.SimTraceWorkload
+import org.opendc.simulator.compute.workload.SimWorkload
+import java.io.File
+import java.io.Serializable
+import java.util.SortedSet
+import java.util.TreeSet
+import java.util.UUID
+import java.util.concurrent.ArrayBlockingQueue
+import kotlin.concurrent.thread
+
+private val logger = KotlinLogging.logger {}
+
+/**
+ * A [TraceReader] for the internal VM workload trace format that streams workloads on the fly.
+ *
+ * @param traceFile The directory containing the trace files (`meta.parquet` and `trace.parquet`).
+ * @param selectedVms The ids of the VMs to read from the trace; when empty, all VMs are read.
+ */
+class StreamingParquetTraceReader(traceFile: File, selectedVms: List<String> = emptyList()) : TraceReader<SimWorkload> {
+    /**
+     * The internal iterator to use for this reader.
+     */
+    private val iterator: Iterator<TraceEntry<SimWorkload>>
+
+    /**
+     * The intermediate buffer to store the read records in.
+     */
+    private val queue = ArrayBlockingQueue<Pair<String, SimTraceWorkload.Fragment>>(1024)
+
+    /**
+     * An optional Parquet filter that restricts reading to the selected VMs.
+     */
+    private val filter =
+        if (selectedVms.isEmpty())
+            null
+        else
+            FilterCompat.get(
+                FilterApi.userDefined(
+                    FilterApi.binaryColumn("id"),
+                    SelectedVmFilter(
+                        TreeSet(selectedVms)
+                    )
+                )
+            )
+
+    /**
+     * A poison value that marks the end of the record stream.
+     */
+    private val poison = Pair("\u0000", SimTraceWorkload.Fragment(0, 0.0, 0))
+
+    /**
+     * The thread to read the records in.
+     */
+    private val readerThread = thread(start = true, name = "trace-reader") {
+        val reader = AvroParquetReader
+            .builder<GenericData.Record>(LocalInputFile(File(traceFile, "trace.parquet")))
+            .disableCompatibility()
+            .withFilter(filter)
+            .build()
+
+        try {
+            while (true) {
+                val record = reader.read()
+
+                if (record == null) {
+                    queue.put(poison)
+                    break
+                }
+
+                val id = record["id"].toString()
+                val duration = record["duration"] as Long
+                val cores = record["cores"] as Int
+                val cpuUsage = record["cpuUsage"] as Double
+
+                val fragment = SimTraceWorkload.Fragment(
+                    duration,
+                    cpuUsage,
+                    cores
+                )
+
+                queue.put(id to fragment)
+            }
+        } catch (e: InterruptedException) {
+            // The reader was closed; exit the thread without rethrowing
+        } finally {
+            reader.close()
+        }
+    }
+
+    /**
+     * Drain the queue and distribute the fragments over the per-VM buffers.
+     */
+    private fun pull(buffers: Map<String, MutableList<MutableList<SimTraceWorkload.Fragment>>>) {
+        if (!hasNext) {
+            return
+        }
+
+        val fragments = mutableListOf<Pair<String, SimTraceWorkload.Fragment>>()
+        queue.drainTo(fragments)
+
+        for ((id, fragment) in fragments) {
+            if (id == poison.first) {
+                hasNext = false
+                return
+            }
+            buffers[id]?.forEach { it.add(fragment) }
+        }
+    }
+
+    /**
+     * A flag to indicate whether the reader has more entries.
+     */
+    private var hasNext: Boolean = true
+
+    /**
+     * Initialize the reader.
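+     *
+     * The metadata in `meta.parquet` is read eagerly to construct one [TraceEntry] per
+     * selected VM, while the fragments in `trace.parquet` arrive asynchronously from the
+     * reader thread and are pulled into the per-VM buffers as the workloads are consumed.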
+     */
+    init {
+        val takenIds = mutableSetOf<UUID>()
+        val entries = mutableMapOf<String, GenericData.Record>()
+        val buffers = mutableMapOf<String, MutableList<MutableList<SimTraceWorkload.Fragment>>>()
+
+        val metaReader = AvroParquetReader
+            .builder<GenericData.Record>(LocalInputFile(File(traceFile, "meta.parquet")))
+            .disableCompatibility()
+            .withFilter(filter)
+            .build()
+
+        while (true) {
+            val record = metaReader.read() ?: break
+            val id = record["id"].toString()
+            entries[id] = record
+        }
+
+        metaReader.close()
+
+        val selection = selectedVms.ifEmpty { entries.keys }
+
+        // Create the entry iterator
+        iterator = selection.asSequence()
+            .mapNotNull { entries[it] }
+            .mapIndexed { index, record ->
+                val id = record["id"].toString()
+                val submissionTime = record["submissionTime"] as Long
+                val endTime = record["endTime"] as Long
+                val maxCores = record["maxCores"] as Int
+                val requiredMemory = record["requiredMemory"] as Long
+                val uid = UUID.nameUUIDFromBytes("$id-$index".toByteArray())
+
+                assert(uid !in takenIds)
+                takenIds += uid
+
+                logger.info("Processing VM $id")
+
+                val internalBuffer = mutableListOf<SimTraceWorkload.Fragment>()
+                val externalBuffer = mutableListOf<SimTraceWorkload.Fragment>()
+                buffers.getOrPut(id) { mutableListOf() }.add(externalBuffer)
+                val fragments = sequence {
+                    var time = submissionTime
+                    repeat@ while (true) {
+                        if (externalBuffer.isEmpty()) {
+                            if (hasNext) {
+                                pull(buffers)
+                                continue
+                            } else {
+                                break
+                            }
+                        }
+
+                        internalBuffer.addAll(externalBuffer)
+                        externalBuffer.clear()
+
+                        for (fragment in internalBuffer) {
+                            yield(fragment)
+
+                            time += fragment.duration
+                            if (time >= endTime) {
+                                break@repeat
+                            }
+                        }
+
+                        internalBuffer.clear()
+                    }
+
+                    buffers.remove(id)
+                }
+                val workload = SimTraceWorkload(fragments)
+                val meta = mapOf(
+                    "cores" to maxCores,
+                    "required-memory" to requiredMemory,
+                    "workload" to workload
+                )
+
+                TraceEntry(uid, id, submissionTime, workload, meta)
+            }
+            .sortedBy { it.start }
+            .toList()
+            .iterator()
+    }
+
+    override fun hasNext(): Boolean = iterator.hasNext()
+
+    override fun next(): TraceEntry<SimWorkload> = iterator.next()
+
+    override fun close() {
+        readerThread.interrupt()
+    }
+
+    private class SelectedVmFilter(val selectedVms: SortedSet<String>) : UserDefinedPredicate<Binary>(), Serializable {
+        override fun keep(value: Binary?): Boolean = value != null && selectedVms.contains(value.toStringUsingUTF8())
+
+        override fun canDrop(statistics: Statistics<Binary>): Boolean {
+            val min = statistics.min
+            val max = statistics.max
+
+            return selectedVms.subSet(min.toStringUsingUTF8(), max.toStringUsingUTF8() + "\u0000").isEmpty()
+        }
+
+        override fun inverseCanDrop(statistics: Statistics<Binary>): Boolean {
+            val min = statistics.min
+            val max = statistics.max
+
+            return selectedVms.subSet(min.toStringUsingUTF8(), max.toStringUsingUTF8() + "\u0000").isNotEmpty()
+        }
+    }
+}
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/TraceConverter.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/TraceConverter.kt
new file mode 100644
index 00000000..7cd1f159
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/TraceConverter.kt
@@ -0,0 +1,620 @@
+/*
+ * Copyright (c) 2020 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.capelin.trace
+
+import com.github.ajalt.clikt.core.CliktCommand
+import com.github.ajalt.clikt.parameters.arguments.argument
+import com.github.ajalt.clikt.parameters.groups.OptionGroup
+import com.github.ajalt.clikt.parameters.groups.groupChoice
+import com.github.ajalt.clikt.parameters.options.convert
+import com.github.ajalt.clikt.parameters.options.default
+import com.github.ajalt.clikt.parameters.options.defaultLazy
+import com.github.ajalt.clikt.parameters.options.option
+import com.github.ajalt.clikt.parameters.options.required
+import com.github.ajalt.clikt.parameters.options.split
+import com.github.ajalt.clikt.parameters.types.file
+import com.github.ajalt.clikt.parameters.types.long
+import me.tongfei.progressbar.ProgressBar
+import org.apache.avro.Schema
+import org.apache.avro.SchemaBuilder
+import org.apache.avro.generic.GenericData
+import org.apache.parquet.avro.AvroParquetWriter
+import org.apache.parquet.hadoop.ParquetWriter
+import org.apache.parquet.hadoop.metadata.CompressionCodecName
+import org.opendc.format.util.LocalOutputFile
+import java.io.BufferedReader
+import java.io.File
+import java.io.FileReader
+import java.util.Random
+import kotlin.math.max
+import kotlin.math.min
+
+/**
+ * Represents the command for converting traces
+ */
+class TraceConverterCli : CliktCommand(name = "trace-converter") {
+    /**
+     * The directory where the trace should be stored.
+     */
+    private val outputPath by option("-O", "--output", help = "path to store the trace")
+        .file(canBeFile = false, mustExist = false)
+        .defaultLazy { File("output") }
+
+    /**
+     * The directory where the input trace is located.
+     */
+    private val inputPath by argument("input", help = "path to the input trace")
+        .file(canBeFile = false)
+
+    /**
+     * The input type of the trace.
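+     *
+     * A hypothetical invocation for the Solvinity format (all paths and cluster names
+     * below are illustrative):
+     *
+     * ```
+     * trace-converter --type solvinity \
+     *     --vm-placements placements.json \
+     *     --clusters A00,B00 \
+     *     -O output /data/solvinity
+     * ```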
+     */
+    private val type by option("-t", "--type", help = "input type of trace").groupChoice(
+        "solvinity" to SolvinityConversion(),
+        "bitbrains" to BitbrainsConversion(),
+        "azure" to AzureConversion()
+    )
+
+    override fun run() {
+        val metaSchema = SchemaBuilder
+            .record("meta")
+            .namespace("org.opendc.format.sc20")
+            .fields()
+            .name("id").type().stringType().noDefault()
+            .name("submissionTime").type().longType().noDefault()
+            .name("endTime").type().longType().noDefault()
+            .name("maxCores").type().intType().noDefault()
+            .name("requiredMemory").type().longType().noDefault()
+            .endRecord()
+        val schema = SchemaBuilder
+            .record("trace")
+            .namespace("org.opendc.format.sc20")
+            .fields()
+            .name("id").type().stringType().noDefault()
+            .name("time").type().longType().noDefault()
+            .name("duration").type().longType().noDefault()
+            .name("cores").type().intType().noDefault()
+            .name("cpuUsage").type().doubleType().noDefault()
+            .name("flops").type().longType().noDefault()
+            .endRecord()
+
+        val metaParquet = File(outputPath, "meta.parquet")
+        val traceParquet = File(outputPath, "trace.parquet")
+
+        if (metaParquet.exists()) {
+            metaParquet.delete()
+        }
+        if (traceParquet.exists()) {
+            traceParquet.delete()
+        }
+
+        val metaWriter = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(metaParquet))
+            .withSchema(metaSchema)
+            .withCompressionCodec(CompressionCodecName.SNAPPY)
+            .withPageSize(4 * 1024 * 1024) // For compression
+            .withRowGroupSize(16 * 1024 * 1024) // For write buffering
+            .build()
+
+        val writer = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(traceParquet))
+            .withSchema(schema)
+            .withCompressionCodec(CompressionCodecName.SNAPPY)
+            .withPageSize(4 * 1024 * 1024) // For compression
+            .withRowGroupSize(16 * 1024 * 1024) // For write buffering
+            .build()
+
+        try {
+            val type = type ?: throw IllegalArgumentException("Invalid trace conversion")
+            val allFragments = type.read(inputPath, metaSchema, metaWriter)
+            allFragments.sortWith(compareBy<Fragment> { it.tick }.thenBy { it.id })
+
+            for (fragment in allFragments) {
+                val record = GenericData.Record(schema)
+                record.put("id", fragment.id)
+                record.put("time", fragment.tick)
+                record.put("duration", fragment.duration)
+                record.put("cores", fragment.cores)
+                record.put("cpuUsage", fragment.usage)
+                record.put("flops", fragment.flops)
+
+                writer.write(record)
+            }
+        } finally {
+            writer.close()
+            metaWriter.close()
+        }
+    }
+}
+
+/**
+ * The supported trace conversions.
+ */
+sealed class TraceConversion(name: String) : OptionGroup(name) {
+    /**
+     * Read the fragments of the trace.
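+     *
+     * @param traceDirectory The directory containing the input trace.
+     * @param metaSchema The Avro schema of the per-VM metadata records.
+     * @param metaWriter The writer to which the per-VM metadata records are emitted.
+     * @return The trace fragments of all VMs, to be sorted and written by the caller.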
+     */
+    abstract fun read(
+        traceDirectory: File,
+        metaSchema: Schema,
+        metaWriter: ParquetWriter<GenericData.Record>
+    ): MutableList<Fragment>
+}
+
+class SolvinityConversion : TraceConversion("Solvinity") {
+    private val clusters by option()
+        .split(",")
+
+    private val vmPlacements by option("--vm-placements", help = "file containing the VM placements")
+        .file(canBeDir = false)
+        .convert { VmPlacementReader(it.inputStream()).use { reader -> reader.read() } }
+        .required()
+
+    override fun read(
+        traceDirectory: File,
+        metaSchema: Schema,
+        metaWriter: ParquetWriter<GenericData.Record>
+    ): MutableList<Fragment> {
+        val clusters = clusters?.toSet() ?: emptySet()
+        val timestampCol = 0
+        val cpuUsageCol = 1
+        val coreCol = 12
+        val provisionedMemoryCol = 20
+        val traceInterval = 5 * 60 * 1000L
+
+        // Identify start time of the entire trace
+        var minTimestamp = Long.MAX_VALUE
+        traceDirectory.walk()
+            .filterNot { it.isDirectory }
+            .filter { it.extension == "csv" || it.extension == "txt" }
+            .toList()
+            .forEach file@{ vmFile ->
+                BufferedReader(FileReader(vmFile)).use { reader ->
+                    reader.lineSequence()
+                        .chunked(128)
+                        .forEach { lines ->
+                            for (line in lines) {
+                                // Ignore comments in the trace
+                                if (line.startsWith("#") || line.isBlank()) {
+                                    continue
+                                }
+
+                                val vmId = vmFile.name
+
+                                // Check if VM in topology
+                                val clusterName = vmPlacements[vmId]
+                                if (clusterName == null || !clusters.contains(clusterName)) {
+                                    continue
+                                }
+
+                                val values = line.split("\t")
+                                val timestamp = (values[timestampCol].trim().toLong() - 5 * 60) * 1000L
+
+                                if (timestamp < minTimestamp) {
+                                    minTimestamp = timestamp
+                                }
+                                return@file
+                            }
+                        }
+                }
+            }
+
+        println("Start of trace at $minTimestamp")
+
+        val allFragments = mutableListOf<Fragment>()
+
+        val begin = 15 * 24 * 60 * 60 * 1000L
+        val end = 45 * 24 * 60 * 60 * 1000L
+
+        traceDirectory.walk()
+            .filterNot { it.isDirectory }
+            .filter { it.extension == "csv" || it.extension == "txt" }
+            .toList()
+            .forEach { vmFile ->
+                println(vmFile)
+
+                var vmId = ""
+                var maxCores = -1
+                var requiredMemory = -1L
+                var cores: Int
+                var minTime = Long.MAX_VALUE
+
+                val flopsFragments = sequence {
+                    var last: Fragment? = null
+
+                    BufferedReader(FileReader(vmFile)).use { reader ->
+                        reader.lineSequence()
+                            .chunked(128)
+                            .forEach { lines ->
+                                for (line in lines) {
+                                    // Ignore comments in the trace
+                                    if (line.startsWith("#") || line.isBlank()) {
+                                        continue
+                                    }
+
+                                    val values = line.split("\t")
+
+                                    vmId = vmFile.name
+
+                                    // Check if VM in topology
+                                    val clusterName = vmPlacements[vmId]
+                                    if (clusterName == null || !clusters.contains(clusterName)) {
+                                        continue
+                                    }
+
+                                    val timestamp =
+                                        (values[timestampCol].trim().toLong() - 5 * 60) * 1000L - minTimestamp
+                                    if (begin > timestamp || timestamp > end) {
+                                        continue
+                                    }
+
+                                    cores = values[coreCol].trim().toInt()
+                                    requiredMemory = max(requiredMemory, values[provisionedMemoryCol].trim().toLong())
+                                    maxCores = max(maxCores, cores)
+                                    minTime = min(minTime, timestamp)
+                                    val cpuUsage = values[cpuUsageCol].trim().toDouble() // MHz
+
+                                    val flops: Long = (cpuUsage * 5 * 60).toLong()
+
+                                    last = if (last != null && last!!.flops == 0L && flops == 0L) {
+                                        val oldFragment = last!!
+                                        Fragment(
+                                            vmId,
+                                            oldFragment.tick,
+                                            oldFragment.flops + flops,
+                                            oldFragment.duration + traceInterval,
+                                            cpuUsage,
+                                            cores
+                                        )
+                                    } else {
+                                        val fragment =
+                                            Fragment(
+                                                vmId,
+                                                timestamp,
+                                                flops,
+                                                traceInterval,
+                                                cpuUsage,
+                                                cores
+                                            )
+                                        if (last != null) {
+                                            yield(last!!)
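+                                            // (Idle fragments with zero FLOPs are merged into
+                                            // `last` in the branch above rather than flushed here.)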
+                                        }
+                                        fragment
+                                    }
+                                }
+                            }
+                    }
+
+                    if (last != null) {
+                        yield(last!!)
+                    }
+                }
+
+                var maxTime = Long.MIN_VALUE
+                flopsFragments.filter { it.tick in begin until end }.forEach { fragment ->
+                    allFragments.add(fragment)
+                    maxTime = max(maxTime, fragment.tick)
+                }
+
+                if (minTime in begin until end) {
+                    val metaRecord = GenericData.Record(metaSchema)
+                    metaRecord.put("id", vmId)
+                    metaRecord.put("submissionTime", minTime)
+                    metaRecord.put("endTime", maxTime)
+                    metaRecord.put("maxCores", maxCores)
+                    metaRecord.put("requiredMemory", requiredMemory)
+                    metaWriter.write(metaRecord)
+                }
+            }
+
+        return allFragments
+    }
+}
+
+/**
+ * Conversion of the Bitbrains public trace.
+ */
+class BitbrainsConversion : TraceConversion("Bitbrains") {
+    override fun read(
+        traceDirectory: File,
+        metaSchema: Schema,
+        metaWriter: ParquetWriter<GenericData.Record>
+    ): MutableList<Fragment> {
+        val timestampCol = 0
+        val cpuUsageCol = 3
+        val coreCol = 1
+        val provisionedMemoryCol = 5
+        val traceInterval = 5 * 60 * 1000L
+
+        val allFragments = mutableListOf<Fragment>()
+
+        traceDirectory.walk()
+            .filterNot { it.isDirectory }
+            .filter { it.extension == "csv" || it.extension == "txt" }
+            .toList()
+            .forEach { vmFile ->
+                println(vmFile)
+
+                var vmId = ""
+                var maxCores = -1
+                var requiredMemory = -1L
+                var cores: Int
+                var minTime = Long.MAX_VALUE
+
+                val flopsFragments = sequence {
+                    var last: Fragment? = null
+
+                    BufferedReader(FileReader(vmFile)).use { reader ->
+                        reader.lineSequence()
+                            .drop(1)
+                            .chunked(128)
+                            .forEach { lines ->
+                                for (line in lines) {
+                                    // Ignore comments in the trace
+                                    if (line.startsWith("#") || line.isBlank()) {
+                                        continue
+                                    }
+
+                                    val values = line.split(";\t")
+
+                                    vmId = vmFile.name
+
+                                    val timestamp = (values[timestampCol].trim().toLong() - 5 * 60) * 1000L
+
+                                    cores = values[coreCol].trim().toInt()
+                                    val provisionedMemory = values[provisionedMemoryCol].trim().toDouble() // KB
+                                    requiredMemory = max(requiredMemory, (provisionedMemory / 1000).toLong())
+                                    maxCores = max(maxCores, cores)
+                                    minTime = min(minTime, timestamp)
+                                    val cpuUsage = values[cpuUsageCol].trim().toDouble() // MHz
+
+                                    val flops: Long = (cpuUsage * 5 * 60).toLong()
+
+                                    last = if (last != null && last!!.flops == 0L && flops == 0L) {
+                                        val oldFragment = last!!
+                                        Fragment(
+                                            vmId,
+                                            oldFragment.tick,
+                                            oldFragment.flops + flops,
+                                            oldFragment.duration + traceInterval,
+                                            cpuUsage,
+                                            cores
+                                        )
+                                    } else {
+                                        val fragment =
+                                            Fragment(
+                                                vmId,
+                                                timestamp,
+                                                flops,
+                                                traceInterval,
+                                                cpuUsage,
+                                                cores
+                                            )
+                                        if (last != null) {
+                                            yield(last!!)
+                                        }
+                                        fragment
+                                    }
+                                }
+                            }
+                    }
+
+                    if (last != null) {
+                        yield(last!!)
+                    }
+                }
+
+                var maxTime = Long.MIN_VALUE
+                flopsFragments.forEach { fragment ->
+                    allFragments.add(fragment)
+                    maxTime = max(maxTime, fragment.tick)
+                }
+
+                val metaRecord = GenericData.Record(metaSchema)
+                metaRecord.put("id", vmId)
+                metaRecord.put("submissionTime", minTime)
+                metaRecord.put("endTime", maxTime)
+                metaRecord.put("maxCores", maxCores)
+                metaRecord.put("requiredMemory", requiredMemory)
+                metaWriter.write(metaRecord)
+            }
+
+        return allFragments
+    }
+}
+
+/**
+ * Conversion of the Azure public VM trace.
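+ *
+ * Only a deterministic sample of the VMs in `vmtable.csv` (1%, drawn from the seed given
+ * via `--seed`) is converted, and the CPU readings in `readings/readings-1.csv` through
+ * `readings-195.csv` are scaled by a factor of 3,000 to obtain MHz, which assumes a
+ * nominal 3 GHz core.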
+ */
+class AzureConversion : TraceConversion("Azure") {
+    private val seed by option(help = "seed for trace sampling")
+        .long()
+        .default(0)
+
+    override fun read(
+        traceDirectory: File,
+        metaSchema: Schema,
+        metaWriter: ParquetWriter<GenericData.Record>
+    ): MutableList<Fragment> {
+        val random = Random(seed)
+        val fraction = 0.01
+
+        // Read VM table
+        val vmIdTableCol = 0
+        val coreTableCol = 9
+        val provisionedMemoryTableCol = 10
+
+        var vmId: String
+        var cores: Int
+        var requiredMemory: Long
+
+        val vmIds = mutableSetOf<String>()
+        val vmIdToMetadata = mutableMapOf<String, VmInfo>()
+
+        BufferedReader(FileReader(File(traceDirectory, "vmtable.csv"))).use { reader ->
+            reader.lineSequence()
+                .chunked(1024)
+                .forEach { lines ->
+                    for (line in lines) {
+                        // Ignore comments in the trace
+                        if (line.startsWith("#") || line.isBlank()) {
+                            continue
+                        }
+                        // Sample only a fraction of the VMs
+                        if (random.nextDouble() > fraction) {
+                            continue
+                        }
+
+                        val values = line.split(",")
+
+                        // Exclude VMs with a large number of cores (not specified exactly)
+                        if (values[coreTableCol].contains(">")) {
+                            continue
+                        }
+
+                        vmId = values[vmIdTableCol].trim()
+                        cores = values[coreTableCol].trim().toInt()
+                        requiredMemory = values[provisionedMemoryTableCol].trim().toInt() * 1_000L // GB -> MB
+
+                        vmIds.add(vmId)
+                        vmIdToMetadata[vmId] = VmInfo(cores, requiredMemory, Long.MAX_VALUE, -1L)
+                    }
+                }
+        }
+
+        // Read VM metric reading files
+        val timestampCol = 0
+        val vmIdCol = 1
+        val cpuUsageCol = 4
+        val traceInterval = 5 * 60 * 1000L
+
+        val vmIdToFragments = mutableMapOf<String, MutableList<Fragment>>()
+        val vmIdToLastFragment = mutableMapOf<String, Fragment?>()
+        val allFragments = mutableListOf<Fragment>()
+
+        for (i in ProgressBar.wrap((1..195).toList(), "Reading Trace")) {
+            val readingsFile = File(File(traceDirectory, "readings"), "readings-$i.csv")
+            var timestamp: Long
+            var cpuUsage: Double
+
+            BufferedReader(FileReader(readingsFile)).use { reader ->
+                reader.lineSequence()
+                    .chunked(128)
+                    .forEach { lines ->
+                        for (line in lines) {
+                            // Ignore comments in the trace
+                            if (line.startsWith("#") || line.isBlank()) {
+                                continue
+                            }
+
+                            val values = line.split(",")
+                            vmId = values[vmIdCol].trim()
+
+                            // Ignore readings for VMs not in the sample
+                            if (!vmIds.contains(vmId)) {
+                                continue
+                            }
+
+                            timestamp = values[timestampCol].trim().toLong() * 1000L
+                            vmIdToMetadata[vmId]!!.minTime = min(vmIdToMetadata[vmId]!!.minTime, timestamp)
+                            cpuUsage = values[cpuUsageCol].trim().toDouble() * 3_000 // MHz
+                            vmIdToMetadata[vmId]!!.maxTime = max(vmIdToMetadata[vmId]!!.maxTime, timestamp)
+
+                            val flops: Long = (cpuUsage * 5 * 60).toLong()
+                            val lastFragment = vmIdToLastFragment[vmId]
+
+                            vmIdToLastFragment[vmId] =
+                                if (lastFragment != null && lastFragment.flops == 0L && flops == 0L) {
+                                    Fragment(
+                                        vmId,
+                                        lastFragment.tick,
+                                        lastFragment.flops + flops,
+                                        lastFragment.duration + traceInterval,
+                                        cpuUsage,
+                                        vmIdToMetadata[vmId]!!.cores
+                                    )
+                                } else {
+                                    val fragment =
+                                        Fragment(
+                                            vmId,
+                                            timestamp,
+                                            flops,
+                                            traceInterval,
+                                            cpuUsage,
+                                            vmIdToMetadata[vmId]!!.cores
+                                        )
+                                    if (lastFragment != null) {
+                                        if (vmIdToFragments[vmId] == null) {
+                                            vmIdToFragments[vmId] = mutableListOf()
+                                        }
+                                        vmIdToFragments[vmId]!!.add(lastFragment)
+                                        allFragments.add(lastFragment)
+                                    }
+                                    fragment
+                                }
+                        }
+                    }
+            }
+        }
+
+        for (entry in vmIdToLastFragment) {
+            if (entry.value != null) {
+                if (vmIdToFragments[entry.key] == null) {
+                    vmIdToFragments[entry.key] = mutableListOf()
+                }
+                vmIdToFragments[entry.key]!!.add(entry.value!!)
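+                // The reading loop above only emits a fragment once its successor
+                // arrives, so flush each VM's trailing fragment into the output as well.
+                allFragments.add(entry.value!!)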
+            }
+        }
+
+        println("Read ${vmIdToLastFragment.size} VMs")
+
+        for (entry in vmIdToMetadata) {
+            val metaRecord = GenericData.Record(metaSchema)
+            metaRecord.put("id", entry.key)
+            metaRecord.put("submissionTime", entry.value.minTime)
+            metaRecord.put("endTime", entry.value.maxTime)
+            metaRecord.put("maxCores", entry.value.cores)
+            metaRecord.put("requiredMemory", entry.value.requiredMemory)
+            metaWriter.write(metaRecord)
+        }
+
+        return allFragments
+    }
+}
+
+data class Fragment(
+    val id: String,
+    val tick: Long,
+    val flops: Long,
+    val duration: Long,
+    val usage: Double,
+    val cores: Int
+)
+
+class VmInfo(val cores: Int, val requiredMemory: Long, var minTime: Long, var maxTime: Long)
+
+/**
+ * A script to convert a trace in text format into a Parquet trace.
+ */
+fun main(args: Array<String>): Unit = TraceConverterCli().main(args)
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/VmPlacementReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/VmPlacementReader.kt
new file mode 100644
index 00000000..fb641f1b
--- /dev/null
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/VmPlacementReader.kt
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.experiments.capelin.trace
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
+import com.fasterxml.jackson.module.kotlin.readValue
+import java.io.InputStream
+
+/**
+ * A parser for the JSON VM placement data files used for the SC20 paper.
+ *
+ * @param input The input stream to read from.
+ * @param mapper The Jackson object mapper to use.
+ */
+class VmPlacementReader(
+    private val input: InputStream,
+    private val mapper: ObjectMapper = jacksonObjectMapper()
+) : AutoCloseable {
+    /**
+     * Read the VM placements.
+     */
+    fun read(): Map<String, String> {
+        return mapper.readValue<Map<String, String>>(input)
+            .mapKeys { "vm__workload__${it.key}.txt" }
+            .mapValues { it.value.split("/")[1] } // Clusters have format XX0 / X00
+    }
+
+    override fun close() {
+        input.close()
+    }
+}
-- 
cgit v1.2.3