path: root/opendc-trace/opendc-trace-opendc
author    Fabian Mastenbroek <mail.fabianm@gmail.com>    2021-09-21 12:04:15 +0200
committer GitHub <noreply@github.com>                    2021-09-21 12:04:15 +0200
commit    322d91db03a7d74a00ec623ce624f979c0b77c03 (patch)
tree      73201888564accde4cfa107f4ffdb15e9f93d45c /opendc-trace/opendc-trace-opendc
parent    453c25c4b453fa0af26bebbd8863abfb79218119 (diff)
parent    68ef3700ed2f69bcf0118bb69eda71e6b1f4d54f (diff)
merge: Add support for trace writing
This pull request extends the trace API to support writing new traces.

- Unify columns of different tables
- Support column lookup via index
- Use index lookup in trace loader
- Add property for describing partition keys
- Simplify TraceFormat SPI interface
- Add support for writing traces

**Breaking API Changes**

- The `TraceFormat` SPI interface has been redesigned.
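As a rough illustration of the new write path, the sketch below uses the `TraceFormat` and `TableWriter` methods introduced in this diff (`create`, `newWriter`, `resolve`, `startRow`/`set*`/`endRow`). The output directory and the row values are hypothetical, and the `RESOURCES_SCHEMA` may require further columns to be populated before `endRow()` succeeds; treat it as a sketch of the intended usage, not part of the change itself.

```kotlin
import org.opendc.trace.*
import org.opendc.trace.opendc.OdcVmTraceFormat
import java.nio.file.Paths
import java.time.Instant

fun main() {
    val format = OdcVmTraceFormat()
    val path = Paths.get("/tmp/example-trace") // hypothetical output directory

    // Create the trace directory layout (meta.parquet / trace.parquet).
    format.create(path)

    // Obtain a writer for the resources table and resolve columns to indices once.
    val writer = format.newWriter(path, TABLE_RESOURCES)
    val colId = writer.resolve(RESOURCE_ID)
    val colStart = writer.resolve(RESOURCE_START_TIME)
    val colStop = writer.resolve(RESOURCE_STOP_TIME)
    val colCpuCount = writer.resolve(RESOURCE_CPU_COUNT)
    val colMemCapacity = writer.resolve(RESOURCE_MEM_CAPACITY)

    try {
        writer.startRow()
        writer.set(colId, "1019")                                   // hypothetical VM id
        writer.set(colStart, Instant.parse("2013-08-12T13:40:46Z")) // converted to epoch millis by the writer
        writer.set(colStop, Instant.parse("2013-09-12T13:40:46Z"))
        writer.setInt(colCpuCount, 1)
        writer.setDouble(colMemCapacity, 181352.0)                  // hypothetical memory capacity
        writer.endRow()
    } finally {
        writer.close()
    }
}
```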
Diffstat (limited to 'opendc-trace/opendc-trace-opendc')
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTable.kt        |  53
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableReader.kt  |  82
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt  | 123
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTable.kt             |  53
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableReader.kt       |  82
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt       | 106
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTrace.kt                     |  49
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt               |  97
-rw-r--r--  opendc-trace/opendc-trace-opendc/src/test/kotlin/org/opendc/trace/opendc/OdcVmTraceFormatTest.kt           |  49
9 files changed, 416 insertions(+), 278 deletions(-)
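The read path changes in the same way: columns are resolved to integer indices up front and the per-row accessors take that index (see the `resolve`/`get*` changes in the readers below). A minimal sketch, assuming `TableReader` exposes `nextRow()` alongside the index-based getters shown in this diff; the trace path is the one used by the test suite, and the `as String` cast reflects the `get(index)` mapping of the Avro `id` field.

```kotlin
import org.opendc.trace.*
import org.opendc.trace.opendc.OdcVmTraceFormat
import java.nio.file.Paths

fun main() {
    val format = OdcVmTraceFormat()
    val path = Paths.get("src/test/resources/trace-v2.1") // trace shipped with the tests

    val reader = format.newReader(path, TABLE_RESOURCE_STATES)

    // Resolve logical columns to indices once; resolve() returns -1 for unsupported columns.
    val colId = reader.resolve(RESOURCE_ID)
    val colTimestamp = reader.resolve(RESOURCE_STATE_TIMESTAMP)
    val colCpuUsage = reader.resolve(RESOURCE_STATE_CPU_USAGE)

    while (reader.nextRow()) {
        val id = reader.get(colId) as String         // Avro "id" field, read as a string
        val timestamp = reader.get(colTimestamp)     // java.time.Instant
        val cpuUsage = reader.getDouble(colCpuUsage) // CPU usage of the resource state
        println("$id $timestamp $cpuUsage")
    }
}
```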
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTable.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTable.kt
deleted file mode 100644
index bee4ba7e..00000000
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTable.kt
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 AtLarge Research
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-package org.opendc.trace.opendc
-
-import org.apache.avro.generic.GenericRecord
-import org.opendc.trace.*
-import org.opendc.trace.util.parquet.LocalParquetReader
-import java.nio.file.Path
-
-/**
- * The resource state [Table] in the OpenDC virtual machine trace format.
- */
-internal class OdcVmResourceStateTable(private val path: Path) : Table {
- override val name: String = TABLE_RESOURCE_STATES
- override val isSynthetic: Boolean = false
-
- override val columns: List<TableColumn<*>> = listOf(
- RESOURCE_STATE_ID,
- RESOURCE_STATE_TIMESTAMP,
- RESOURCE_STATE_DURATION,
- RESOURCE_STATE_CPU_COUNT,
- RESOURCE_STATE_CPU_USAGE,
- )
-
- override fun newReader(): TableReader {
- val reader = LocalParquetReader<GenericRecord>(path.resolve("trace.parquet"))
- return OdcVmResourceStateTableReader(reader)
- }
-
- override fun newReader(partition: String): TableReader {
- throw IllegalArgumentException("Unknown partition $partition")
- }
-}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableReader.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableReader.kt
index df3bcfa6..b5043f82 100644
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableReader.kt
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableReader.kt
@@ -55,54 +55,46 @@ internal class OdcVmResourceStateTableReader(private val reader: LocalParquetRea
return record != null
}
- override fun hasColumn(column: TableColumn<*>): Boolean {
- return when (column) {
- RESOURCE_STATE_ID -> true
- RESOURCE_STATE_TIMESTAMP -> true
- RESOURCE_STATE_DURATION -> true
- RESOURCE_STATE_CPU_COUNT -> true
- RESOURCE_STATE_CPU_USAGE -> true
- else -> false
- }
+ override fun resolve(column: TableColumn<*>): Int = columns[column] ?: -1
+
+ override fun isNull(index: Int): Boolean {
+ check(index in 0..columns.size) { "Invalid column index" }
+ return get(index) == null
}
- override fun <T> get(column: TableColumn<T>): T {
+ override fun get(index: Int): Any? {
val record = checkNotNull(record) { "Reader in invalid state" }
- @Suppress("UNCHECKED_CAST")
- val res: Any = when (column) {
- RESOURCE_STATE_ID -> record[COL_ID].toString()
- RESOURCE_STATE_TIMESTAMP -> Instant.ofEpochMilli(record[COL_TIMESTAMP] as Long)
- RESOURCE_STATE_DURATION -> Duration.ofMillis(record[COL_DURATION] as Long)
- RESOURCE_STATE_CPU_COUNT -> getInt(RESOURCE_STATE_CPU_COUNT)
- RESOURCE_STATE_CPU_USAGE -> getDouble(RESOURCE_STATE_CPU_USAGE)
+ return when (index) {
+ COL_ID -> record[AVRO_COL_ID].toString()
+ COL_TIMESTAMP -> Instant.ofEpochMilli(record[AVRO_COL_TIMESTAMP] as Long)
+ COL_DURATION -> Duration.ofMillis(record[AVRO_COL_DURATION] as Long)
+ COL_CPU_COUNT -> getInt(index)
+ COL_CPU_USAGE -> getDouble(index)
else -> throw IllegalArgumentException("Invalid column")
}
-
- @Suppress("UNCHECKED_CAST")
- return res as T
}
- override fun getBoolean(column: TableColumn<Boolean>): Boolean {
+ override fun getBoolean(index: Int): Boolean {
throw IllegalArgumentException("Invalid column")
}
- override fun getInt(column: TableColumn<Int>): Int {
+ override fun getInt(index: Int): Int {
val record = checkNotNull(record) { "Reader in invalid state" }
- return when (column) {
- RESOURCE_STATE_CPU_COUNT -> record[COL_CPU_COUNT] as Int
+ return when (index) {
+ COL_CPU_COUNT -> record[AVRO_COL_CPU_COUNT] as Int
else -> throw IllegalArgumentException("Invalid column")
}
}
- override fun getLong(column: TableColumn<Long>): Long {
+ override fun getLong(index: Int): Long {
throw IllegalArgumentException("Invalid column")
}
- override fun getDouble(column: TableColumn<Double>): Double {
+ override fun getDouble(index: Int): Double {
val record = checkNotNull(record) { "Reader in invalid state" }
- return when (column) {
- RESOURCE_STATE_CPU_USAGE -> (record[COL_CPU_USAGE] as Number).toDouble()
+ return when (index) {
+ COL_CPU_USAGE -> (record[AVRO_COL_CPU_USAGE] as Number).toDouble()
else -> throw IllegalArgumentException("Invalid column")
}
}
@@ -118,20 +110,34 @@ internal class OdcVmResourceStateTableReader(private val reader: LocalParquetRea
*/
private fun initColumns(schema: Schema) {
try {
- COL_ID = schema.getField("id").pos()
- COL_TIMESTAMP = (schema.getField("timestamp") ?: schema.getField("time")).pos()
- COL_DURATION = schema.getField("duration").pos()
- COL_CPU_COUNT = (schema.getField("cpu_count") ?: schema.getField("cores")).pos()
- COL_CPU_USAGE = (schema.getField("cpu_usage") ?: schema.getField("cpuUsage")).pos()
+ AVRO_COL_ID = schema.getField("id").pos()
+ AVRO_COL_TIMESTAMP = (schema.getField("timestamp") ?: schema.getField("time")).pos()
+ AVRO_COL_DURATION = schema.getField("duration").pos()
+ AVRO_COL_CPU_COUNT = (schema.getField("cpu_count") ?: schema.getField("cores")).pos()
+ AVRO_COL_CPU_USAGE = (schema.getField("cpu_usage") ?: schema.getField("cpuUsage")).pos()
} catch (e: NullPointerException) {
// This happens when the field we are trying to access does not exist
throw IllegalArgumentException("Invalid schema", e)
}
}
- private var COL_ID = -1
- private var COL_TIMESTAMP = -1
- private var COL_DURATION = -1
- private var COL_CPU_COUNT = -1
- private var COL_CPU_USAGE = -1
+ private var AVRO_COL_ID = -1
+ private var AVRO_COL_TIMESTAMP = -1
+ private var AVRO_COL_DURATION = -1
+ private var AVRO_COL_CPU_COUNT = -1
+ private var AVRO_COL_CPU_USAGE = -1
+
+ private val COL_ID = 0
+ private val COL_TIMESTAMP = 1
+ private val COL_DURATION = 2
+ private val COL_CPU_COUNT = 3
+ private val COL_CPU_USAGE = 4
+
+ private val columns = mapOf(
+ RESOURCE_ID to COL_ID,
+ RESOURCE_STATE_TIMESTAMP to COL_TIMESTAMP,
+ RESOURCE_STATE_DURATION to COL_DURATION,
+ RESOURCE_CPU_COUNT to COL_CPU_COUNT,
+ RESOURCE_STATE_CPU_USAGE to COL_CPU_USAGE,
+ )
}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt
new file mode 100644
index 00000000..15a8cb85
--- /dev/null
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.opendc
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericRecord
+import org.apache.avro.generic.GenericRecordBuilder
+import org.apache.parquet.hadoop.ParquetWriter
+import org.opendc.trace.*
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * A [TableWriter] implementation for the OpenDC virtual machine trace format.
+ */
+internal class OdcVmResourceStateTableWriter(
+ private val writer: ParquetWriter<GenericRecord>,
+ private val schema: Schema
+) : TableWriter {
+ /**
+ * The current builder for the record that is being written.
+ */
+ private var builder: GenericRecordBuilder? = null
+
+ /**
+ * The fields belonging to the resource state schema.
+ */
+ private val fields = schema.fields
+
+ override fun startRow() {
+ builder = GenericRecordBuilder(schema)
+ }
+
+ override fun endRow() {
+ val builder = checkNotNull(builder) { "No active row" }
+ this.builder = null
+
+ val record = builder.build()
+ val id = record[COL_ID] as String
+ val timestamp = record[COL_TIMESTAMP] as Long
+
+ check(lastId != id || timestamp >= lastTimestamp) { "Records need to be ordered by (id, timestamp)" }
+
+ writer.write(builder.build())
+
+ lastId = id
+ lastTimestamp = timestamp
+ }
+
+ override fun resolve(column: TableColumn<*>): Int {
+ val schema = schema
+ return when (column) {
+ RESOURCE_ID -> schema.getField("id").pos()
+ RESOURCE_STATE_TIMESTAMP -> (schema.getField("timestamp") ?: schema.getField("time")).pos()
+ RESOURCE_STATE_DURATION -> schema.getField("duration").pos()
+ RESOURCE_CPU_COUNT -> (schema.getField("cpu_count") ?: schema.getField("cores")).pos()
+ RESOURCE_STATE_CPU_USAGE -> (schema.getField("cpu_usage") ?: schema.getField("cpuUsage")).pos()
+ else -> -1
+ }
+ }
+
+ override fun set(index: Int, value: Any) {
+ val builder = checkNotNull(builder) { "No active row" }
+
+ builder.set(
+ fields[index],
+ when (index) {
+ COL_TIMESTAMP -> (value as Instant).toEpochMilli()
+ COL_DURATION -> (value as Duration).toMillis()
+ else -> value
+ }
+ )
+ }
+
+ override fun setBoolean(index: Int, value: Boolean) = set(index, value)
+
+ override fun setInt(index: Int, value: Int) = set(index, value)
+
+ override fun setLong(index: Int, value: Long) = set(index, value)
+
+ override fun setDouble(index: Int, value: Double) = set(index, value)
+
+ override fun flush() {
+ // Not available
+ }
+
+ override fun close() {
+ writer.close()
+ }
+
+ /**
+ * Last column values that are used to check for correct partitioning.
+ */
+ private var lastId: String? = null
+ private var lastTimestamp: Long = Long.MIN_VALUE
+
+ /**
+ * Columns with special behavior.
+ */
+ private val COL_ID = resolve(RESOURCE_ID)
+ private val COL_TIMESTAMP = resolve(RESOURCE_STATE_TIMESTAMP)
+ private val COL_DURATION = resolve(RESOURCE_STATE_DURATION)
+}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTable.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTable.kt
deleted file mode 100644
index b1456560..00000000
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTable.kt
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 AtLarge Research
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-package org.opendc.trace.opendc
-
-import org.apache.avro.generic.GenericRecord
-import org.opendc.trace.*
-import org.opendc.trace.util.parquet.LocalParquetReader
-import java.nio.file.Path
-
-/**
- * The resource [Table] for the OpenDC virtual machine trace format.
- */
-internal class OdcVmResourceTable(private val path: Path) : Table {
- override val name: String = TABLE_RESOURCES
- override val isSynthetic: Boolean = false
-
- override val columns: List<TableColumn<*>> = listOf(
- RESOURCE_ID,
- RESOURCE_START_TIME,
- RESOURCE_STOP_TIME,
- RESOURCE_CPU_COUNT,
- RESOURCE_MEM_CAPACITY,
- )
-
- override fun newReader(): TableReader {
- val reader = LocalParquetReader<GenericRecord>(path.resolve("meta.parquet"))
- return OdcVmResourceTableReader(reader)
- }
-
- override fun newReader(partition: String): TableReader {
- throw IllegalArgumentException("Unknown partition $partition")
- }
-}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableReader.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableReader.kt
index c52da62d..d93929aa 100644
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableReader.kt
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableReader.kt
@@ -54,56 +54,48 @@ internal class OdcVmResourceTableReader(private val reader: LocalParquetReader<G
return record != null
}
- override fun hasColumn(column: TableColumn<*>): Boolean {
- return when (column) {
- RESOURCE_ID -> true
- RESOURCE_START_TIME -> true
- RESOURCE_STOP_TIME -> true
- RESOURCE_CPU_COUNT -> true
- RESOURCE_MEM_CAPACITY -> true
- else -> false
- }
+ override fun resolve(column: TableColumn<*>): Int = columns[column] ?: -1
+
+ override fun isNull(index: Int): Boolean {
+ check(index in 0..columns.size) { "Invalid column index" }
+ return get(index) == null
}
- override fun <T> get(column: TableColumn<T>): T {
+ override fun get(index: Int): Any? {
val record = checkNotNull(record) { "Reader in invalid state" }
- @Suppress("UNCHECKED_CAST")
- val res: Any = when (column) {
- RESOURCE_ID -> record[COL_ID].toString()
- RESOURCE_START_TIME -> Instant.ofEpochMilli(record[COL_START_TIME] as Long)
- RESOURCE_STOP_TIME -> Instant.ofEpochMilli(record[COL_STOP_TIME] as Long)
- RESOURCE_CPU_COUNT -> getInt(RESOURCE_CPU_COUNT)
- RESOURCE_MEM_CAPACITY -> getDouble(RESOURCE_MEM_CAPACITY)
+ return when (index) {
+ COL_ID -> record[AVRO_COL_ID].toString()
+ COL_START_TIME -> Instant.ofEpochMilli(record[AVRO_COL_START_TIME] as Long)
+ COL_STOP_TIME -> Instant.ofEpochMilli(record[AVRO_COL_STOP_TIME] as Long)
+ COL_CPU_COUNT -> getInt(index)
+ COL_MEM_CAPACITY -> getDouble(index)
else -> throw IllegalArgumentException("Invalid column")
}
-
- @Suppress("UNCHECKED_CAST")
- return res as T
}
- override fun getBoolean(column: TableColumn<Boolean>): Boolean {
+ override fun getBoolean(index: Int): Boolean {
throw IllegalArgumentException("Invalid column")
}
- override fun getInt(column: TableColumn<Int>): Int {
+ override fun getInt(index: Int): Int {
val record = checkNotNull(record) { "Reader in invalid state" }
- return when (column) {
- RESOURCE_CPU_COUNT -> record[COL_CPU_COUNT] as Int
+ return when (index) {
+ COL_CPU_COUNT -> record[AVRO_COL_CPU_COUNT] as Int
else -> throw IllegalArgumentException("Invalid column")
}
}
- override fun getLong(column: TableColumn<Long>): Long {
+ override fun getLong(index: Int): Long {
throw IllegalArgumentException("Invalid column")
}
- override fun getDouble(column: TableColumn<Double>): Double {
+ override fun getDouble(index: Int): Double {
val record = checkNotNull(record) { "Reader in invalid state" }
- return when (column) {
- RESOURCE_MEM_CAPACITY -> (record[COL_MEM_CAPACITY] as Number).toDouble()
+ return when (index) {
+ COL_MEM_CAPACITY -> (record[AVRO_COL_MEM_CAPACITY] as Number).toDouble()
else -> throw IllegalArgumentException("Invalid column")
}
}
@@ -119,20 +111,34 @@ internal class OdcVmResourceTableReader(private val reader: LocalParquetReader<G
*/
private fun initColumns(schema: Schema) {
try {
- COL_ID = schema.getField("id").pos()
- COL_START_TIME = (schema.getField("start_time") ?: schema.getField("submissionTime")).pos()
- COL_STOP_TIME = (schema.getField("stop_time") ?: schema.getField("endTime")).pos()
- COL_CPU_COUNT = (schema.getField("cpu_count") ?: schema.getField("maxCores")).pos()
- COL_MEM_CAPACITY = (schema.getField("mem_capacity") ?: schema.getField("requiredMemory")).pos()
+ AVRO_COL_ID = schema.getField("id").pos()
+ AVRO_COL_START_TIME = (schema.getField("start_time") ?: schema.getField("submissionTime")).pos()
+ AVRO_COL_STOP_TIME = (schema.getField("stop_time") ?: schema.getField("endTime")).pos()
+ AVRO_COL_CPU_COUNT = (schema.getField("cpu_count") ?: schema.getField("maxCores")).pos()
+ AVRO_COL_MEM_CAPACITY = (schema.getField("mem_capacity") ?: schema.getField("requiredMemory")).pos()
} catch (e: NullPointerException) {
// This happens when the field we are trying to access does not exist
throw IllegalArgumentException("Invalid schema")
}
}
- private var COL_ID = -1
- private var COL_START_TIME = -1
- private var COL_STOP_TIME = -1
- private var COL_CPU_COUNT = -1
- private var COL_MEM_CAPACITY = -1
+ private var AVRO_COL_ID = -1
+ private var AVRO_COL_START_TIME = -1
+ private var AVRO_COL_STOP_TIME = -1
+ private var AVRO_COL_CPU_COUNT = -1
+ private var AVRO_COL_MEM_CAPACITY = -1
+
+ private val COL_ID = 0
+ private val COL_START_TIME = 1
+ private val COL_STOP_TIME = 2
+ private val COL_CPU_COUNT = 3
+ private val COL_MEM_CAPACITY = 4
+
+ private val columns = mapOf(
+ RESOURCE_ID to COL_ID,
+ RESOURCE_START_TIME to COL_START_TIME,
+ RESOURCE_STOP_TIME to COL_STOP_TIME,
+ RESOURCE_CPU_COUNT to COL_CPU_COUNT,
+ RESOURCE_MEM_CAPACITY to COL_MEM_CAPACITY,
+ )
}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt
new file mode 100644
index 00000000..9cc6ca7d
--- /dev/null
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.opendc
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericRecord
+import org.apache.avro.generic.GenericRecordBuilder
+import org.apache.parquet.hadoop.ParquetWriter
+import org.opendc.trace.*
+import java.time.Instant
+import kotlin.math.roundToLong
+
+/**
+ * A [TableWriter] implementation for the OpenDC virtual machine trace format.
+ */
+internal class OdcVmResourceTableWriter(
+ private val writer: ParquetWriter<GenericRecord>,
+ private val schema: Schema
+) : TableWriter {
+ /**
+ * The current builder for the record that is being written.
+ */
+ private var builder: GenericRecordBuilder? = null
+
+ /**
+ * The fields belonging to the resource schema.
+ */
+ private val fields = schema.fields
+
+ override fun startRow() {
+ builder = GenericRecordBuilder(schema)
+ }
+
+ override fun endRow() {
+ val builder = checkNotNull(builder) { "No active row" }
+ this.builder = null
+ writer.write(builder.build())
+ }
+
+ override fun resolve(column: TableColumn<*>): Int {
+ val schema = schema
+ return when (column) {
+ RESOURCE_ID -> schema.getField("id").pos()
+ RESOURCE_START_TIME -> (schema.getField("start_time") ?: schema.getField("submissionTime")).pos()
+ RESOURCE_STOP_TIME -> (schema.getField("stop_time") ?: schema.getField("endTime")).pos()
+ RESOURCE_CPU_COUNT -> (schema.getField("cpu_count") ?: schema.getField("maxCores")).pos()
+ RESOURCE_MEM_CAPACITY -> (schema.getField("mem_capacity") ?: schema.getField("requiredMemory")).pos()
+ else -> -1
+ }
+ }
+
+ override fun set(index: Int, value: Any) {
+ val builder = checkNotNull(builder) { "No active row" }
+ builder.set(
+ fields[index],
+ when (index) {
+ COL_START_TIME, COL_STOP_TIME -> (value as Instant).toEpochMilli()
+ COL_MEM_CAPACITY -> (value as Double).roundToLong()
+ else -> value
+ }
+ )
+ }
+
+ override fun setBoolean(index: Int, value: Boolean) = set(index, value)
+
+ override fun setInt(index: Int, value: Int) = set(index, value)
+
+ override fun setLong(index: Int, value: Long) = set(index, value)
+
+ override fun setDouble(index: Int, value: Double) = set(index, value)
+
+ override fun flush() {
+ // Not available
+ }
+
+ override fun close() {
+ writer.close()
+ }
+
+ /**
+ * Columns with special behavior.
+ */
+ private val COL_START_TIME = resolve(RESOURCE_START_TIME)
+ private val COL_STOP_TIME = resolve(RESOURCE_STOP_TIME)
+ private val COL_MEM_CAPACITY = resolve(RESOURCE_MEM_CAPACITY)
+}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTrace.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTrace.kt
deleted file mode 100644
index 3e5029b4..00000000
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTrace.kt
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2021 AtLarge Research
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-package org.opendc.trace.opendc
-
-import org.opendc.trace.TABLE_RESOURCES
-import org.opendc.trace.TABLE_RESOURCE_STATES
-import org.opendc.trace.Table
-import org.opendc.trace.Trace
-import java.nio.file.Path
-
-/**
- * A [Trace] in the OpenDC virtual machine trace format.
- */
-public class OdcVmTrace internal constructor(private val path: Path) : Trace {
- override val tables: List<String> = listOf(TABLE_RESOURCES, TABLE_RESOURCE_STATES)
-
- override fun containsTable(name: String): Boolean =
- name == TABLE_RESOURCES || name == TABLE_RESOURCE_STATES
-
- override fun getTable(name: String): Table? {
- return when (name) {
- TABLE_RESOURCES -> OdcVmResourceTable(path)
- TABLE_RESOURCE_STATES -> OdcVmResourceStateTable(path)
- else -> null
- }
- }
-
- override fun toString(): String = "OdcVmTrace[$path]"
-}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt
index 8edba725..9b32f8fd 100644
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt
@@ -24,11 +24,18 @@ package org.opendc.trace.opendc
import org.apache.avro.Schema
import org.apache.avro.SchemaBuilder
+import org.apache.avro.generic.GenericRecord
+import org.apache.parquet.avro.AvroParquetWriter
+import org.apache.parquet.hadoop.ParquetFileWriter
+import org.apache.parquet.hadoop.metadata.CompressionCodecName
+import org.opendc.trace.*
+import org.opendc.trace.spi.TableDetails
import org.opendc.trace.spi.TraceFormat
+import org.opendc.trace.util.parquet.LocalOutputFile
+import org.opendc.trace.util.parquet.LocalParquetReader
import org.opendc.trace.util.parquet.TIMESTAMP_SCHEMA
-import java.net.URL
-import java.nio.file.Paths
-import kotlin.io.path.exists
+import java.nio.file.Files
+import java.nio.file.Path
/**
* A [TraceFormat] implementation of the OpenDC virtual machine trace format.
@@ -39,13 +46,83 @@ public class OdcVmTraceFormat : TraceFormat {
*/
override val name: String = "opendc-vm"
- /**
- * Open a Bitbrains Parquet trace.
- */
- override fun open(url: URL): OdcVmTrace {
- val path = Paths.get(url.toURI())
- require(path.exists()) { "URL $url does not exist" }
- return OdcVmTrace(path)
+ override fun create(path: Path) {
+ // Construct directory containing the trace files
+ Files.createDirectory(path)
+
+ val tables = getTables(path)
+
+ for (table in tables) {
+ val writer = newWriter(path, table)
+ writer.close()
+ }
+ }
+
+ override fun getTables(path: Path): List<String> = listOf(TABLE_RESOURCES, TABLE_RESOURCE_STATES)
+
+ override fun getDetails(path: Path, table: String): TableDetails {
+ return when (table) {
+ TABLE_RESOURCES -> TableDetails(
+ listOf(
+ RESOURCE_ID,
+ RESOURCE_START_TIME,
+ RESOURCE_STOP_TIME,
+ RESOURCE_CPU_COUNT,
+ RESOURCE_MEM_CAPACITY,
+ )
+ )
+ TABLE_RESOURCE_STATES -> TableDetails(
+ listOf(
+ RESOURCE_ID,
+ RESOURCE_STATE_TIMESTAMP,
+ RESOURCE_STATE_DURATION,
+ RESOURCE_CPU_COUNT,
+ RESOURCE_STATE_CPU_USAGE,
+ ),
+ listOf(RESOURCE_ID, RESOURCE_STATE_TIMESTAMP)
+ )
+ else -> throw IllegalArgumentException("Table $table not supported")
+ }
+ }
+
+ override fun newReader(path: Path, table: String): TableReader {
+ return when (table) {
+ TABLE_RESOURCES -> {
+ val reader = LocalParquetReader<GenericRecord>(path.resolve("meta.parquet"))
+ OdcVmResourceTableReader(reader)
+ }
+ TABLE_RESOURCE_STATES -> {
+ val reader = LocalParquetReader<GenericRecord>(path.resolve("trace.parquet"))
+ OdcVmResourceStateTableReader(reader)
+ }
+ else -> throw IllegalArgumentException("Table $table not supported")
+ }
+ }
+
+ override fun newWriter(path: Path, table: String): TableWriter {
+ return when (table) {
+ TABLE_RESOURCES -> {
+ val schema = RESOURCES_SCHEMA
+ val writer = AvroParquetWriter.builder<GenericRecord>(LocalOutputFile(path.resolve("meta.parquet")))
+ .withSchema(schema)
+ .withCompressionCodec(CompressionCodecName.ZSTD)
+ .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
+ .build()
+ OdcVmResourceTableWriter(writer, schema)
+ }
+ TABLE_RESOURCE_STATES -> {
+ val schema = RESOURCE_STATES_SCHEMA
+ val writer = AvroParquetWriter.builder<GenericRecord>(LocalOutputFile(path.resolve("trace.parquet")))
+ .withSchema(schema)
+ .withCompressionCodec(CompressionCodecName.ZSTD)
+ .withDictionaryEncoding("id", true)
+ .withBloomFilterEnabled("id", true)
+ .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
+ .build()
+ OdcVmResourceStateTableWriter(writer, schema)
+ }
+ else -> throw IllegalArgumentException("Table $table not supported")
+ }
}
public companion object {
diff --git a/opendc-trace/opendc-trace-opendc/src/test/kotlin/org/opendc/trace/opendc/OdcVmTraceFormatTest.kt b/opendc-trace/opendc-trace-opendc/src/test/kotlin/org/opendc/trace/opendc/OdcVmTraceFormatTest.kt
index 42eb369e..bfe0f881 100644
--- a/opendc-trace/opendc-trace-opendc/src/test/kotlin/org/opendc/trace/opendc/OdcVmTraceFormatTest.kt
+++ b/opendc-trace/opendc-trace-opendc/src/test/kotlin/org/opendc/trace/opendc/OdcVmTraceFormatTest.kt
@@ -29,8 +29,7 @@ import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.ValueSource
import org.opendc.trace.*
-import java.io.File
-import java.net.URL
+import java.nio.file.Paths
/**
* Test suite for the [OdcVmTraceFormat] implementation.
@@ -39,52 +38,30 @@ internal class OdcVmTraceFormatTest {
private val format = OdcVmTraceFormat()
@Test
- fun testTraceExists() {
- val url = File("src/test/resources/trace-v2.1").toURI().toURL()
- assertDoesNotThrow { format.open(url) }
- }
-
- @Test
- fun testTraceDoesNotExists() {
- val url = File("src/test/resources/trace-v2.1").toURI().toURL()
- assertThrows<IllegalArgumentException> {
- format.open(URL(url.toString() + "help"))
- }
- }
-
- @Test
fun testTables() {
- val url = File("src/test/resources/trace-v2.1").toURI().toURL()
- val trace = format.open(url)
+ val path = Paths.get("src/test/resources/trace-v2.1")
- assertEquals(listOf(TABLE_RESOURCES, TABLE_RESOURCE_STATES), trace.tables)
+ assertEquals(listOf(TABLE_RESOURCES, TABLE_RESOURCE_STATES), format.getTables(path))
}
@Test
fun testTableExists() {
- val url = File("src/test/resources/trace-v2.1").toURI().toURL()
- val table = format.open(url).getTable(TABLE_RESOURCE_STATES)
+ val path = Paths.get("src/test/resources/trace-v2.1")
- assertNotNull(table)
- assertDoesNotThrow { table!!.newReader() }
+ assertDoesNotThrow { format.getDetails(path, TABLE_RESOURCE_STATES) }
}
@Test
fun testTableDoesNotExist() {
- val url = File("src/test/resources/trace-v2.1").toURI().toURL()
- val trace = format.open(url)
-
- assertFalse(trace.containsTable("test"))
- assertNull(trace.getTable("test"))
+ val path = Paths.get("src/test/resources/trace-v2.1")
+ assertThrows<IllegalArgumentException> { format.getDetails(path, "test") }
}
@ParameterizedTest
@ValueSource(strings = ["trace-v2.0", "trace-v2.1"])
fun testResources(name: String) {
- val url = File("src/test/resources/$name").toURI().toURL()
- val trace = format.open(url)
-
- val reader = trace.getTable(TABLE_RESOURCES)!!.newReader()
+ val path = Paths.get("src/test/resources/$name")
+ val reader = format.newReader(path, TABLE_RESOURCES)
assertAll(
{ assertTrue(reader.nextRow()) },
@@ -104,14 +81,12 @@ internal class OdcVmTraceFormatTest {
@ParameterizedTest
@ValueSource(strings = ["trace-v2.0", "trace-v2.1"])
fun testSmoke(name: String) {
- val url = File("src/test/resources/$name").toURI().toURL()
- val trace = format.open(url)
-
- val reader = trace.getTable(TABLE_RESOURCE_STATES)!!.newReader()
+ val path = Paths.get("src/test/resources/$name")
+ val reader = format.newReader(path, TABLE_RESOURCE_STATES)
assertAll(
{ assertTrue(reader.nextRow()) },
- { assertEquals("1019", reader.get(RESOURCE_STATE_ID)) },
+ { assertEquals("1019", reader.get(RESOURCE_ID)) },
{ assertEquals(1376314846, reader.get(RESOURCE_STATE_TIMESTAMP).epochSecond) },
{ assertEquals(0.0, reader.getDouble(RESOURCE_STATE_CPU_USAGE), 0.01) }
)