diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2021-09-21 12:04:15 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-21 12:04:15 +0200 |
| commit | 322d91db03a7d74a00ec623ce624f979c0b77c03 (patch) | |
| tree | 73201888564accde4cfa107f4ffdb15e9f93d45c /opendc-trace/opendc-trace-wtf/src | |
| parent | 453c25c4b453fa0af26bebbd8863abfb79218119 (diff) | |
| parent | 68ef3700ed2f69bcf0118bb69eda71e6b1f4d54f (diff) | |
merge: Add support for trace writing
This pull request extends the trace API to support writing new traces.
- Unify columns of different tables
- Support column lookup via index
- Use index lookup in trace loader
- Add property for describing partition keys
- Simplify TraceFormat SPI interface
- Add support for writing traces
**Breaking API Changes**
- `TraceFormat` SPI interface has been redesigned.
Diffstat (limited to 'opendc-trace/opendc-trace-wtf/src')
5 files changed, 147 insertions, 201 deletions
diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTable.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTable.kt deleted file mode 100644 index 74202718..00000000 --- a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTable.kt +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2021 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.trace.wtf - -import org.apache.avro.generic.GenericRecord -import org.opendc.trace.* -import org.opendc.trace.util.parquet.LocalParquetReader -import java.nio.file.Path - -/** - * A [Table] containing the tasks in a GWF trace. - */ -internal class WtfTaskTable(private val path: Path) : Table { - override val name: String = TABLE_TASKS - - override val isSynthetic: Boolean = false - - override val columns: List<TableColumn<*>> = listOf( - TASK_ID, - TASK_WORKFLOW_ID, - TASK_SUBMIT_TIME, - TASK_WAIT_TIME, - TASK_RUNTIME, - TASK_REQ_NCPUS, - TASK_PARENTS, - TASK_CHILDREN, - TASK_GROUP_ID, - TASK_USER_ID - ) - - override fun newReader(): TableReader { - val reader = LocalParquetReader<GenericRecord>(path.resolve("tasks/schema-1.0")) - return WtfTaskTableReader(reader) - } - - override fun newReader(partition: String): TableReader { - throw IllegalArgumentException("Invalid partition $partition") - } - - override fun toString(): String = "WtfTaskTable" -} diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt index 5e2463f8..45ec25dd 100644 --- a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt +++ b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt @@ -22,6 +22,7 @@ package org.opendc.trace.wtf +import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.opendc.trace.* import org.opendc.trace.util.parquet.LocalParquetReader @@ -37,73 +38,126 @@ internal class WtfTaskTableReader(private val reader: LocalParquetReader<Generic */ private var record: GenericRecord? = null + /** + * A flag to indicate that the columns have been initialized. + */ + private var hasInitializedColumns = false + override fun nextRow(): Boolean { - record = reader.read() + val record = reader.read() + this.record = record + + if (!hasInitializedColumns && record != null) { + initColumns(record.schema) + hasInitializedColumns = true + } + return record != null } - override fun hasColumn(column: TableColumn<*>): Boolean { - return when (column) { - TASK_ID -> true - TASK_WORKFLOW_ID -> true - TASK_SUBMIT_TIME -> true - TASK_WAIT_TIME -> true - TASK_RUNTIME -> true - TASK_REQ_NCPUS -> true - TASK_PARENTS -> true - TASK_CHILDREN -> true - TASK_GROUP_ID -> true - TASK_USER_ID -> true - else -> false - } + override fun resolve(column: TableColumn<*>): Int = columns[column] ?: -1 + + override fun isNull(index: Int): Boolean { + check(index in 0..columns.size) { "Invalid column index" } + return get(index) == null } - override fun <T> get(column: TableColumn<T>): T { + override fun get(index: Int): Any? { val record = checkNotNull(record) { "Reader in invalid state" } - @Suppress("UNCHECKED_CAST") - val res: Any = when (column) { - TASK_ID -> (record["id"] as Long).toString() - TASK_WORKFLOW_ID -> (record["workflow_id"] as Long).toString() - TASK_SUBMIT_TIME -> Instant.ofEpochMilli(record["ts_submit"] as Long) - TASK_WAIT_TIME -> Duration.ofMillis(record["wait_time"] as Long) - TASK_RUNTIME -> Duration.ofMillis(record["runtime"] as Long) - TASK_REQ_NCPUS -> (record["resource_amount_requested"] as Double).toInt() - TASK_PARENTS -> (record["parents"] as ArrayList<GenericRecord>).map { it["item"].toString() }.toSet() - TASK_CHILDREN -> (record["children"] as ArrayList<GenericRecord>).map { it["item"].toString() }.toSet() - TASK_GROUP_ID -> record["group_id"] - TASK_USER_ID -> record["user_id"] + return when (index) { + COL_ID -> (record[AVRO_COL_ID] as Long).toString() + COL_WORKFLOW_ID -> (record[AVRO_COL_WORKFLOW_ID] as Long).toString() + COL_SUBMIT_TIME -> Instant.ofEpochMilli(record[AVRO_COL_SUBMIT_TIME] as Long) + COL_WAIT_TIME -> Duration.ofMillis(record[AVRO_COL_WAIT_TIME] as Long) + COL_RUNTIME -> Duration.ofMillis(record[AVRO_COL_RUNTIME] as Long) + COL_REQ_NCPUS, COL_GROUP_ID, COL_USER_ID -> getInt(index) + COL_PARENTS -> (record[AVRO_COL_PARENTS] as ArrayList<GenericRecord>).map { it["item"].toString() }.toSet() + COL_CHILDREN -> (record[AVRO_COL_CHILDREN] as ArrayList<GenericRecord>).map { it["item"].toString() }.toSet() else -> throw IllegalArgumentException("Invalid column") } - - @Suppress("UNCHECKED_CAST") - return res as T } - override fun getBoolean(column: TableColumn<Boolean>): Boolean { + override fun getBoolean(index: Int): Boolean { throw IllegalArgumentException("Invalid column") } - override fun getInt(column: TableColumn<Int>): Int { + override fun getInt(index: Int): Int { val record = checkNotNull(record) { "Reader in invalid state" } - return when (column) { - TASK_REQ_NCPUS -> (record["resource_amount_requested"] as Double).toInt() - TASK_GROUP_ID -> record["group_id"] as Int - TASK_USER_ID -> record["user_id"] as Int + return when (index) { + COL_REQ_NCPUS -> (record[AVRO_COL_REQ_NCPUS] as Double).toInt() + COL_GROUP_ID -> record[AVRO_COL_GROUP_ID] as Int + COL_USER_ID -> record[AVRO_COL_USER_ID] as Int else -> throw IllegalArgumentException("Invalid column") } } - override fun getLong(column: TableColumn<Long>): Long { + override fun getLong(index: Int): Long { throw IllegalArgumentException("Invalid column") } - override fun getDouble(column: TableColumn<Double>): Double { + override fun getDouble(index: Int): Double { throw IllegalArgumentException("Invalid column") } override fun close() { reader.close() } + + /** + * Initialize the columns for the reader based on [schema]. + */ + private fun initColumns(schema: Schema) { + try { + AVRO_COL_ID = schema.getField("id").pos() + AVRO_COL_WORKFLOW_ID = schema.getField("workflow_id").pos() + AVRO_COL_SUBMIT_TIME = schema.getField("ts_submit").pos() + AVRO_COL_WAIT_TIME = schema.getField("wait_time").pos() + AVRO_COL_RUNTIME = schema.getField("runtime").pos() + AVRO_COL_REQ_NCPUS = schema.getField("resource_amount_requested").pos() + AVRO_COL_PARENTS = schema.getField("parents").pos() + AVRO_COL_CHILDREN = schema.getField("children").pos() + AVRO_COL_GROUP_ID = schema.getField("group_id").pos() + AVRO_COL_USER_ID = schema.getField("user_id").pos() + } catch (e: NullPointerException) { + // This happens when the field we are trying to access does not exist + throw IllegalArgumentException("Invalid schema", e) + } + } + + private var AVRO_COL_ID = -1 + private var AVRO_COL_WORKFLOW_ID = -1 + private var AVRO_COL_SUBMIT_TIME = -1 + private var AVRO_COL_WAIT_TIME = -1 + private var AVRO_COL_RUNTIME = -1 + private var AVRO_COL_REQ_NCPUS = -1 + private var AVRO_COL_PARENTS = -1 + private var AVRO_COL_CHILDREN = -1 + private var AVRO_COL_GROUP_ID = -1 + private var AVRO_COL_USER_ID = -1 + + private val COL_ID = 0 + private val COL_WORKFLOW_ID = 1 + private val COL_SUBMIT_TIME = 2 + private val COL_WAIT_TIME = 3 + private val COL_RUNTIME = 4 + private val COL_REQ_NCPUS = 5 + private val COL_PARENTS = 6 + private val COL_CHILDREN = 7 + private val COL_GROUP_ID = 8 + private val COL_USER_ID = 9 + + private val columns = mapOf( + TASK_ID to COL_ID, + TASK_WORKFLOW_ID to COL_WORKFLOW_ID, + TASK_SUBMIT_TIME to COL_SUBMIT_TIME, + TASK_WAIT_TIME to COL_WAIT_TIME, + TASK_RUNTIME to COL_RUNTIME, + TASK_REQ_NCPUS to COL_REQ_NCPUS, + TASK_PARENTS to COL_PARENTS, + TASK_CHILDREN to COL_CHILDREN, + TASK_GROUP_ID to COL_GROUP_ID, + TASK_USER_ID to COL_USER_ID, + ) } diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTrace.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTrace.kt deleted file mode 100644 index a755a107..00000000 --- a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTrace.kt +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 AtLarge Research - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package org.opendc.trace.wtf - -import org.opendc.trace.TABLE_TASKS -import org.opendc.trace.Table -import org.opendc.trace.Trace -import java.nio.file.Path - -/** - * [Trace] implementation for the WTF format. - */ -public class WtfTrace internal constructor(private val path: Path) : Trace { - override val tables: List<String> = listOf(TABLE_TASKS) - - override fun containsTable(name: String): Boolean = TABLE_TASKS == name - - override fun getTable(name: String): Table? { - if (!containsTable(name)) { - return null - } - - return WtfTaskTable(path) - } - - override fun toString(): String = "WtfTrace[$path]" -} diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt index 781cb335..ef88d295 100644 --- a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt +++ b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt @@ -22,10 +22,12 @@ package org.opendc.trace.wtf +import org.apache.avro.generic.GenericRecord +import org.opendc.trace.* +import org.opendc.trace.spi.TableDetails import org.opendc.trace.spi.TraceFormat -import java.net.URL -import java.nio.file.Paths -import kotlin.io.path.exists +import org.opendc.trace.util.parquet.LocalParquetReader +import java.nio.file.Path /** * A [TraceFormat] implementation for the Workflow Trace Format (WTF). @@ -33,9 +35,44 @@ import kotlin.io.path.exists public class WtfTraceFormat : TraceFormat { override val name: String = "wtf" - override fun open(url: URL): WtfTrace { - val path = Paths.get(url.toURI()) - require(path.exists()) { "URL $url does not exist" } - return WtfTrace(path) + override fun create(path: Path) { + throw UnsupportedOperationException("Writing not supported for this format") + } + + override fun getTables(path: Path): List<String> = listOf(TABLE_TASKS) + + override fun getDetails(path: Path, table: String): TableDetails { + return when (table) { + TABLE_TASKS -> TableDetails( + listOf( + TASK_ID, + TASK_WORKFLOW_ID, + TASK_SUBMIT_TIME, + TASK_WAIT_TIME, + TASK_RUNTIME, + TASK_REQ_NCPUS, + TASK_PARENTS, + TASK_CHILDREN, + TASK_GROUP_ID, + TASK_USER_ID + ), + listOf(TASK_SUBMIT_TIME) + ) + else -> throw IllegalArgumentException("Table $table not supported") + } + } + + override fun newReader(path: Path, table: String): TableReader { + return when (table) { + TABLE_TASKS -> { + val reader = LocalParquetReader<GenericRecord>(path.resolve("tasks/schema-1.0")) + WtfTaskTableReader(reader) + } + else -> throw IllegalArgumentException("Table $table not supported") + } + } + + override fun newWriter(path: Path, table: String): TableWriter { + throw UnsupportedOperationException("Writing not supported for this format") } } diff --git a/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt b/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt index b155f265..09c3703a 100644 --- a/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt +++ b/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt @@ -26,8 +26,7 @@ import org.junit.jupiter.api.Assertions.* import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import org.opendc.trace.* -import java.io.File -import java.net.URL +import java.nio.file.Paths import java.time.Duration import java.time.Instant @@ -35,51 +34,25 @@ import java.time.Instant * Test suite for the [WtfTraceFormat] class. */ class WtfTraceFormatTest { - @Test - fun testTraceExists() { - val input = File("src/test/resources/wtf-trace").toURI().toURL() - val format = WtfTraceFormat() - org.junit.jupiter.api.assertDoesNotThrow { - format.open(input) - } - } - - @Test - fun testTraceDoesNotExists() { - val input = File("src/test/resources/wtf-trace").toURI().toURL() - val format = WtfTraceFormat() - assertThrows<IllegalArgumentException> { - format.open(URL(input.toString() + "help")) - } - } + private val format = WtfTraceFormat() @Test fun testTables() { - val input = File("src/test/resources/wtf-trace").toURI().toURL() - val format = WtfTraceFormat() - val trace = format.open(input) - - assertEquals(listOf(TABLE_TASKS), trace.tables) + val path = Paths.get("src/test/resources/wtf-trace") + assertEquals(listOf(TABLE_TASKS), format.getTables(path)) } @Test fun testTableExists() { - val input = File("src/test/resources/wtf-trace").toURI().toURL() - val format = WtfTraceFormat() - val table = format.open(input).getTable(TABLE_TASKS) - - assertNotNull(table) - org.junit.jupiter.api.assertDoesNotThrow { table!!.newReader() } + val path = Paths.get("src/test/resources/wtf-trace") + assertDoesNotThrow { format.getDetails(path, TABLE_TASKS) } } @Test fun testTableDoesNotExist() { - val input = File("src/test/resources/wtf-trace").toURI().toURL() - val format = WtfTraceFormat() - val trace = format.open(input) + val path = Paths.get("src/test/resources/wtf-trace") - assertFalse(trace.containsTable("test")) - assertNull(trace.getTable("test")) + assertThrows<IllegalArgumentException> { format.getDetails(path, "test") } } /** @@ -87,9 +60,8 @@ class WtfTraceFormatTest { */ @Test fun testTableReader() { - val input = File("src/test/resources/wtf-trace") - val trace = WtfTraceFormat().open(input.toURI().toURL()) - val reader = trace.getTable(TABLE_TASKS)!!.newReader() + val path = Paths.get("src/test/resources/wtf-trace") + val reader = format.newReader(path, TABLE_TASKS) assertAll( { assertTrue(reader.nextRow()) }, @@ -111,13 +83,4 @@ class WtfTraceFormatTest { reader.close() } - - @Test - fun testTableReaderPartition() { - val input = File("src/test/resources/wtf-trace").toURI().toURL() - val format = WtfTraceFormat() - val table = format.open(input).getTable(TABLE_TASKS)!! - - assertThrows<IllegalArgumentException> { table.newReader("test") } - } } |
