diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-06-09 10:31:41 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-06-09 10:31:41 +0200 |
| commit | d146814bbbb86bfcb19ccb94250424703e9179e5 (patch) | |
| tree | bf20f51b434d56e60ad013568ac1a32b912a3b5e /opendc-trace/opendc-trace-gwf/src | |
| parent | 61b6550d7a476ab1aae45a5b9385dfd6ca4f6b6f (diff) | |
| parent | 9d759c9bc987965fae8b0c16c000772c546bf3a2 (diff) | |
merge: Introduce schema for trace API (#88)
This pull request updates the OpenDC trace API so that the schema of the tables
exposed by a trace can be specified explicitly. This makes it easier for API
consumers to understand the column types exposed by the API.
## Implementation Notes :hammer_and_pick:
* Introduce type system for trace API
* Add benchmarks for odcvm trace format
* Add benchmarks for Azure trace format
* Add conformance suite for OpenDC trace API
## External Dependencies :four_leaf_clover:
* N/A
## Breaking API Changes :warning:
* Removal of the typed (generic) `TableColumn`. Instead, `TableColumn` instances are now
used to describe the name and type of the columns belonging to a table.
* `TableReader` and `TableWriter` do not support accessing arbitrary objects
anymore. Instead, only the types supported by the type system are exposed.
Diffstat (limited to 'opendc-trace/opendc-trace-gwf/src')
3 files changed, 120 insertions, 44 deletions
diff --git a/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTaskTableReader.kt b/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTaskTableReader.kt index 42a9469e..f9a171e9 100644 --- a/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTaskTableReader.kt +++ b/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTaskTableReader.kt @@ -27,23 +27,34 @@ import com.fasterxml.jackson.dataformat.csv.CsvParser import com.fasterxml.jackson.dataformat.csv.CsvSchema import org.opendc.trace.* import org.opendc.trace.conv.* +import org.opendc.trace.util.convertTo import java.time.Duration import java.time.Instant +import java.util.* import java.util.regex.Pattern /** * A [TableReader] implementation for the GWF format. */ internal class GwfTaskTableReader(private val parser: CsvParser) : TableReader { + /** + * A flag to indicate whether a single row has been read already. + */ + private var isStarted = false + init { parser.schema = schema } override fun nextRow(): Boolean { + if (!isStarted) { + isStarted = true + } + // Reset the row state reset() - if (!nextStart()) { + if (parser.isClosed || !nextStart()) { return false } @@ -68,51 +79,106 @@ internal class GwfTaskTableReader(private val parser: CsvParser) : TableReader { return true } - override fun resolve(column: TableColumn<*>): Int = columns[column] ?: -1 + override fun resolve(name: String): Int { + return when (name) { + TASK_ID -> COL_JOB_ID + TASK_WORKFLOW_ID -> COL_WORKFLOW_ID + TASK_SUBMIT_TIME -> COL_SUBMIT_TIME + TASK_RUNTIME -> COL_RUNTIME + TASK_ALLOC_NCPUS -> COL_NPROC + TASK_REQ_NCPUS -> COL_REQ_NPROC + TASK_PARENTS -> COL_DEPS + else -> -1 + } + } override fun isNull(index: Int): Boolean { - check(index in 0..columns.size) { "Invalid column" } + require(index in 0..COL_DEPS) { "Invalid column" } return false } - override fun get(index: Int): Any? 
{ + override fun getBoolean(index: Int): Boolean { + throw IllegalArgumentException("Invalid column") + } + + override fun getInt(index: Int): Int { + checkActive() + return when (index) { + COL_REQ_NPROC -> reqNProcs + COL_NPROC -> nProcs + else -> throw IllegalArgumentException("Invalid column") + } + } + + override fun getLong(index: Int): Long { + throw IllegalArgumentException("Invalid column") + } + + override fun getFloat(index: Int): Float { + throw IllegalArgumentException("Invalid column") + } + + override fun getDouble(index: Int): Double { + throw IllegalArgumentException("Invalid column") + } + + override fun getString(index: Int): String? { + checkActive() return when (index) { COL_JOB_ID -> jobId COL_WORKFLOW_ID -> workflowId - COL_SUBMIT_TIME -> submitTime - COL_RUNTIME -> runtime - COL_REQ_NPROC -> getInt(index) - COL_NPROC -> getInt(index) - COL_DEPS -> dependencies else -> throw IllegalArgumentException("Invalid column") } } - override fun getBoolean(index: Int): Boolean { + override fun getUUID(index: Int): UUID? { throw IllegalArgumentException("Invalid column") } - override fun getInt(index: Int): Int { + override fun getInstant(index: Int): Instant? { + checkActive() return when (index) { - COL_REQ_NPROC -> reqNProcs - COL_NPROC -> nProcs + COL_SUBMIT_TIME -> submitTime else -> throw IllegalArgumentException("Invalid column") } } - override fun getLong(index: Int): Long { + override fun getDuration(index: Int): Duration? { + checkActive() + return when (index) { + COL_RUNTIME -> runtime + else -> throw IllegalArgumentException("Invalid column") + } + } + + override fun <T> getList(index: Int, elementType: Class<T>): List<T>? { throw IllegalArgumentException("Invalid column") } - override fun getDouble(index: Int): Double { + override fun <K, V> getMap(index: Int, keyType: Class<K>, valueType: Class<V>): Map<K, V>? { throw IllegalArgumentException("Invalid column") } + override fun <T> getSet(index: Int, elementType: Class<T>): Set<T>? 
{ + checkActive() + return when (index) { + COL_DEPS -> TYPE_DEPS.convertTo(dependencies, elementType) + else -> throw IllegalArgumentException("Invalid column") + } + } + override fun close() { parser.close() } /** + * Helper method to check if the reader is active. + */ + private fun checkActive() { + check(isStarted && !parser.isClosed) { "No active row. Did you call nextRow()?" } + } + + /** * The pattern used to parse the parents. */ private val pattern = Pattern.compile("\\s+") @@ -180,15 +246,7 @@ internal class GwfTaskTableReader(private val parser: CsvParser) : TableReader { private val COL_REQ_NPROC = 5 private val COL_DEPS = 6 - private val columns = mapOf( - TASK_ID to COL_JOB_ID, - TASK_WORKFLOW_ID to COL_WORKFLOW_ID, - TASK_SUBMIT_TIME to COL_SUBMIT_TIME, - TASK_RUNTIME to COL_RUNTIME, - TASK_ALLOC_NCPUS to COL_NPROC, - TASK_REQ_NCPUS to COL_REQ_NPROC, - TASK_PARENTS to COL_DEPS - ) + private val TYPE_DEPS = TableColumnType.Set(TableColumnType.String) companion object { /** diff --git a/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTraceFormat.kt b/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTraceFormat.kt index 8d9eab82..ca63b624 100644 --- a/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTraceFormat.kt +++ b/opendc-trace/opendc-trace-gwf/src/main/kotlin/org/opendc/trace/gwf/GwfTraceFormat.kt @@ -56,21 +56,20 @@ public class GwfTraceFormat : TraceFormat { return when (table) { TABLE_TASKS -> TableDetails( listOf( - TASK_WORKFLOW_ID, - TASK_ID, - TASK_SUBMIT_TIME, - TASK_RUNTIME, - TASK_REQ_NCPUS, - TASK_ALLOC_NCPUS, - TASK_PARENTS, - ), - listOf(TASK_WORKFLOW_ID) + TableColumn(TASK_WORKFLOW_ID, TableColumnType.String), + TableColumn(TASK_ID, TableColumnType.String), + TableColumn(TASK_SUBMIT_TIME, TableColumnType.Instant), + TableColumn(TASK_RUNTIME, TableColumnType.Duration), + TableColumn(TASK_REQ_NCPUS, TableColumnType.Int), + TableColumn(TASK_ALLOC_NCPUS, 
TableColumnType.Int), + TableColumn(TASK_PARENTS, TableColumnType.Set(TableColumnType.String)), + ) ) else -> throw IllegalArgumentException("Table $table not supported") } } - override fun newReader(path: Path, table: String, projection: List<TableColumn<*>>?): TableReader { + override fun newReader(path: Path, table: String, projection: List<String>?): TableReader { return when (table) { TABLE_TASKS -> GwfTaskTableReader(factory.createParser(path.toFile())) else -> throw IllegalArgumentException("Table $table not supported") diff --git a/opendc-trace/opendc-trace-gwf/src/test/kotlin/org/opendc/trace/gwf/GwfTraceFormatTest.kt b/opendc-trace/opendc-trace-gwf/src/test/kotlin/org/opendc/trace/gwf/GwfTraceFormatTest.kt index 411d45d0..a8c3a715 100644 --- a/opendc-trace/opendc-trace-gwf/src/test/kotlin/org/opendc/trace/gwf/GwfTraceFormatTest.kt +++ b/opendc-trace/opendc-trace-gwf/src/test/kotlin/org/opendc/trace/gwf/GwfTraceFormatTest.kt @@ -24,7 +24,10 @@ package org.opendc.trace.gwf import org.junit.jupiter.api.* import org.junit.jupiter.api.Assertions.* +import org.opendc.trace.TableColumn +import org.opendc.trace.TableReader import org.opendc.trace.conv.* +import org.opendc.trace.testkit.TableReaderTestKit import java.nio.file.Paths import java.time.Duration import java.time.Instant @@ -32,6 +35,7 @@ import java.time.Instant /** * Test suite for the [GwfTraceFormat] class. 
*/ +@DisplayName("GWF TraceFormat") internal class GwfTraceFormatTest { private val format = GwfTraceFormat() @@ -62,11 +66,11 @@ internal class GwfTraceFormatTest { assertAll( { assertTrue(reader.nextRow()) }, - { assertEquals("0", reader.get(TASK_WORKFLOW_ID)) }, - { assertEquals("1", reader.get(TASK_ID)) }, - { assertEquals(Instant.ofEpochSecond(16), reader.get(TASK_SUBMIT_TIME)) }, - { assertEquals(Duration.ofSeconds(11), reader.get(TASK_RUNTIME)) }, - { assertEquals(emptySet<String>(), reader.get(TASK_PARENTS)) }, + { assertEquals("0", reader.getString(TASK_WORKFLOW_ID)) }, + { assertEquals("1", reader.getString(TASK_ID)) }, + { assertEquals(Instant.ofEpochSecond(16), reader.getInstant(TASK_SUBMIT_TIME)) }, + { assertEquals(Duration.ofSeconds(11), reader.getDuration(TASK_RUNTIME)) }, + { assertEquals(emptySet<String>(), reader.getSet(TASK_PARENTS, String::class.java)) }, ) } @@ -81,11 +85,26 @@ internal class GwfTraceFormatTest { assertAll( { assertTrue(reader.nextRow()) }, - { assertEquals("0", reader.get(TASK_WORKFLOW_ID)) }, - { assertEquals("7", reader.get(TASK_ID)) }, - { assertEquals(Instant.ofEpochSecond(87), reader.get(TASK_SUBMIT_TIME)) }, - { assertEquals(Duration.ofSeconds(11), reader.get(TASK_RUNTIME)) }, - { assertEquals(setOf("4", "5", "6"), reader.get(TASK_PARENTS)) }, + { assertEquals("0", reader.getString(TASK_WORKFLOW_ID)) }, + { assertEquals("7", reader.getString(TASK_ID)) }, + { assertEquals(Instant.ofEpochSecond(87), reader.getInstant(TASK_SUBMIT_TIME)) }, + { assertEquals(Duration.ofSeconds(11), reader.getDuration(TASK_RUNTIME)) }, + { assertEquals(setOf("4", "5", "6"), reader.getSet(TASK_PARENTS, String::class.java)) }, ) } + + @DisplayName("TableReader for Tasks") + @Nested + inner class TasksTableReaderTest : TableReaderTestKit() { + override lateinit var reader: TableReader + override lateinit var columns: List<TableColumn> + + @BeforeEach + fun setUp() { + val path = 
Paths.get(checkNotNull(GwfTraceFormatTest::class.java.getResource("/trace.gwf")).toURI()) + + columns = format.getDetails(path, TABLE_TASKS).columns + reader = format.newReader(path, TABLE_TASKS, null) + } + } } |
