summaryrefslogtreecommitdiff
path: root/opendc-trace/opendc-trace-wtf/src
diff options
context:
space:
mode:
authorFabian Mastenbroek <mail.fabianm@gmail.com>2021-10-25 14:53:54 +0200
committerFabian Mastenbroek <mail.fabianm@gmail.com>2021-10-25 14:53:54 +0200
commitaa9b32f8cd1467e9718959f400f6777e5d71737d (patch)
treeb88bbede15108c6855d7f94ded4c7054df186a72 /opendc-trace/opendc-trace-wtf/src
parenteb0e0a3bc557c05a70eead388797ab850ea87366 (diff)
parentb7a71e5b4aa77b41ef41deec2ace42b67a5a13a7 (diff)
merge: Integrate v2.1 progress into public repository
This pull request integrates the changes planned for the v2.1 release of OpenDC into the public Github repository in order to sync the progress of both repositories.
Diffstat (limited to 'opendc-trace/opendc-trace-wtf/src')
-rw-r--r--opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt163
-rw-r--r--opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt78
-rw-r--r--opendc-trace/opendc-trace-wtf/src/main/resources/META-INF/services/org.opendc.trace.spi.TraceFormat1
-rw-r--r--opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt86
-rw-r--r--opendc-trace/opendc-trace-wtf/src/test/resources/wtf-trace/tasks/schema-1.0/part.0.parquetbin0 -> 87475 bytes
5 files changed, 328 insertions, 0 deletions
diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt
new file mode 100644
index 00000000..45ec25dd
--- /dev/null
+++ b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTaskTableReader.kt
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.wtf
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericRecord
+import org.opendc.trace.*
+import org.opendc.trace.util.parquet.LocalParquetReader
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * A [TableReader] implementation for the WTF format.
+ */
+internal class WtfTaskTableReader(private val reader: LocalParquetReader<GenericRecord>) : TableReader {
+ /**
+ * The current record.
+ */
+ private var record: GenericRecord? = null
+
+ /**
+ * A flag to indicate that the columns have been initialized.
+ */
+ private var hasInitializedColumns = false
+
+ override fun nextRow(): Boolean {
+ val record = reader.read()
+ this.record = record
+
+ if (!hasInitializedColumns && record != null) {
+ initColumns(record.schema)
+ hasInitializedColumns = true
+ }
+
+ return record != null
+ }
+
+ override fun resolve(column: TableColumn<*>): Int = columns[column] ?: -1
+
+ override fun isNull(index: Int): Boolean {
+ check(index in 0..columns.size) { "Invalid column index" }
+ return get(index) == null
+ }
+
+ override fun get(index: Int): Any? {
+ val record = checkNotNull(record) { "Reader in invalid state" }
+ @Suppress("UNCHECKED_CAST")
+ return when (index) {
+ COL_ID -> (record[AVRO_COL_ID] as Long).toString()
+ COL_WORKFLOW_ID -> (record[AVRO_COL_WORKFLOW_ID] as Long).toString()
+ COL_SUBMIT_TIME -> Instant.ofEpochMilli(record[AVRO_COL_SUBMIT_TIME] as Long)
+ COL_WAIT_TIME -> Duration.ofMillis(record[AVRO_COL_WAIT_TIME] as Long)
+ COL_RUNTIME -> Duration.ofMillis(record[AVRO_COL_RUNTIME] as Long)
+ COL_REQ_NCPUS, COL_GROUP_ID, COL_USER_ID -> getInt(index)
+ COL_PARENTS -> (record[AVRO_COL_PARENTS] as ArrayList<GenericRecord>).map { it["item"].toString() }.toSet()
+ COL_CHILDREN -> (record[AVRO_COL_CHILDREN] as ArrayList<GenericRecord>).map { it["item"].toString() }.toSet()
+ else -> throw IllegalArgumentException("Invalid column")
+ }
+ }
+
+ override fun getBoolean(index: Int): Boolean {
+ throw IllegalArgumentException("Invalid column")
+ }
+
+ override fun getInt(index: Int): Int {
+ val record = checkNotNull(record) { "Reader in invalid state" }
+
+ return when (index) {
+ COL_REQ_NCPUS -> (record[AVRO_COL_REQ_NCPUS] as Double).toInt()
+ COL_GROUP_ID -> record[AVRO_COL_GROUP_ID] as Int
+ COL_USER_ID -> record[AVRO_COL_USER_ID] as Int
+ else -> throw IllegalArgumentException("Invalid column")
+ }
+ }
+
+ override fun getLong(index: Int): Long {
+ throw IllegalArgumentException("Invalid column")
+ }
+
+ override fun getDouble(index: Int): Double {
+ throw IllegalArgumentException("Invalid column")
+ }
+
+ override fun close() {
+ reader.close()
+ }
+
+ /**
+ * Initialize the columns for the reader based on [schema].
+ */
+ private fun initColumns(schema: Schema) {
+ try {
+ AVRO_COL_ID = schema.getField("id").pos()
+ AVRO_COL_WORKFLOW_ID = schema.getField("workflow_id").pos()
+ AVRO_COL_SUBMIT_TIME = schema.getField("ts_submit").pos()
+ AVRO_COL_WAIT_TIME = schema.getField("wait_time").pos()
+ AVRO_COL_RUNTIME = schema.getField("runtime").pos()
+ AVRO_COL_REQ_NCPUS = schema.getField("resource_amount_requested").pos()
+ AVRO_COL_PARENTS = schema.getField("parents").pos()
+ AVRO_COL_CHILDREN = schema.getField("children").pos()
+ AVRO_COL_GROUP_ID = schema.getField("group_id").pos()
+ AVRO_COL_USER_ID = schema.getField("user_id").pos()
+ } catch (e: NullPointerException) {
+ // This happens when the field we are trying to access does not exist
+ throw IllegalArgumentException("Invalid schema", e)
+ }
+ }
+
+ private var AVRO_COL_ID = -1
+ private var AVRO_COL_WORKFLOW_ID = -1
+ private var AVRO_COL_SUBMIT_TIME = -1
+ private var AVRO_COL_WAIT_TIME = -1
+ private var AVRO_COL_RUNTIME = -1
+ private var AVRO_COL_REQ_NCPUS = -1
+ private var AVRO_COL_PARENTS = -1
+ private var AVRO_COL_CHILDREN = -1
+ private var AVRO_COL_GROUP_ID = -1
+ private var AVRO_COL_USER_ID = -1
+
+ private val COL_ID = 0
+ private val COL_WORKFLOW_ID = 1
+ private val COL_SUBMIT_TIME = 2
+ private val COL_WAIT_TIME = 3
+ private val COL_RUNTIME = 4
+ private val COL_REQ_NCPUS = 5
+ private val COL_PARENTS = 6
+ private val COL_CHILDREN = 7
+ private val COL_GROUP_ID = 8
+ private val COL_USER_ID = 9
+
+ private val columns = mapOf(
+ TASK_ID to COL_ID,
+ TASK_WORKFLOW_ID to COL_WORKFLOW_ID,
+ TASK_SUBMIT_TIME to COL_SUBMIT_TIME,
+ TASK_WAIT_TIME to COL_WAIT_TIME,
+ TASK_RUNTIME to COL_RUNTIME,
+ TASK_REQ_NCPUS to COL_REQ_NCPUS,
+ TASK_PARENTS to COL_PARENTS,
+ TASK_CHILDREN to COL_CHILDREN,
+ TASK_GROUP_ID to COL_GROUP_ID,
+ TASK_USER_ID to COL_USER_ID,
+ )
+}
diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt
new file mode 100644
index 00000000..ef88d295
--- /dev/null
+++ b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.wtf
+
+import org.apache.avro.generic.GenericRecord
+import org.opendc.trace.*
+import org.opendc.trace.spi.TableDetails
+import org.opendc.trace.spi.TraceFormat
+import org.opendc.trace.util.parquet.LocalParquetReader
+import java.nio.file.Path
+
+/**
+ * A [TraceFormat] implementation for the Workflow Trace Format (WTF).
+ */
+public class WtfTraceFormat : TraceFormat {
+ override val name: String = "wtf"
+
+ override fun create(path: Path) {
+ throw UnsupportedOperationException("Writing not supported for this format")
+ }
+
+ override fun getTables(path: Path): List<String> = listOf(TABLE_TASKS)
+
+ override fun getDetails(path: Path, table: String): TableDetails {
+ return when (table) {
+ TABLE_TASKS -> TableDetails(
+ listOf(
+ TASK_ID,
+ TASK_WORKFLOW_ID,
+ TASK_SUBMIT_TIME,
+ TASK_WAIT_TIME,
+ TASK_RUNTIME,
+ TASK_REQ_NCPUS,
+ TASK_PARENTS,
+ TASK_CHILDREN,
+ TASK_GROUP_ID,
+ TASK_USER_ID
+ ),
+ listOf(TASK_SUBMIT_TIME)
+ )
+ else -> throw IllegalArgumentException("Table $table not supported")
+ }
+ }
+
+ override fun newReader(path: Path, table: String): TableReader {
+ return when (table) {
+ TABLE_TASKS -> {
+ val reader = LocalParquetReader<GenericRecord>(path.resolve("tasks/schema-1.0"))
+ WtfTaskTableReader(reader)
+ }
+ else -> throw IllegalArgumentException("Table $table not supported")
+ }
+ }
+
+ override fun newWriter(path: Path, table: String): TableWriter {
+ throw UnsupportedOperationException("Writing not supported for this format")
+ }
+}
diff --git a/opendc-trace/opendc-trace-wtf/src/main/resources/META-INF/services/org.opendc.trace.spi.TraceFormat b/opendc-trace/opendc-trace-wtf/src/main/resources/META-INF/services/org.opendc.trace.spi.TraceFormat
new file mode 100644
index 00000000..32da52ff
--- /dev/null
+++ b/opendc-trace/opendc-trace-wtf/src/main/resources/META-INF/services/org.opendc.trace.spi.TraceFormat
@@ -0,0 +1 @@
+org.opendc.trace.wtf.WtfTraceFormat
diff --git a/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt b/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt
new file mode 100644
index 00000000..09c3703a
--- /dev/null
+++ b/opendc-trace/opendc-trace-wtf/src/test/kotlin/org/opendc/trace/wtf/WtfTraceFormatTest.kt
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.wtf
+
+import org.junit.jupiter.api.Assertions.*
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertThrows
+import org.opendc.trace.*
+import java.nio.file.Paths
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * Test suite for the [WtfTraceFormat] class.
+ */
+class WtfTraceFormatTest {
+ private val format = WtfTraceFormat()
+
+ @Test
+ fun testTables() {
+ val path = Paths.get("src/test/resources/wtf-trace")
+ assertEquals(listOf(TABLE_TASKS), format.getTables(path))
+ }
+
+ @Test
+ fun testTableExists() {
+ val path = Paths.get("src/test/resources/wtf-trace")
+ assertDoesNotThrow { format.getDetails(path, TABLE_TASKS) }
+ }
+
+ @Test
+ fun testTableDoesNotExist() {
+ val path = Paths.get("src/test/resources/wtf-trace")
+
+ assertThrows<IllegalArgumentException> { format.getDetails(path, "test") }
+ }
+
+ /**
+ * Smoke test for parsing WTF traces.
+ */
+ @Test
+ fun testTableReader() {
+ val path = Paths.get("src/test/resources/wtf-trace")
+ val reader = format.newReader(path, TABLE_TASKS)
+
+ assertAll(
+ { assertTrue(reader.nextRow()) },
+ { assertEquals("362334516345962206", reader.get(TASK_ID)) },
+ { assertEquals("1078341553348591493", reader.get(TASK_WORKFLOW_ID)) },
+ { assertEquals(Instant.ofEpochMilli(245604), reader.get(TASK_SUBMIT_TIME)) },
+ { assertEquals(Duration.ofMillis(8163), reader.get(TASK_RUNTIME)) },
+ { assertEquals(setOf("584055316413447529", "133113685133695608", "1008582348422865408"), reader.get(TASK_PARENTS)) },
+ )
+
+ assertAll(
+ { assertTrue(reader.nextRow()) },
+ { assertEquals("502010169100446658", reader.get(TASK_ID)) },
+ { assertEquals("1078341553348591493", reader.get(TASK_WORKFLOW_ID)) },
+ { assertEquals(Instant.ofEpochMilli(251325), reader.get(TASK_SUBMIT_TIME)) },
+ { assertEquals(Duration.ofMillis(8216), reader.get(TASK_RUNTIME)) },
+ { assertEquals(setOf("584055316413447529", "133113685133695608", "1008582348422865408"), reader.get(TASK_PARENTS)) },
+ )
+
+ reader.close()
+ }
+}
diff --git a/opendc-trace/opendc-trace-wtf/src/test/resources/wtf-trace/tasks/schema-1.0/part.0.parquet b/opendc-trace/opendc-trace-wtf/src/test/resources/wtf-trace/tasks/schema-1.0/part.0.parquet
new file mode 100644
index 00000000..d2044038
--- /dev/null
+++ b/opendc-trace/opendc-trace-wtf/src/test/resources/wtf-trace/tasks/schema-1.0/part.0.parquet
Binary files differ