summaryrefslogtreecommitdiff
path: root/opendc-trace/opendc-trace-opendc/src
diff options
context:
space:
mode:
Diffstat (limited to 'opendc-trace/opendc-trace-opendc/src')
-rw-r--r--opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt123
-rw-r--r--opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt106
-rw-r--r--opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt43
3 files changed, 272 insertions, 0 deletions
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt
new file mode 100644
index 00000000..15a8cb85
--- /dev/null
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.opendc
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericRecord
+import org.apache.avro.generic.GenericRecordBuilder
+import org.apache.parquet.hadoop.ParquetWriter
+import org.opendc.trace.*
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * A [TableWriter] implementation for the OpenDC virtual machine trace format.
+ */
/**
 * A [TableWriter] implementation for the resource-state table of the OpenDC virtual
 * machine trace format, backed by a Parquet [writer].
 *
 * Rows must be appended in (id, timestamp) order; [endRow] enforces this invariant
 * and throws [IllegalStateException] when it is violated.
 */
internal class OdcVmResourceStateTableWriter(
    private val writer: ParquetWriter<GenericRecord>,
    private val schema: Schema
) : TableWriter {
    /**
     * The builder for the record currently being written, or `null` when no row is active.
     */
    private var builder: GenericRecordBuilder? = null

    /**
     * The fields belonging to the resource state schema, indexed by column position.
     */
    private val fields = schema.fields

    /**
     * Last seen (id, timestamp) pair, used to verify that rows arrive correctly ordered.
     */
    private var lastId: String? = null
    private var lastTimestamp: Long = Long.MIN_VALUE

    override fun startRow() {
        builder = GenericRecordBuilder(schema)
    }

    override fun endRow() {
        val builder = checkNotNull(builder) { "No active row" }
        this.builder = null

        val record = builder.build()
        val id = record[COL_ID] as String
        val timestamp = record[COL_TIMESTAMP] as Long

        // Rows must be sorted by resource id and, within a resource, by timestamp.
        check(lastId != id || timestamp >= lastTimestamp) { "Records need to be ordered by (id, timestamp)" }

        // Write the record that was already built above instead of building it a second time.
        writer.write(record)

        lastId = id
        lastTimestamp = timestamp
    }

    /**
     * Map a logical [column] to its position in [schema], supporting both the current
     * column names and their legacy aliases. Returns -1 for unsupported columns.
     *
     * NOTE(review): `.pos()` throws an NPE when neither alias exists in the schema —
     * presumably the schema is validated upstream; confirm before relying on it.
     */
    override fun resolve(column: TableColumn<*>): Int {
        val schema = schema
        return when (column) {
            RESOURCE_ID -> schema.getField("id").pos()
            RESOURCE_STATE_TIMESTAMP -> (schema.getField("timestamp") ?: schema.getField("time")).pos()
            RESOURCE_STATE_DURATION -> schema.getField("duration").pos()
            RESOURCE_CPU_COUNT -> (schema.getField("cpu_count") ?: schema.getField("cores")).pos()
            RESOURCE_STATE_CPU_USAGE -> (schema.getField("cpu_usage") ?: schema.getField("cpuUsage")).pos()
            else -> -1
        }
    }

    /**
     * Set the column at [index] to [value], converting temporal types to the
     * millisecond-based representation used by the Parquet schema.
     */
    override fun set(index: Int, value: Any) {
        val builder = checkNotNull(builder) { "No active row" }

        builder.set(
            fields[index],
            when (index) {
                COL_TIMESTAMP -> (value as Instant).toEpochMilli()
                COL_DURATION -> (value as Duration).toMillis()
                else -> value
            }
        )
    }

    override fun setBoolean(index: Int, value: Boolean) = set(index, value)

    override fun setInt(index: Int, value: Int) = set(index, value)

    override fun setLong(index: Int, value: Long) = set(index, value)

    override fun setDouble(index: Int, value: Double) = set(index, value)

    override fun flush() {
        // Not available: ParquetWriter does not expose an explicit flush operation.
    }

    override fun close() {
        writer.close()
    }

    /**
     * Positions of columns that need special conversion or ordering checks.
     */
    private val COL_ID = resolve(RESOURCE_ID)
    private val COL_TIMESTAMP = resolve(RESOURCE_STATE_TIMESTAMP)
    private val COL_DURATION = resolve(RESOURCE_STATE_DURATION)
}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt
new file mode 100644
index 00000000..9cc6ca7d
--- /dev/null
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.trace.opendc
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericRecord
+import org.apache.avro.generic.GenericRecordBuilder
+import org.apache.parquet.hadoop.ParquetWriter
+import org.opendc.trace.*
+import java.time.Instant
+import kotlin.math.roundToLong
+
+/**
+ * A [TableWriter] implementation for the OpenDC virtual machine trace format.
+ */
/**
 * A [TableWriter] implementation for the resources table of the OpenDC virtual
 * machine trace format, backed by a Parquet [writer].
 */
internal class OdcVmResourceTableWriter(
    private val writer: ParquetWriter<GenericRecord>,
    private val schema: Schema
) : TableWriter {
    /**
     * The builder for the record currently being written, or `null` when no row is active.
     */
    private var builder: GenericRecordBuilder? = null

    /**
     * The fields belonging to the resource schema, indexed by column position.
     */
    private val fields = schema.fields

    override fun startRow() {
        builder = GenericRecordBuilder(schema)
    }

    override fun endRow() {
        val activeBuilder = checkNotNull(builder) { "No active row" }
        builder = null
        writer.write(activeBuilder.build())
    }

    /**
     * Look up the schema position of a field named [primary], falling back to the
     * legacy alias [fallback] when the primary name is absent.
     */
    private fun fieldPos(primary: String, fallback: String): Int =
        (schema.getField(primary) ?: schema.getField(fallback)).pos()

    /**
     * Map a logical [column] to its position in [schema], supporting both the current
     * column names and their legacy aliases. Returns -1 for unsupported columns.
     */
    override fun resolve(column: TableColumn<*>): Int = when (column) {
        RESOURCE_ID -> schema.getField("id").pos()
        RESOURCE_START_TIME -> fieldPos("start_time", "submissionTime")
        RESOURCE_STOP_TIME -> fieldPos("stop_time", "endTime")
        RESOURCE_CPU_COUNT -> fieldPos("cpu_count", "maxCores")
        RESOURCE_MEM_CAPACITY -> fieldPos("mem_capacity", "requiredMemory")
        else -> -1
    }

    /**
     * Set the column at [index] to [value], converting temporal and capacity values
     * to the representation used by the Parquet schema.
     */
    override fun set(index: Int, value: Any) {
        val activeBuilder = checkNotNull(builder) { "No active row" }
        val converted: Any = when (index) {
            COL_START_TIME, COL_STOP_TIME -> (value as Instant).toEpochMilli()
            COL_MEM_CAPACITY -> (value as Double).roundToLong()
            else -> value
        }
        activeBuilder.set(fields[index], converted)
    }

    override fun setBoolean(index: Int, value: Boolean) = set(index, value)

    override fun setInt(index: Int, value: Int) = set(index, value)

    override fun setLong(index: Int, value: Long) = set(index, value)

    override fun setDouble(index: Int, value: Double) = set(index, value)

    override fun flush() {
        // Not available: ParquetWriter does not expose an explicit flush operation.
    }

    override fun close() {
        writer.close()
    }

    /**
     * Positions of columns that need special conversion.
     */
    private val COL_START_TIME = resolve(RESOURCE_START_TIME)
    private val COL_STOP_TIME = resolve(RESOURCE_STOP_TIME)
    private val COL_MEM_CAPACITY = resolve(RESOURCE_MEM_CAPACITY)
}
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt
index 29818147..9b32f8fd 100644
--- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt
+++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt
@@ -25,11 +25,16 @@ package org.opendc.trace.opendc
import org.apache.avro.Schema
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericRecord
+import org.apache.parquet.avro.AvroParquetWriter
+import org.apache.parquet.hadoop.ParquetFileWriter
+import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.opendc.trace.*
import org.opendc.trace.spi.TableDetails
import org.opendc.trace.spi.TraceFormat
+import org.opendc.trace.util.parquet.LocalOutputFile
import org.opendc.trace.util.parquet.LocalParquetReader
import org.opendc.trace.util.parquet.TIMESTAMP_SCHEMA
+import java.nio.file.Files
import java.nio.file.Path
/**
@@ -41,6 +46,18 @@ public class OdcVmTraceFormat : TraceFormat {
*/
override val name: String = "opendc-vm"
+ override fun create(path: Path) {
+ // Construct directory containing the trace files
+ Files.createDirectory(path)
+
+ val tables = getTables(path)
+
+ for (table in tables) {
+ val writer = newWriter(path, table)
+ writer.close()
+ }
+ }
+
override fun getTables(path: Path): List<String> = listOf(TABLE_RESOURCES, TABLE_RESOURCE_STATES)
override fun getDetails(path: Path, table: String): TableDetails {
@@ -82,6 +99,32 @@ public class OdcVmTraceFormat : TraceFormat {
}
}
+ override fun newWriter(path: Path, table: String): TableWriter {
+ return when (table) {
+ TABLE_RESOURCES -> {
+ val schema = RESOURCES_SCHEMA
+ val writer = AvroParquetWriter.builder<GenericRecord>(LocalOutputFile(path.resolve("meta.parquet")))
+ .withSchema(schema)
+ .withCompressionCodec(CompressionCodecName.ZSTD)
+ .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
+ .build()
+ OdcVmResourceTableWriter(writer, schema)
+ }
+ TABLE_RESOURCE_STATES -> {
+ val schema = RESOURCE_STATES_SCHEMA
+ val writer = AvroParquetWriter.builder<GenericRecord>(LocalOutputFile(path.resolve("trace.parquet")))
+ .withSchema(schema)
+ .withCompressionCodec(CompressionCodecName.ZSTD)
+ .withDictionaryEncoding("id", true)
+ .withBloomFilterEnabled("id", true)
+ .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
+ .build()
+ OdcVmResourceStateTableWriter(writer, schema)
+ }
+ else -> throw IllegalArgumentException("Table $table not supported")
+ }
+ }
+
public companion object {
/**
* Schema for the resources table in the trace.