diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2021-09-21 11:34:34 +0200 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2021-09-21 11:34:34 +0200 |
| commit | 68ef3700ed2f69bcf0118bb69eda71e6b1f4d54f (patch) | |
| tree | 73201888564accde4cfa107f4ffdb15e9f93d45c /opendc-trace/opendc-trace-opendc | |
| parent | c7fff03408ee3109d0a39a96c043584a2d8f67ca (diff) | |
feat(trace): Add support for writing traces
This change adds a new API for writing traces in a trace format.
Currently, writing is only supported by the OpenDC VM format, but over
time the other formats will also have support for writing added.
Diffstat (limited to 'opendc-trace/opendc-trace-opendc')
3 files changed, 272 insertions, 0 deletions
diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt new file mode 100644 index 00000000..15a8cb85 --- /dev/null +++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceStateTableWriter.kt @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.trace.opendc + +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord +import org.apache.avro.generic.GenericRecordBuilder +import org.apache.parquet.hadoop.ParquetWriter +import org.opendc.trace.* +import java.time.Duration +import java.time.Instant + +/** + * A [TableWriter] implementation for the OpenDC virtual machine trace format. + */ +internal class OdcVmResourceStateTableWriter( + private val writer: ParquetWriter<GenericRecord>, + private val schema: Schema +) : TableWriter { + /** + * The current builder for the record that is being written. + */ + private var builder: GenericRecordBuilder? = null + + /** + * The fields belonging to the resource state schema. + */ + private val fields = schema.fields + + override fun startRow() { + builder = GenericRecordBuilder(schema) + } + + override fun endRow() { + val builder = checkNotNull(builder) { "No active row" } + this.builder = null + + val record = builder.build() + val id = record[COL_ID] as String + val timestamp = record[COL_TIMESTAMP] as Long + + check(lastId != id || timestamp >= lastTimestamp) { "Records need to be ordered by (id, timestamp)" } + + writer.write(builder.build()) + + lastId = id + lastTimestamp = timestamp + } + + override fun resolve(column: TableColumn<*>): Int { + val schema = schema + return when (column) { + RESOURCE_ID -> schema.getField("id").pos() + RESOURCE_STATE_TIMESTAMP -> (schema.getField("timestamp") ?: schema.getField("time")).pos() + RESOURCE_STATE_DURATION -> schema.getField("duration").pos() + RESOURCE_CPU_COUNT -> (schema.getField("cpu_count") ?: schema.getField("cores")).pos() + RESOURCE_STATE_CPU_USAGE -> (schema.getField("cpu_usage") ?: schema.getField("cpuUsage")).pos() + else -> -1 + } + } + + override fun set(index: Int, value: Any) { + val builder = checkNotNull(builder) { "No active row" } + + builder.set( + fields[index], + when (index) { + COL_TIMESTAMP -> (value as Instant).toEpochMilli() + COL_DURATION -> (value as Duration).toMillis() + else -> value + } + ) + } + + override fun setBoolean(index: Int, value: Boolean) = set(index, value) + + override fun setInt(index: Int, value: Int) = set(index, value) + + override fun setLong(index: Int, value: Long) = set(index, value) + + override fun setDouble(index: Int, value: Double) = set(index, value) + + override fun flush() { + // Not available + } + + override fun close() { + writer.close() + } + + /** + * Last column values that are used to check for correct partitioning. + */ + private var lastId: String? = null + private var lastTimestamp: Long = Long.MIN_VALUE + + /** + * Columns with special behavior. + */ + private val COL_ID = resolve(RESOURCE_ID) + private val COL_TIMESTAMP = resolve(RESOURCE_STATE_TIMESTAMP) + private val COL_DURATION = resolve(RESOURCE_STATE_DURATION) +} diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt new file mode 100644 index 00000000..9cc6ca7d --- /dev/null +++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmResourceTableWriter.kt @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.trace.opendc + +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord +import org.apache.avro.generic.GenericRecordBuilder +import org.apache.parquet.hadoop.ParquetWriter +import org.opendc.trace.* +import java.time.Instant +import kotlin.math.roundToLong + +/** + * A [TableWriter] implementation for the OpenDC virtual machine trace format. + */ +internal class OdcVmResourceTableWriter( + private val writer: ParquetWriter<GenericRecord>, + private val schema: Schema +) : TableWriter { + /** + * The current builder for the record that is being written. + */ + private var builder: GenericRecordBuilder? = null + + /** + * The fields belonging to the resource schema. + */ + private val fields = schema.fields + + override fun startRow() { + builder = GenericRecordBuilder(schema) + } + + override fun endRow() { + val builder = checkNotNull(builder) { "No active row" } + this.builder = null + writer.write(builder.build()) + } + + override fun resolve(column: TableColumn<*>): Int { + val schema = schema + return when (column) { + RESOURCE_ID -> schema.getField("id").pos() + RESOURCE_START_TIME -> (schema.getField("start_time") ?: schema.getField("submissionTime")).pos() + RESOURCE_STOP_TIME -> (schema.getField("stop_time") ?: schema.getField("endTime")).pos() + RESOURCE_CPU_COUNT -> (schema.getField("cpu_count") ?: schema.getField("maxCores")).pos() + RESOURCE_MEM_CAPACITY -> (schema.getField("mem_capacity") ?: schema.getField("requiredMemory")).pos() + else -> -1 + } + } + + override fun set(index: Int, value: Any) { + val builder = checkNotNull(builder) { "No active row" } + builder.set( + fields[index], + when (index) { + COL_START_TIME, COL_STOP_TIME -> (value as Instant).toEpochMilli() + COL_MEM_CAPACITY -> (value as Double).roundToLong() + else -> value + } + ) + } + + override fun setBoolean(index: Int, value: Boolean) = set(index, value) + + override fun setInt(index: Int, value: Int) = set(index, value) + + override fun setLong(index: Int, value: Long) = set(index, value) + + override fun setDouble(index: Int, value: Double) = set(index, value) + + override fun flush() { + // Not available + } + + override fun close() { + writer.close() + } + + /** + * Columns with special behavior. + */ + private val COL_START_TIME = resolve(RESOURCE_START_TIME) + private val COL_STOP_TIME = resolve(RESOURCE_STOP_TIME) + private val COL_MEM_CAPACITY = resolve(RESOURCE_MEM_CAPACITY) +} diff --git a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt index 29818147..9b32f8fd 100644 --- a/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt +++ b/opendc-trace/opendc-trace-opendc/src/main/kotlin/org/opendc/trace/opendc/OdcVmTraceFormat.kt @@ -25,11 +25,16 @@ package org.opendc.trace.opendc import org.apache.avro.Schema import org.apache.avro.SchemaBuilder import org.apache.avro.generic.GenericRecord +import org.apache.parquet.avro.AvroParquetWriter +import org.apache.parquet.hadoop.ParquetFileWriter +import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.opendc.trace.* import org.opendc.trace.spi.TableDetails import org.opendc.trace.spi.TraceFormat +import org.opendc.trace.util.parquet.LocalOutputFile import org.opendc.trace.util.parquet.LocalParquetReader import org.opendc.trace.util.parquet.TIMESTAMP_SCHEMA +import java.nio.file.Files import java.nio.file.Path /** @@ -41,6 +46,18 @@ public class OdcVmTraceFormat : TraceFormat { */ override val name: String = "opendc-vm" + override fun create(path: Path) { + // Construct directory containing the trace files + Files.createDirectory(path) + + val tables = getTables(path) + + for (table in tables) { + val writer = newWriter(path, table) + writer.close() + } + } + override fun getTables(path: Path): List<String> = listOf(TABLE_RESOURCES, TABLE_RESOURCE_STATES) override fun getDetails(path: Path, table: String): TableDetails { @@ -82,6 +99,32 @@ public class OdcVmTraceFormat : TraceFormat { } } + override fun newWriter(path: Path, table: String): TableWriter { + return when (table) { + TABLE_RESOURCES -> { + val schema = RESOURCES_SCHEMA + val writer = AvroParquetWriter.builder<GenericRecord>(LocalOutputFile(path.resolve("meta.parquet"))) + .withSchema(schema) + .withCompressionCodec(CompressionCodecName.ZSTD) + .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) + .build() + OdcVmResourceTableWriter(writer, schema) + } + TABLE_RESOURCE_STATES -> { + val schema = RESOURCE_STATES_SCHEMA + val writer = AvroParquetWriter.builder<GenericRecord>(LocalOutputFile(path.resolve("trace.parquet"))) + .withSchema(schema) + .withCompressionCodec(CompressionCodecName.ZSTD) + .withDictionaryEncoding("id", true) + .withBloomFilterEnabled("id", true) + .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) + .build() + OdcVmResourceStateTableWriter(writer, schema) + } + else -> throw IllegalArgumentException("Table $table not supported") + } + } + public companion object { /** * Schema for the resources table in the trace. |
