diff options
| author | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-12-13 23:59:47 +0000 |
|---|---|---|
| committer | Fabian Mastenbroek <mail.fabianm@gmail.com> | 2022-12-14 00:24:24 +0000 |
| commit | 3542350909b1213240e5097a1793a7c0733f6196 (patch) | |
| tree | e602b041f6c054e40e01548ae918cc7faf21e2a7 /opendc-trace/opendc-trace-wtf/src/main | |
| parent | de739998fde6b856e40f8a98f78efddc0c57f167 (diff) | |
fix(trace/wtf): Disable Parquet strict typing
This change fixes an issue where some of the traces from the Workflow
Trace Archive would fail to load with the trace format in OpenDC. This
was caused by one of the fields being stored as a double, while the
formats expects it to be a long.
Parquet does not support unioning primitive types. Therefore, we have to
disable strict type checking when reading the file. Furthermore, we need
to support double entries for storing the workflow ids.
Diffstat (limited to 'opendc-trace/opendc-trace-wtf/src/main')
2 files changed, 6 insertions, 1 deletions
diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt index bf834778..c25b512c 100644 --- a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt +++ b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/WtfTraceFormat.kt @@ -78,7 +78,7 @@ public class WtfTraceFormat : TraceFormat { override fun newReader(path: Path, table: String, projection: List<String>?): TableReader { return when (table) { TABLE_TASKS -> { - val reader = LocalParquetReader(path.resolve("tasks/schema-1.0"), TaskReadSupport(projection)) + val reader = LocalParquetReader(path.resolve("tasks/schema-1.0"), TaskReadSupport(projection), strictTyping = false) WtfTaskTableReader(reader) } else -> throw IllegalArgumentException("Table $table not supported") diff --git a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/parquet/TaskRecordMaterializer.kt b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/parquet/TaskRecordMaterializer.kt index f188a3ff..055be0c3 100644 --- a/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/parquet/TaskRecordMaterializer.kt +++ b/opendc-trace/opendc-trace-wtf/src/main/kotlin/org/opendc/trace/wtf/parquet/TaskRecordMaterializer.kt @@ -30,6 +30,7 @@ import org.apache.parquet.schema.MessageType import java.time.Duration import java.time.Instant import kotlin.math.roundToInt +import kotlin.math.roundToLong /** * A [RecordMaterializer] for [Task] records. @@ -145,6 +146,10 @@ internal class TaskRecordMaterializer(schema: MessageType) : RecordMaterializer< override fun addLong(value: Long) { relations.add(value.toString()) } + + override fun addDouble(value: Double) { + relations.add(value.roundToLong().toString()) + } } private val listConverter = object : GroupConverter() { |
