From 412d8d597511122f114d69a4ba64c6b55dd192f9 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Fri, 29 Apr 2022 12:09:51 +0200 Subject: feat(trace/calcite): Add Calcite (SQL) integration This change adds support for querying workload trace formats implemented using the OpenDC API through Apache Calcite. This allows users to write SQL queries to explore the workload traces. --- .../main/kotlin/org/opendc/trace/TableColumn.kt | 2 +- .../opendc/trace/conv/InterferenceGroupColumns.kt | 7 +++--- .../org/opendc/trace/conv/ResourceColumns.kt | 15 ++++++------- .../org/opendc/trace/conv/ResourceStateColumns.kt | 25 +++++++++++----------- .../kotlin/org/opendc/trace/conv/TableColumns.kt | 4 +++- .../kotlin/org/opendc/trace/conv/TaskColumns.kt | 25 +++++++++++----------- 6 files changed, 38 insertions(+), 40 deletions(-) (limited to 'opendc-trace/opendc-trace-api/src') diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/TableColumn.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/TableColumn.kt index 776c40c0..b77a2982 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/TableColumn.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/TableColumn.kt @@ -33,7 +33,7 @@ public class TableColumn(public val name: String, type: Class) { /** * The type of the column. */ - private val type: Class<*> = type + public val type: Class<*> = type /** * Determine whether the type of the column is a subtype of [column]. diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/InterferenceGroupColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/InterferenceGroupColumns.kt index 532f6d24..5e8859e4 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/InterferenceGroupColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/InterferenceGroupColumns.kt @@ -24,22 +24,21 @@ package org.opendc.trace.conv import org.opendc.trace.TableColumn -import org.opendc.trace.column /** * Members of the interference group. */ @JvmField -public val INTERFERENCE_GROUP_MEMBERS: TableColumn> = column("interference_group:members") +public val INTERFERENCE_GROUP_MEMBERS: TableColumn> = column("members") /** * Target load after which the interference occurs. */ @JvmField -public val INTERFERENCE_GROUP_TARGET: TableColumn = column("interference_group:target") +public val INTERFERENCE_GROUP_TARGET: TableColumn = column("target") /** * Performance score when the interference occurs. */ @JvmField -public val INTERFERENCE_GROUP_SCORE: TableColumn = column("interference_group:score") +public val INTERFERENCE_GROUP_SCORE: TableColumn = column("score") diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt index e9fc5d44..e602e534 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceColumns.kt @@ -24,47 +24,46 @@ package org.opendc.trace.conv import org.opendc.trace.TableColumn -import org.opendc.trace.column import java.time.Instant /** * Identifier of the resource. */ @JvmField -public val RESOURCE_ID: TableColumn = column("resource:id") +public val RESOURCE_ID: TableColumn = column("id") /** * The cluster to which the resource belongs. */ @JvmField -public val RESOURCE_CLUSTER_ID: TableColumn = column("resource:cluster_id") +public val RESOURCE_CLUSTER_ID: TableColumn = column("cluster_id") /** * Start time for the resource. */ @JvmField -public val RESOURCE_START_TIME: TableColumn = column("resource:start_time") +public val RESOURCE_START_TIME: TableColumn = column("start_time") /** * End time for the resource. */ @JvmField -public val RESOURCE_STOP_TIME: TableColumn = column("resource:stop_time") +public val RESOURCE_STOP_TIME: TableColumn = column("stop_time") /** * Number of CPUs for the resource. */ @JvmField -public val RESOURCE_CPU_COUNT: TableColumn = column("resource:cpu_count") +public val RESOURCE_CPU_COUNT: TableColumn = column("cpu_count") /** * Total CPU capacity of the resource in MHz. */ @JvmField -public val RESOURCE_CPU_CAPACITY: TableColumn = column("resource:cpu_capacity") +public val RESOURCE_CPU_CAPACITY: TableColumn = column("cpu_capacity") /** * Memory capacity for the resource in KB. */ @JvmField -public val RESOURCE_MEM_CAPACITY: TableColumn = column("resource:mem_capacity") +public val RESOURCE_MEM_CAPACITY: TableColumn = column("mem_capacity") diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt index d5bbafd7..3a44f817 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/ResourceStateColumns.kt @@ -24,7 +24,6 @@ package org.opendc.trace.conv import org.opendc.trace.TableColumn -import org.opendc.trace.column import java.time.Duration import java.time.Instant @@ -32,70 +31,70 @@ import java.time.Instant * The timestamp at which the state was recorded. */ @JvmField -public val RESOURCE_STATE_TIMESTAMP: TableColumn = column("resource_state:timestamp") +public val RESOURCE_STATE_TIMESTAMP: TableColumn = column("timestamp") /** * Duration for the state. */ @JvmField -public val RESOURCE_STATE_DURATION: TableColumn = column("resource_state:duration") +public val RESOURCE_STATE_DURATION: TableColumn = column("duration") /** * A flag to indicate that the resource is powered on. */ @JvmField -public val RESOURCE_STATE_POWERED_ON: TableColumn = column("resource_state:powered_on") +public val RESOURCE_STATE_POWERED_ON: TableColumn = column("powered_on") /** * Total CPU usage of the resource in MHz. */ @JvmField -public val RESOURCE_STATE_CPU_USAGE: TableColumn = column("resource_state:cpu_usage") +public val RESOURCE_STATE_CPU_USAGE: TableColumn = column("cpu_usage") /** * Total CPU usage of the resource in percentage. */ @JvmField -public val RESOURCE_STATE_CPU_USAGE_PCT: TableColumn = column("resource_state:cpu_usage_pct") +public val RESOURCE_STATE_CPU_USAGE_PCT: TableColumn = column("cpu_usage_pct") /** * Total CPU demand of the resource in MHz. */ @JvmField -public val RESOURCE_STATE_CPU_DEMAND: TableColumn = column("resource_state:cpu_demand") +public val RESOURCE_STATE_CPU_DEMAND: TableColumn = column("cpu_demand") /** * CPU ready percentage. */ @JvmField -public val RESOURCE_STATE_CPU_READY_PCT: TableColumn = column("resource_state:cpu_ready_pct") +public val RESOURCE_STATE_CPU_READY_PCT: TableColumn = column("cpu_ready_pct") /** * Memory usage of the resource in KB. */ @JvmField -public val RESOURCE_STATE_MEM_USAGE: TableColumn = column("resource_state:mem_usage") +public val RESOURCE_STATE_MEM_USAGE: TableColumn = column("mem_usage") /** * Disk read throughput of the resource in KB/s. */ @JvmField -public val RESOURCE_STATE_DISK_READ: TableColumn = column("resource_state:disk_read") +public val RESOURCE_STATE_DISK_READ: TableColumn = column("disk_read") /** * Disk write throughput of the resource in KB/s. */ @JvmField -public val RESOURCE_STATE_DISK_WRITE: TableColumn = column("resource_state:disk_write") +public val RESOURCE_STATE_DISK_WRITE: TableColumn = column("disk_write") /** * Network receive throughput of the resource in KB/s. */ @JvmField -public val RESOURCE_STATE_NET_RX: TableColumn = column("resource_state:net_rx") +public val RESOURCE_STATE_NET_RX: TableColumn = column("net_rx") /** * Network transmit throughput of the resource in KB/s. */ @JvmField -public val RESOURCE_STATE_NET_TX: TableColumn = column("resource_state:net_tx") +public val RESOURCE_STATE_NET_TX: TableColumn = column("net_tx") diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TableColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TableColumns.kt index 31a58360..a58505e9 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TableColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TableColumns.kt @@ -21,7 +21,9 @@ */ @file:JvmName("TableColumns") -package org.opendc.trace +package org.opendc.trace.conv + +import org.opendc.trace.TableColumn /** * Construct a [TableColumn] with the specified [name] and type [T]. diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TaskColumns.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TaskColumns.kt index 397c0794..e6daafb7 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TaskColumns.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/conv/TaskColumns.kt @@ -24,7 +24,6 @@ package org.opendc.trace.conv import org.opendc.trace.TableColumn -import org.opendc.trace.column import java.time.Duration import java.time.Instant @@ -32,70 +31,70 @@ import java.time.Instant * A column containing the task identifier. */ @JvmField -public val TASK_ID: TableColumn = column("task:id") +public val TASK_ID: TableColumn = column("id") /** * A column containing the identifier of the workflow. */ @JvmField -public val TASK_WORKFLOW_ID: TableColumn = column("task:workflow_id") +public val TASK_WORKFLOW_ID: TableColumn = column("workflow_id") /** * A column containing the submission time of the task. */ @JvmField -public val TASK_SUBMIT_TIME: TableColumn = column("task:submit_time") +public val TASK_SUBMIT_TIME: TableColumn = column("submit_time") /** * A column containing the wait time of the task. */ @JvmField -public val TASK_WAIT_TIME: TableColumn = column("task:wait_time") +public val TASK_WAIT_TIME: TableColumn = column("wait_time") /** * A column containing the runtime time of the task. */ @JvmField -public val TASK_RUNTIME: TableColumn = column("task:runtime") +public val TASK_RUNTIME: TableColumn = column("runtime") /** * A column containing the parents of a task. */ @JvmField -public val TASK_PARENTS: TableColumn> = column("task:parents") +public val TASK_PARENTS: TableColumn> = column("parents") /** * A column containing the children of a task. */ @JvmField -public val TASK_CHILDREN: TableColumn> = column("task:children") +public val TASK_CHILDREN: TableColumn> = column("children") /** * A column containing the requested CPUs of a task. */ @JvmField -public val TASK_REQ_NCPUS: TableColumn = column("task:req_ncpus") +public val TASK_REQ_NCPUS: TableColumn = column("req_ncpus") /** * A column containing the allocated CPUs of a task. */ @JvmField -public val TASK_ALLOC_NCPUS: TableColumn = column("task:alloc_ncpus") +public val TASK_ALLOC_NCPUS: TableColumn = column("alloc_ncpus") /** * A column containing the status of a task. */ @JvmField -public val TASK_STATUS: TableColumn = column("task:status") +public val TASK_STATUS: TableColumn = column("status") /** * A column containing the group id of a task. */ @JvmField -public val TASK_GROUP_ID: TableColumn = column("task:group_id") +public val TASK_GROUP_ID: TableColumn = column("group_id") /** * A column containing the user id of a task. */ @JvmField -public val TASK_USER_ID: TableColumn = column("task:user_id") +public val TASK_USER_ID: TableColumn = column("user_id") -- cgit v1.2.3 From 670cd279ea7789e07b6d778a21fdec68347ab305 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Mon, 2 May 2022 14:17:55 +0200 Subject: feat(trace/api): Add support for projecting tables This change adds support for projecting certain columns of a table. This enables faster reading for tables with high number of columns. Currently, we support projection in the Parquet-based workload formats. Other formats are text-based and will probably not benefit much from projection. --- .../opendc-trace-api/src/main/kotlin/org/opendc/trace/Table.kt | 6 ++++-- .../src/main/kotlin/org/opendc/trace/internal/TableImpl.kt | 4 +++- .../src/main/kotlin/org/opendc/trace/spi/TraceFormat.kt | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'opendc-trace/opendc-trace-api/src') diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/Table.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/Table.kt index b0181cbc..05d0234a 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/Table.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/Table.kt @@ -42,9 +42,11 @@ public interface Table { public val partitionKeys: List> /** - * Open a [TableReader] for this table. + * Open a [TableReader] for a projection of this table. + * + * @param projection The list of columns to fetch from the table or `null` if no projection is performed. */ - public fun newReader(): TableReader + public fun newReader(projection: List>? = null): TableReader /** * Open a [TableWriter] for this table. diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/internal/TableImpl.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/internal/TableImpl.kt index 24551edb..b848e19a 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/internal/TableImpl.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/internal/TableImpl.kt @@ -43,7 +43,9 @@ internal class TableImpl(val trace: TraceImpl, override val name: String) : Tabl override val partitionKeys: List> get() = details.partitionKeys - override fun newReader(): TableReader = trace.format.newReader(trace.path, name) + override fun newReader(projection: List>?): TableReader { + return trace.format.newReader(trace.path, name, projection) + } override fun newWriter(): TableWriter = trace.format.newWriter(trace.path, name) diff --git a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/spi/TraceFormat.kt b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/spi/TraceFormat.kt index f2e610db..47761e0f 100644 --- a/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/spi/TraceFormat.kt +++ b/opendc-trace/opendc-trace-api/src/main/kotlin/org/opendc/trace/spi/TraceFormat.kt @@ -22,6 +22,7 @@ package org.opendc.trace.spi +import org.opendc.trace.TableColumn import org.opendc.trace.TableReader import org.opendc.trace.TableWriter import java.nio.file.Path @@ -68,10 +69,11 @@ public interface TraceFormat { * * @param path The path to the trace to open. * @param table The name of the table to open a [TableReader] for. + * @param projection The list of [TableColumn]s to project or `null` if no projection is performed. * @throws IllegalArgumentException If [table] does not exist. * @return A [TableReader] instance for the table. */ - public fun newReader(path: Path, table: String): TableReader + public fun newReader(path: Path, table: String, projection: List>?): TableReader /** * Open a [TableWriter] for the specified [table]. -- cgit v1.2.3