-rw-r--r-- .github/workflows/build.yml | 3
-rw-r--r-- gradle/libs.versions.toml | 4
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/build.gradle.kts | 5
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/telemetry/parquet/ParquetEventWriter.kt | 5
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt | 1
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt | 14
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt | 17
-rw-r--r-- opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt | 8
-rw-r--r-- opendc-experiments/opendc-experiments-serverless20/build.gradle.kts | 6
-rw-r--r-- opendc-experiments/opendc-experiments-tf20/build.gradle.kts | 5
-rw-r--r-- opendc-format/build.gradle.kts | 24
-rw-r--r-- opendc-format/src/main/kotlin/org/opendc/format/trace/wtf/WtfTraceReader.kt | 76
-rw-r--r-- opendc-format/src/main/kotlin/org/opendc/format/util/LocalInputFile.kt | 107
-rw-r--r-- opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt | 95
-rw-r--r-- opendc-format/src/main/kotlin/org/opendc/format/util/LocalParquetReader.kt | 112
-rw-r--r-- opendc-format/src/test/kotlin/org/opendc/format/trace/wtf/WtfTraceReaderTest.kt | 3
-rw-r--r-- opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt | 125
17 files changed, 527 insertions(+), 83 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b0e8cc38..28e5846b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,6 +12,9 @@ jobs:
matrix:
os: [ubuntu-latest]
java: [8, 16]
+ include:
+ - os: windows-latest
+ java: 16
steps:
- name: Checkout repository
uses: actions/checkout@v2
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 1d7fdd3e..22f713f6 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -5,6 +5,7 @@ slf4j = "1.7.30"
log4j = "2.14.1"
opentelemetry-main = "1.2.0"
opentelemetry-metrics = "1.2.0-alpha"
+hadoop = "3.3.0"
[libraries]
kotlinx-coroutines = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version = "1.5.0" }
@@ -45,4 +46,5 @@ kotlinx-benchmark-runtime-jvm = { module = "org.jetbrains.kotlinx:kotlinx-benchm
# Other
mongodb = { module = "org.mongodb:mongodb-driver-sync", version = "4.2.3" }
classgraph = { module = "io.github.classgraph:classgraph", version = "4.8.105" }
-hadoop-client = { module = "org.apache.hadoop:hadoop-client", version = "3.3.0" }
+hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" }
+hadoop-mapreduce-client-core = { module = "org.apache.hadoop:hadoop-mapreduce-client-core", version.ref = "hadoop" }
diff --git a/opendc-experiments/opendc-experiments-capelin/build.gradle.kts b/opendc-experiments/opendc-experiments-capelin/build.gradle.kts
index 0dade513..324cae3e 100644
--- a/opendc-experiments/opendc-experiments-capelin/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-capelin/build.gradle.kts
@@ -44,10 +44,5 @@ dependencies {
implementation(libs.clikt)
implementation(libs.parquet)
- implementation(libs.hadoop.client) {
- exclude(group = "org.slf4j", module = "slf4j-log4j12")
- exclude(group = "log4j")
- }
-
testImplementation(libs.log4j.slf4j)
}
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/telemetry/parquet/ParquetEventWriter.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/telemetry/parquet/ParquetEventWriter.kt
index 4fa6ae66..d8f7ff75 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/telemetry/parquet/ParquetEventWriter.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/telemetry/parquet/ParquetEventWriter.kt
@@ -25,10 +25,10 @@ package org.opendc.experiments.capelin.telemetry.parquet
import mu.KotlinLogging
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
-import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.opendc.experiments.capelin.telemetry.Event
+import org.opendc.format.util.LocalOutputFile
import java.io.Closeable
import java.io.File
import java.util.concurrent.ArrayBlockingQueue
@@ -52,8 +52,7 @@ public open class ParquetEventWriter<in T : Event>(
/**
* The writer to write the Parquet file.
*/
- @Suppress("DEPRECATION")
- private val writer = AvroParquetWriter.builder<GenericData.Record>(Path(path.absolutePath))
+ private val writer = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(path))
.withSchema(schema)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withPageSize(4 * 1024 * 1024) // For compression
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt
index a8462a51..7f25137e 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20ParquetTraceReader.kt
@@ -38,7 +38,6 @@ import java.util.TreeSet
* @param performanceInterferenceModel The performance model covering the workload in the VM trace.
* @param run The run to which this reader belongs.
*/
-@OptIn(ExperimentalStdlibApi::class)
public class Sc20ParquetTraceReader(
rawReaders: List<Sc20RawParquetTraceReader>,
performanceInterferenceModel: Map<String, PerformanceInterferenceModel>,
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt
index bd27cf02..54151c9f 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20RawParquetTraceReader.kt
@@ -24,10 +24,9 @@ package org.opendc.experiments.capelin.trace
import mu.KotlinLogging
import org.apache.avro.generic.GenericData
-import org.apache.hadoop.fs.Path
-import org.apache.parquet.avro.AvroParquetReader
import org.opendc.format.trace.TraceEntry
import org.opendc.format.trace.TraceReader
+import org.opendc.format.util.LocalParquetReader
import org.opendc.simulator.compute.workload.SimTraceWorkload
import org.opendc.simulator.compute.workload.SimWorkload
import java.io.File
@@ -40,16 +39,12 @@ private val logger = KotlinLogging.logger {}
*
* @param path The directory of the traces.
*/
-@OptIn(ExperimentalStdlibApi::class)
public class Sc20RawParquetTraceReader(private val path: File) {
/**
* Read the fragments into memory.
*/
private fun parseFragments(path: File): Map<String, List<SimTraceWorkload.Fragment>> {
- @Suppress("DEPRECATION")
- val reader = AvroParquetReader.builder<GenericData.Record>(Path(path.absolutePath, "trace.parquet"))
- .disableCompatibility()
- .build()
+ val reader = LocalParquetReader<GenericData.Record>(File(path, "trace.parquet"))
val fragments = mutableMapOf<String, MutableList<SimTraceWorkload.Fragment>>()
@@ -81,10 +76,7 @@ public class Sc20RawParquetTraceReader(private val path: File) {
* Read the metadata into a workload.
*/
private fun parseMeta(path: File, fragments: Map<String, List<SimTraceWorkload.Fragment>>): List<TraceEntry<SimWorkload>> {
- @Suppress("DEPRECATION")
- val metaReader = AvroParquetReader.builder<GenericData.Record>(Path(path.absolutePath, "meta.parquet"))
- .disableCompatibility()
- .build()
+ val metaReader = LocalParquetReader<GenericData.Record>(File(path, "meta.parquet"))
var counter = 0
val entries = mutableListOf<TraceEntry<SimWorkload>>()
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt
index c5294b55..6792c2ab 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20StreamingParquetTraceReader.kt
@@ -24,7 +24,6 @@ package org.opendc.experiments.capelin.trace
import mu.KotlinLogging
import org.apache.avro.generic.GenericData
-import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetReader
import org.apache.parquet.filter2.compat.FilterCompat
import org.apache.parquet.filter2.predicate.FilterApi
@@ -33,6 +32,7 @@ import org.apache.parquet.filter2.predicate.UserDefinedPredicate
import org.apache.parquet.io.api.Binary
import org.opendc.format.trace.TraceEntry
import org.opendc.format.trace.TraceReader
+import org.opendc.format.util.LocalInputFile
import org.opendc.simulator.compute.interference.IMAGE_PERF_INTERFERENCE_MODEL
import org.opendc.simulator.compute.interference.PerformanceInterferenceModel
import org.opendc.simulator.compute.workload.SimTraceWorkload
@@ -54,7 +54,6 @@ private val logger = KotlinLogging.logger {}
* @param traceFile The directory of the traces.
* @param performanceInterferenceModel The performance model covering the workload in the VM trace.
*/
-@OptIn(ExperimentalStdlibApi::class)
public class Sc20StreamingParquetTraceReader(
traceFile: File,
performanceInterferenceModel: PerformanceInterferenceModel? = null,
@@ -96,10 +95,10 @@ public class Sc20StreamingParquetTraceReader(
* The thread to read the records in.
*/
private val readerThread = thread(start = true, name = "sc20-reader") {
- @Suppress("DEPRECATION")
- val reader = AvroParquetReader.builder<GenericData.Record>(Path(traceFile.absolutePath, "trace.parquet"))
+ val reader = AvroParquetReader
+ .builder<GenericData.Record>(LocalInputFile(File(traceFile, "trace.parquet")))
.disableCompatibility()
- .run { if (filter != null) withFilter(filter) else this }
+ .withFilter(filter)
.build()
try {
@@ -164,10 +163,10 @@ public class Sc20StreamingParquetTraceReader(
val entries = mutableMapOf<String, GenericData.Record>()
val buffers = mutableMapOf<String, MutableList<MutableList<SimTraceWorkload.Fragment>>>()
- @Suppress("DEPRECATION")
- val metaReader = AvroParquetReader.builder<GenericData.Record>(Path(traceFile.absolutePath, "meta.parquet"))
+ val metaReader = AvroParquetReader
+ .builder<GenericData.Record>(LocalInputFile(File(traceFile, "meta.parquet")))
.disableCompatibility()
- .run { if (filter != null) withFilter(filter) else this }
+ .withFilter(filter)
.build()
while (true) {
@@ -178,7 +177,7 @@ public class Sc20StreamingParquetTraceReader(
metaReader.close()
- val selection = if (selectedVms.isEmpty()) entries.keys else selectedVms
+ val selection = selectedVms.ifEmpty { entries.keys }
// Create the entry iterator
iterator = selection.asSequence()
diff --git a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt
index 1f9e289c..d0031a66 100644
--- a/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt
+++ b/opendc-experiments/opendc-experiments-capelin/src/main/kotlin/org/opendc/experiments/capelin/trace/Sc20TraceConverter.kt
@@ -38,11 +38,11 @@ import me.tongfei.progressbar.ProgressBar
import org.apache.avro.Schema
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericData
-import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.ParquetWriter
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.opendc.format.trace.sc20.Sc20VmPlacementReader
+import org.opendc.format.util.LocalOutputFile
import java.io.BufferedReader
import java.io.File
import java.io.FileReader
@@ -109,16 +109,14 @@ public class TraceConverterCli : CliktCommand(name = "trace-converter") {
traceParquet.delete()
}
- @Suppress("DEPRECATION")
- val metaWriter = AvroParquetWriter.builder<GenericData.Record>(Path(metaParquet.toURI()))
+ val metaWriter = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(metaParquet))
.withSchema(metaSchema)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withPageSize(4 * 1024 * 1024) // For compression
.withRowGroupSize(16 * 1024 * 1024) // For write buffering (Page size)
.build()
- @Suppress("DEPRECATION")
- val writer = AvroParquetWriter.builder<GenericData.Record>(Path(traceParquet.toURI()))
+ val writer = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(traceParquet))
.withSchema(schema)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withPageSize(4 * 1024 * 1024) // For compression
diff --git a/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts b/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts
index 88479765..7d68cb3a 100644
--- a/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-serverless20/build.gradle.kts
@@ -37,10 +37,4 @@ dependencies {
implementation(projects.opendcTelemetry.opendcTelemetrySdk)
implementation(libs.kotlin.logging)
implementation(libs.config)
-
- implementation(libs.parquet)
- implementation(libs.hadoop.client) {
- exclude(group = "org.slf4j", module = "slf4j-log4j12")
- exclude(group = "log4j")
- }
}
diff --git a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
index 64483bd4..b088045b 100644
--- a/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
+++ b/opendc-experiments/opendc-experiments-tf20/build.gradle.kts
@@ -38,9 +38,4 @@ dependencies {
implementation(projects.opendcUtils)
implementation(libs.kotlin.logging)
- implementation(libs.parquet)
- implementation(libs.hadoop.client) {
- exclude(group = "org.slf4j", module = "slf4j-log4j12")
- exclude(group = "log4j")
- }
}
diff --git a/opendc-format/build.gradle.kts b/opendc-format/build.gradle.kts
index e95cb666..e19e0ec8 100644
--- a/opendc-format/build.gradle.kts
+++ b/opendc-format/build.gradle.kts
@@ -40,9 +40,29 @@ dependencies {
}
implementation(kotlin("reflect"))
- implementation(libs.parquet)
- implementation(libs.hadoop.client) {
+ /* This configuration is necessary for a slim dependency on Apache Parquet */
+ implementation(libs.parquet) {
+ exclude(group = "org.apache.hadoop")
+ }
+ runtimeOnly(libs.hadoop.common) {
exclude(group = "org.slf4j", module = "slf4j-log4j12")
exclude(group = "log4j")
+ exclude(group = "org.apache.hadoop")
+ exclude(group = "org.apache.curator")
+ exclude(group = "org.apache.zookeeper")
+ exclude(group = "org.apache.kerby")
+ exclude(group = "org.apache.httpcomponents")
+ exclude(group = "org.apache.htrace")
+ exclude(group = "commons-cli")
+ exclude(group = "javax.servlet")
+ exclude(group = "org.eclipse.jetty")
+ exclude(group = "com.sun.jersey")
+ exclude(group = "com.jcraft")
+ exclude(group = "dnsjava")
+ }
+ runtimeOnly(libs.hadoop.mapreduce.client.core) {
+ isTransitive = false
}
+
+ testRuntimeOnly(libs.slf4j.simple)
}
diff --git a/opendc-format/src/main/kotlin/org/opendc/format/trace/wtf/WtfTraceReader.kt b/opendc-format/src/main/kotlin/org/opendc/format/trace/wtf/WtfTraceReader.kt
index feadf61f..dde1b340 100644
--- a/opendc-format/src/main/kotlin/org/opendc/format/trace/wtf/WtfTraceReader.kt
+++ b/opendc-format/src/main/kotlin/org/opendc/format/trace/wtf/WtfTraceReader.kt
@@ -23,15 +23,16 @@
package org.opendc.format.trace.wtf
import org.apache.avro.generic.GenericRecord
-import org.apache.hadoop.fs.Path
-import org.apache.parquet.avro.AvroParquetReader
import org.opendc.format.trace.TraceEntry
import org.opendc.format.trace.TraceReader
+import org.opendc.format.util.LocalParquetReader
import org.opendc.simulator.compute.workload.SimFlopsWorkload
import org.opendc.workflow.api.Job
import org.opendc.workflow.api.Task
import org.opendc.workflow.api.WORKFLOW_TASK_CORES
import org.opendc.workflow.api.WORKFLOW_TASK_DEADLINE
+import java.io.File
+import java.nio.file.Path
import java.util.UUID
import kotlin.math.min
@@ -41,13 +42,20 @@ import kotlin.math.min
*
* @param path The path to the trace.
*/
-public class WtfTraceReader(path: String) : TraceReader<Job> {
+public class WtfTraceReader(path: Path) : TraceReader<Job> {
/**
* The internal iterator to use for this reader.
*/
private val iterator: Iterator<TraceEntry<Job>>
/**
+ * Construct a [TraceReader] from the specified [path].
+ *
+ * @param path The path to the trace.
+ */
+ public constructor(path: File) : this(path.toPath())
+
+ /**
* Initialize the reader.
*/
init {
@@ -56,43 +64,43 @@ public class WtfTraceReader(path: String) : TraceReader<Job> {
val tasks = mutableMapOf<Long, Task>()
val taskDependencies = mutableMapOf<Task, List<Long>>()
- @Suppress("DEPRECATION")
- val reader = AvroParquetReader.builder<GenericRecord>(Path(path, "tasks/schema-1.0")).build()
+ LocalParquetReader<GenericRecord>(path.resolve("tasks/schema-1.0")).use { reader ->
+ while (true) {
+ val nextRecord = reader.read() ?: break
- while (true) {
- val nextRecord = reader.read() ?: break
+ val workflowId = nextRecord.get("workflow_id") as Long
+ val taskId = nextRecord.get("id") as Long
+ val submitTime = nextRecord.get("ts_submit") as Long
+ val runtime = nextRecord.get("runtime") as Long
+ val cores = (nextRecord.get("resource_amount_requested") as Double).toInt()
- val workflowId = nextRecord.get("workflow_id") as Long
- val taskId = nextRecord.get("id") as Long
- val submitTime = nextRecord.get("ts_submit") as Long
- val runtime = nextRecord.get("runtime") as Long
- val cores = (nextRecord.get("resource_amount_requested") as Double).toInt()
- @Suppress("UNCHECKED_CAST")
- val dependencies = (nextRecord.get("parents") as ArrayList<GenericRecord>).map {
- it.get("item") as Long
- }
+ @Suppress("UNCHECKED_CAST")
+ val dependencies = (nextRecord.get("parents") as ArrayList<GenericRecord>).map {
+ it.get("item") as Long
+ }
- val flops: Long = 4100 * (runtime / 1000) * cores
+ val flops: Long = 4100 * (runtime / 1000) * cores
- val workflow = workflows.getOrPut(workflowId) {
- Job(UUID(0L, workflowId), "<unnamed>", HashSet())
- }
- val workload = SimFlopsWorkload(flops)
- val task = Task(
- UUID(0L, taskId),
- "<unnamed>",
- HashSet(),
- mapOf(
- "workload" to workload,
- WORKFLOW_TASK_CORES to cores,
- WORKFLOW_TASK_DEADLINE to runtime
+ val workflow = workflows.getOrPut(workflowId) {
+ Job(UUID(0L, workflowId), "<unnamed>", HashSet())
+ }
+ val workload = SimFlopsWorkload(flops)
+ val task = Task(
+ UUID(0L, taskId),
+ "<unnamed>",
+ HashSet(),
+ mapOf(
+ "workload" to workload,
+ WORKFLOW_TASK_CORES to cores,
+ WORKFLOW_TASK_DEADLINE to runtime
+ )
)
- )
- starts.merge(workflowId, submitTime, ::min)
- (workflow.tasks as MutableSet<Task>).add(task)
- tasks[taskId] = task
- taskDependencies[task] = dependencies
+ starts.merge(workflowId, submitTime, ::min)
+ (workflow.tasks as MutableSet<Task>).add(task)
+ tasks[taskId] = task
+ taskDependencies[task] = dependencies
+ }
}
// Fix dependencies and dependents for all tasks
diff --git a/opendc-format/src/main/kotlin/org/opendc/format/util/LocalInputFile.kt b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalInputFile.kt
new file mode 100644
index 00000000..92319ace
--- /dev/null
+++ b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalInputFile.kt
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.format.util
+
+import org.apache.parquet.io.InputFile
+import org.apache.parquet.io.SeekableInputStream
+import java.io.EOFException
+import java.io.File
+import java.nio.ByteBuffer
+import java.nio.channels.FileChannel
+import java.nio.file.Path
+import java.nio.file.StandardOpenOption
+
+/**
+ * An [InputFile] on the local filesystem.
+ */
+public class LocalInputFile(private val path: Path) : InputFile {
+ /**
+ * The [FileChannel] used for accessing the input path.
+ */
+ private val channel = FileChannel.open(path, StandardOpenOption.READ)
+
+ /**
+ * Construct a [LocalInputFile] for the specified [file].
+ */
+ public constructor(file: File) : this(file.toPath())
+
+ override fun getLength(): Long = channel.size()
+
+ override fun newStream(): SeekableInputStream = object : SeekableInputStream() {
+ override fun read(buf: ByteBuffer): Int {
+ return channel.read(buf)
+ }
+
+ override fun read(): Int {
+ val single = ByteBuffer.allocate(1)
+ var read: Int
+
+ // ReadableByteChannel#read might read zero bytes so continue until we read at least one byte
+ do {
+ read = channel.read(single)
+ } while (read == 0)
+
+ return if (read == -1) {
+ read
+ } else {
+ single.get(0).toInt() and 0xff
+ }
+ }
+
+ override fun getPos(): Long {
+ return channel.position()
+ }
+
+ override fun seek(newPos: Long) {
+ channel.position(newPos)
+ }
+
+ override fun readFully(bytes: ByteArray) {
+ readFully(ByteBuffer.wrap(bytes))
+ }
+
+ override fun readFully(bytes: ByteArray, start: Int, len: Int) {
+ readFully(ByteBuffer.wrap(bytes, start, len))
+ }
+
+ override fun readFully(buf: ByteBuffer) {
+ var remainder = buf.remaining()
+ while (remainder > 0) {
+ val read = channel.read(buf)
+ remainder -= read
+
+ if (read == -1 && remainder > 0) {
+ throw EOFException()
+ }
+ }
+ }
+
+ override fun close() {
+ channel.close()
+ }
+
+ override fun toString(): String = "NioSeekableInputStream"
+ }
+
+ override fun toString(): String = "LocalInputFile[path=$path]"
+}
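
LocalInputFile plugs straight into Parquet's InputFile-based builder APIs, removing the need for an org.apache.hadoop.fs.Path on the read side. A minimal sketch of the intended call pattern, assuming a hypothetical trace.parquet in the working directory:

    import org.apache.avro.generic.GenericData
    import org.apache.parquet.avro.AvroParquetReader
    import org.opendc.format.util.LocalInputFile
    import java.io.File

    fun dumpTrace() {
        // Build a reader over a plain java.io.File instead of a Hadoop Path
        val reader = AvroParquetReader
            .builder<GenericData.Record>(LocalInputFile(File("trace.parquet")))
            .disableCompatibility()
            .build()
        try {
            while (true) {
                val record = reader.read() ?: break // null marks end of file
                println(record)
            }
        } finally {
            reader.close()
        }
    }

This is the same pattern Sc20StreamingParquetTraceReader adopts above.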
diff --git a/opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt
new file mode 100644
index 00000000..657bca5a
--- /dev/null
+++ b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.format.util
+
+import org.apache.parquet.io.OutputFile
+import org.apache.parquet.io.PositionOutputStream
+import java.io.File
+import java.io.OutputStream
+import java.nio.file.Files
+import java.nio.file.Path
+import java.nio.file.StandardOpenOption
+
+/**
+ * An [OutputFile] on the local filesystem.
+ */
+public class LocalOutputFile(private val path: Path) : OutputFile {
+ /**
+ * Construct a [LocalOutputFile] from the specified [file]
+ */
+ public constructor(file: File) : this(file.toPath())
+
+ override fun create(blockSizeHint: Long): PositionOutputStream {
+ val output = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)
+ return NioPositionOutputStream(output)
+ }
+
+ override fun createOrOverwrite(blockSizeHint: Long): PositionOutputStream {
+ val output = Files.newOutputStream(path, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)
+ return NioPositionOutputStream(output)
+ }
+
+ override fun supportsBlockSize(): Boolean = false
+
+ override fun defaultBlockSize(): Long =
+ throw UnsupportedOperationException("Local filesystem does not have default block size")
+
+ override fun getPath(): String = path.toString()
+
+ /**
+ * Implementation of [PositionOutputStream] for an [OutputStream].
+ */
+ private class NioPositionOutputStream(private val output: OutputStream) : PositionOutputStream() {
+ /**
+ * The current position in the file.
+ */
+ private var _pos = 0L
+
+ override fun getPos(): Long = _pos
+
+ override fun write(b: Int) {
+ output.write(b)
+ _pos++
+ }
+
+ override fun write(b: ByteArray) {
+ output.write(b)
+ _pos += b.size
+ }
+
+ override fun write(b: ByteArray, off: Int, len: Int) {
+ output.write(b, off, len)
+ _pos += len
+ }
+
+ override fun flush() {
+ output.flush()
+ }
+
+ override fun close() {
+ output.close()
+ }
+
+ override fun toString(): String = "NioPositionOutputStream[output=$output]"
+ }
+}
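
LocalOutputFile maps Parquet's two write modes onto NIO open options: create() uses CREATE_NEW and fails on an existing file, while createOrOverwrite() truncates. A hedged sketch of a writer using the overwrite mode; the schema and its int field "field" are assumptions for illustration:

    import org.apache.avro.Schema
    import org.apache.avro.generic.GenericData
    import org.apache.parquet.avro.AvroParquetWriter
    import org.apache.parquet.hadoop.ParquetFileWriter
    import org.opendc.format.util.LocalOutputFile
    import java.io.File

    fun writeRecords(file: File, schema: Schema) {
        // Mode.OVERWRITE routes to createOrOverwrite(); the default CREATE mode
        // routes to create() and surfaces a FileAlreadyExistsException instead
        val writer = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(file))
            .withSchema(schema)
            .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
            .build()
        writer.use { w ->
            val record = GenericData.Record(schema)
            record.put("field", 1) // assumes an int field named "field"
            w.write(record)
        }
    }

Both behaviours are exercised by the ParquetTest suite at the end of this diff.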
diff --git a/opendc-format/src/main/kotlin/org/opendc/format/util/LocalParquetReader.kt b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalParquetReader.kt
new file mode 100644
index 00000000..5083f3e1
--- /dev/null
+++ b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalParquetReader.kt
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.format.util
+
+import org.apache.parquet.avro.AvroParquetReader
+import org.apache.parquet.hadoop.ParquetReader
+import org.apache.parquet.io.InputFile
+import java.io.File
+import java.io.IOException
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.io.path.isDirectory
+
+/**
+ * A helper class to read Parquet files.
+ *
+ * @param path The path to the Parquet file or directory to read.
+ */
+public class LocalParquetReader<out T>(path: Path) : AutoCloseable {
+ /**
+ * The input files to process.
+ */
+ private val filesIterator = if (path.isDirectory())
+ Files.list(path)
+ .filter { !it.isDirectory() }
+ .sorted()
+ .map { LocalInputFile(it) }
+ .iterator()
+ else
+ listOf(LocalInputFile(path)).iterator()
+
+ /**
+ * The Parquet reader to use.
+ */
+ private var reader: ParquetReader<T>? = null
+
+ /**
+ * Construct a [LocalParquetReader] for the specified [file].
+ */
+ public constructor(file: File) : this(file.toPath())
+
+ /**
+ * Read a single entry in the Parquet file.
+ */
+ public fun read(): T? {
+ return try {
+ val next = reader?.read()
+ if (next != null) {
+ next
+ } else {
+ initReader()
+
+ if (reader == null)
+ null
+ else
+ read()
+ }
+ } catch (e: InterruptedException) {
+ throw IOException(e)
+ }
+ }
+
+ /**
+ * Close the Parquet reader.
+ */
+ override fun close() {
+ reader?.close()
+ }
+
+ /**
+ * Initialize the next reader.
+ */
+ private fun initReader() {
+ reader?.close()
+
+ this.reader = if (filesIterator.hasNext()) {
+ createReader(filesIterator.next())
+ } else {
+ null
+ }
+ }
+
+ /**
+ * Create a Parquet reader for the specified file.
+ */
+ private fun createReader(input: InputFile): ParquetReader<T> {
+ return AvroParquetReader
+ .builder<T>(input)
+ .disableCompatibility()
+ .build()
+ }
+}
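
Unlike a bare AvroParquetReader, LocalParquetReader also accepts a directory and chains the contained files in sorted order, which is what the updated WtfTraceReader above relies on for the tasks/schema-1.0 layout. A minimal usage sketch, with a hypothetical directory name:

    import org.apache.avro.generic.GenericRecord
    import org.opendc.format.util.LocalParquetReader
    import java.nio.file.Paths

    fun countRecords(): Long {
        var count = 0L
        // Iterates every non-directory entry under the path; read() returns
        // null only once all chained files are exhausted
        LocalParquetReader<GenericRecord>(Paths.get("tasks/schema-1.0")).use { reader ->
            while (reader.read() != null) {
                count++
            }
        }
        return count
    }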
diff --git a/opendc-format/src/test/kotlin/org/opendc/format/trace/wtf/WtfTraceReaderTest.kt b/opendc-format/src/test/kotlin/org/opendc/format/trace/wtf/WtfTraceReaderTest.kt
index bcfa7553..31ae03e0 100644
--- a/opendc-format/src/test/kotlin/org/opendc/format/trace/wtf/WtfTraceReaderTest.kt
+++ b/opendc-format/src/test/kotlin/org/opendc/format/trace/wtf/WtfTraceReaderTest.kt
@@ -24,6 +24,7 @@ package org.opendc.format.trace.wtf
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test
+import java.io.File
/**
* Test suite for the [WtfTraceReader] class.
@@ -34,7 +35,7 @@ class WtfTraceReaderTest {
*/
@Test
fun testParseWtf() {
- val reader = WtfTraceReader("src/test/resources/wtf-trace")
+ val reader = WtfTraceReader(File("src/test/resources/wtf-trace"))
var entry = reader.next()
assertEquals(0, entry.start)
assertEquals(23, entry.workload.tasks.size)
diff --git a/opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt b/opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt
new file mode 100644
index 00000000..e496dd96
--- /dev/null
+++ b/opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2021 AtLarge Research
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.opendc.format.util
+
+import org.apache.avro.SchemaBuilder
+import org.apache.avro.generic.GenericData
+import org.apache.parquet.avro.AvroParquetReader
+import org.apache.parquet.avro.AvroParquetWriter
+import org.apache.parquet.hadoop.ParquetFileWriter
+import org.junit.jupiter.api.*
+import org.junit.jupiter.api.Assertions.assertEquals
+import java.io.File
+import java.nio.file.FileAlreadyExistsException
+import java.nio.file.NoSuchFileException
+
+/**
+ * Test suite for the Parquet helper classes.
+ */
+internal class ParquetTest {
+ private val schema = SchemaBuilder
+ .record("test")
+ .namespace("org.opendc.format.util")
+ .fields()
+ .name("field").type().intType().noDefault()
+ .endRecord()
+
+ private lateinit var file: File
+
+ /**
+ * Setup the test
+ */
+ @BeforeEach
+ fun setUp() {
+ file = File.createTempFile("opendc", "parquet")
+ }
+
+ /**
+ * Tear down the test.
+ */
+ @AfterEach
+ fun tearDown() {
+ file.delete()
+ }
+
+ /**
+ * Initial test to verify whether the Parquet writer works.
+ */
+ @Test
+ fun testSmoke() {
+ val n = 4
+ val writer = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(file))
+ .withSchema(schema)
+ .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
+ .build()
+
+ try {
+ repeat(n) { i ->
+ val record = GenericData.Record(schema)
+ record.put("field", i)
+ writer.write(record)
+ }
+ } finally {
+ writer.close()
+ }
+
+ val reader = AvroParquetReader.builder<GenericData.Record>(LocalInputFile(file))
+ .build()
+
+ var counter = 0
+ try {
+ while (true) {
+ val record = reader.read() ?: break
+ assertEquals(counter++, record.get("field"))
+ }
+ } finally {
+ reader.close()
+ }
+
+ assertEquals(n, counter)
+ }
+
+ /**
+ * Test if overwriting fails if not specified.
+ */
+ @Test
+ fun testOverwrite() {
+ assertThrows<FileAlreadyExistsException> {
+ AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(file))
+ .withSchema(schema)
+ .build()
+ }
+ }
+
+ /**
+ * Test non-existent file.
+ */
+ @Test
+ fun testNonExistent() {
+ file.delete()
+ assertThrows<NoSuchFileException> {
+ AvroParquetReader.builder<GenericData.Record>(LocalInputFile(file))
+ .build()
+ }
+ }
+}