diff options
Diffstat (limited to 'opendc-format/src')
| -rw-r--r-- | opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt | 95 | ||||
| -rw-r--r-- | opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt | 125 |
2 files changed, 220 insertions, 0 deletions
diff --git a/opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt new file mode 100644 index 00000000..657bca5a --- /dev/null +++ b/opendc-format/src/main/kotlin/org/opendc/format/util/LocalOutputFile.kt @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.format.util + +import org.apache.parquet.io.OutputFile +import org.apache.parquet.io.PositionOutputStream +import java.io.File +import java.io.OutputStream +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.StandardOpenOption + +/** + * An [OutputFile] on the local filesystem. + */ +public class LocalOutputFile(private val path: Path) : OutputFile { + /** + * Construct a [LocalOutputFile] from the specified [file] + */ + public constructor(file: File) : this(file.toPath()) + + override fun create(blockSizeHint: Long): PositionOutputStream { + val output = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE) + return NioPositionOutputStream(output) + } + + override fun createOrOverwrite(blockSizeHint: Long): PositionOutputStream { + val output = Files.newOutputStream(path, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING) + return NioPositionOutputStream(output) + } + + override fun supportsBlockSize(): Boolean = false + + override fun defaultBlockSize(): Long = + throw UnsupportedOperationException("Local filesystem does not have default block size") + + override fun getPath(): String = path.toString() + + /** + * Implementation of [PositionOutputStream] for an [OutputStream]. + */ + private class NioPositionOutputStream(private val output: OutputStream) : PositionOutputStream() { + /** + * The current position in the file. + */ + private var _pos = 0L + + override fun getPos(): Long = _pos + + override fun write(b: Int) { + output.write(b) + _pos++ + } + + override fun write(b: ByteArray) { + output.write(b) + _pos += b.size + } + + override fun write(b: ByteArray, off: Int, len: Int) { + output.write(b, off, len) + _pos += len + } + + override fun flush() { + output.flush() + } + + override fun close() { + output.close() + } + + override fun toString(): String = "NioPositionOutputStream[output=$output]" + } +} diff --git a/opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt b/opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt new file mode 100644 index 00000000..e496dd96 --- /dev/null +++ b/opendc-format/src/test/kotlin/org/opendc/format/util/ParquetTest.kt @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2021 AtLarge Research + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package org.opendc.format.util + +import org.apache.avro.SchemaBuilder +import org.apache.avro.generic.GenericData +import org.apache.parquet.avro.AvroParquetReader +import org.apache.parquet.avro.AvroParquetWriter +import org.apache.parquet.hadoop.ParquetFileWriter +import org.junit.jupiter.api.* +import org.junit.jupiter.api.Assertions.assertEquals +import java.io.File +import java.nio.file.FileAlreadyExistsException +import java.nio.file.NoSuchFileException + +/** + * Test suite for the Parquet helper classes. + */ +internal class ParquetTest { + private val schema = SchemaBuilder + .record("test") + .namespace("org.opendc.format.util") + .fields() + .name("field").type().intType().noDefault() + .endRecord() + + private lateinit var file: File + + /** + * Setup the test + */ + @BeforeEach + fun setUp() { + file = File.createTempFile("opendc", "parquet") + } + + /** + * Tear down the test. + */ + @AfterEach + fun tearDown() { + file.delete() + } + + /** + * Initial test to verify whether the Parquet writer works. + */ + @Test + fun testSmoke() { + val n = 4 + val writer = AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(file)) + .withSchema(schema) + .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) + .build() + + try { + repeat(n) { i -> + val record = GenericData.Record(schema) + record.put("field", i) + writer.write(record) + } + } finally { + writer.close() + } + + val reader = AvroParquetReader.builder<GenericData.Record>(LocalInputFile(file)) + .build() + + var counter = 0 + try { + while (true) { + val record = reader.read() ?: break + assertEquals(counter++, record.get("field")) + } + } finally { + reader.close() + } + + assertEquals(n, counter) + } + + /** + * Test if overwriting fails if not specified. + */ + @Test + fun testOverwrite() { + assertThrows<FileAlreadyExistsException> { + AvroParquetWriter.builder<GenericData.Record>(LocalOutputFile(file)) + .withSchema(schema) + .build() + } + } + + /** + * Test non-existent file. + */ + @Test + fun testNonExistent() { + file.delete() + assertThrows<NoSuchFileException> { + AvroParquetReader.builder<GenericData.Record>(LocalInputFile(file)) + .build() + } + } +} |
