1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
/*
* Copyright (c) 2021 AtLarge Research
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package org.opendc.trace.util.parquet
import mu.KotlinLogging
import org.apache.parquet.column.ParquetProperties
import org.apache.parquet.hadoop.ParquetFileWriter
import org.apache.parquet.hadoop.ParquetWriter
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import java.io.File
import java.util.concurrent.ArrayBlockingQueue
import java.util.concurrent.BlockingQueue
import kotlin.concurrent.thread
/**
* A writer that writes data in Parquet format.
*
* @param path The path to the file to write the data to.
* @param writeSupport The [WriteSupport] implementation for converting the records to Parquet format.
*/
public abstract class ParquetDataWriter<in T>(
    path: File,
    private val writeSupport: WriteSupport<T>,
    bufferSize: Int = 4096,
) : AutoCloseable {
    /**
     * The logging instance to use.
     */
    private val logger = KotlinLogging.logger {}

    /**
     * The queue of records pending serialization. [write] blocks when the queue is
     * full (backpressure against a slow writer thread).
     */
    private val queue: BlockingQueue<T> = ArrayBlockingQueue(bufferSize)

    /**
     * A failure raised on the writer thread, to be propagated to callers of [write].
     *
     * Marked [Volatile]: it is written by the writer thread and read by producer
     * threads; without the memory barrier a producer might never observe the
     * failure and block forever on [queue] against a dead consumer.
     */
    @Volatile
    private var exception: Throwable? = null

    /**
     * The background thread that drains [queue] and writes records to the Parquet
     * file. It terminates when interrupted (see [close]), flushing any records
     * still buffered in the queue before closing the underlying writer.
     */
    private val writerThread =
        thread(start = false, name = this.toString()) {
            val writer =
                let {
                    val builder =
                        LocalParquetWriter.builder(path.toPath(), writeSupport)
                            .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
                            .withCompressionCodec(CompressionCodecName.ZSTD)
                            .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
                    buildWriter(builder)
                }
            val queue = queue
            val buf = mutableListOf<T>()
            var shouldStop = false
            try {
                while (!shouldStop) {
                    try {
                        // Block until at least one record is available.
                        writer.write(queue.take())
                    } catch (e: InterruptedException) {
                        // close() interrupts us; finish this iteration (draining
                        // below) and then exit the loop.
                        shouldStop = true
                    }
                    // Opportunistically drain everything currently queued so the
                    // final records enqueued before close() are not lost.
                    if (queue.drainTo(buf) > 0) {
                        for (data in buf) {
                            writer.write(data)
                        }
                        buf.clear()
                    }
                }
            } catch (e: Throwable) {
                logger.error(e) { "Failure in Parquet data writer" }
                // Surface the failure to producers via write().
                exception = e
            } finally {
                writer.close()
            }
        }

    /**
     * Build the [ParquetWriter] used to write the Parquet files.
     *
     * Subclasses may override this to customize the builder (e.g. page size,
     * dictionary encoding) before the writer is constructed.
     */
    protected open fun buildWriter(builder: LocalParquetWriter.Builder<@UnsafeVariance T>): ParquetWriter<@UnsafeVariance T> {
        return builder.build()
    }

    /**
     * Enqueue the specified record to be written to the Parquet file.
     *
     * Blocks when the internal buffer is full.
     *
     * @throws IllegalStateException If the writer thread has failed; the original
     * failure is attached as the cause.
     */
    public fun write(data: T) {
        val exception = exception
        if (exception != null) {
            throw IllegalStateException("Writer thread failed", exception)
        }
        queue.put(data)
    }

    /**
     * Signal the writer thread to stop and wait for it to flush remaining
     * records and close the underlying Parquet writer.
     *
     * NOTE(review): a failure that occurs during the final flush is logged but
     * not rethrown here — callers who need strict delivery guarantees should be
     * aware of this.
     */
    override fun close() {
        writerThread.interrupt()
        writerThread.join()
    }

    init {
        // Start consuming records as soon as the instance is constructed.
        writerThread.start()
    }
}
|