This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new c844d2682 PARQUET-2354: Fix race condition in CharsetValidator (#1154)
c844d2682 is described below
commit c844d2682f0f603c68f985a068beec0871fcc386
Author: Piotr Findeisen <[email protected]>
AuthorDate: Thu Sep 28 09:46:23 2023 +0200
PARQUET-2354: Fix race condition in CharsetValidator (#1154)
The `CharsetValidator` has a static singleton instance at
`BinaryTruncator.DEFAULT_UTF8_TRUNCATOR.validator`, so it can be
accessed from multiple threads. Before the change, all threads would
operate on a shared "dummy buffer" for decoding. This change makes the
buffer thread-local, so each thread decodes into its own `CharBuffer`.
---
.../parquet/internal/column/columnindex/BinaryTruncator.java | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git
a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
index 8a6f0078b..57fbb7966 100644
---
a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
+++
b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
@@ -40,7 +40,7 @@ public abstract class BinaryTruncator {
}
private static class CharsetValidator {
- private final CharBuffer dummyBuffer = CharBuffer.allocate(1024);
+ private final ThreadLocal<CharBuffer> dummyBuffer =
ThreadLocal.withInitial(() -> CharBuffer.allocate(1024));
private final CharsetDecoder decoder;
CharsetValidator(Charset charset) {
@@ -50,11 +50,13 @@ public abstract class BinaryTruncator {
}
Validity checkValidity(ByteBuffer buffer) {
+ // TODO this is currently used for UTF-8 only, so validity check could be done without copying.
+ CharBuffer charBuffer = dummyBuffer.get();
int pos = buffer.position();
CoderResult result = CoderResult.OVERFLOW;
while (result.isOverflow()) {
- dummyBuffer.clear();
- result = decoder.decode(buffer, dummyBuffer, true);
+ charBuffer.clear();
+ result = decoder.decode(buffer, charBuffer, true);
}
buffer.position(pos);
if (result.isUnderflow()) {