This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new c844d2682 PARQUET-2354: Fix race condition in CharsetValidator (#1154)
c844d2682 is described below

commit c844d2682f0f603c68f985a068beec0871fcc386
Author: Piotr Findeisen <[email protected]>
AuthorDate: Thu Sep 28 09:46:23 2023 +0200

    PARQUET-2354: Fix race condition in CharsetValidator (#1154)
    
    The `CharsetValidator` has a static singleton instance at
    `BinaryTruncator.DEFAULT_UTF8_TRUNCATOR.validator`, so it can be
    accessed from multiple threads. Before the change, all threads would
    operate on a shared "dummy buffer" for decoding.
---
 .../parquet/internal/column/columnindex/BinaryTruncator.java      | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
index 8a6f0078b..57fbb7966 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
@@ -40,7 +40,7 @@ public abstract class BinaryTruncator {
   }
 
   private static class CharsetValidator {
-    private final CharBuffer dummyBuffer = CharBuffer.allocate(1024);
+    private final ThreadLocal<CharBuffer> dummyBuffer = ThreadLocal.withInitial(() -> CharBuffer.allocate(1024));
     private final CharsetDecoder decoder;
 
     CharsetValidator(Charset charset) {
@@ -50,11 +50,13 @@ public abstract class BinaryTruncator {
     }
 
     Validity checkValidity(ByteBuffer buffer) {
+      // TODO this is currently used for UTF-8 only, so validity check could be done without copying.
+      CharBuffer charBuffer = dummyBuffer.get();
       int pos = buffer.position();
       CoderResult result = CoderResult.OVERFLOW;
       while (result.isOverflow()) {
-        dummyBuffer.clear();
-        result = decoder.decode(buffer, dummyBuffer, true);
+        charBuffer.clear();
+        result = decoder.decode(buffer, charBuffer, true);
       }
       buffer.position(pos);
       if (result.isUnderflow()) {

Reply via email to