This is an automated email from the ASF dual-hosted git repository.
wgtmac pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 65f7adef5 GH-3398: Fix potential ClassLoader leak caused by ThreadLocal lambda in Binary (#3447)
65f7adef5 is described below
commit 65f7adef548f9ab52e5790feba1a5ae44f7ad3f7
Author: YangJie <[email protected]>
AuthorDate: Mon Jun 8 13:59:07 2026 +0800
GH-3398: Fix potential ClassLoader leak caused by ThreadLocal lambda in Binary (#3447)
---
.../java/org/apache/parquet/io/api/Binary.java | 13 ++++----
.../java/org/apache/parquet/io/api/TestBinary.java | 35 ++++++++++++++++++++++
2 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index e37ee1248..173581bdd 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -27,7 +27,6 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
-import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.parquet.io.ParquetEncodingException;
@@ -268,14 +267,16 @@ public abstract class Binary implements Comparable<Binary>, Serializable {
return "Binary{\"" + toStringUsingUTF8() + "\"}";
}
- private static final ThreadLocal<CharsetEncoder> ENCODER =
- ThreadLocal.withInitial(StandardCharsets.UTF_8::newEncoder);
-
private static ByteBuffer encodeUTF8(CharSequence value) {
try {
- return ENCODER.get().encode(CharBuffer.wrap(value));
+ // Use a fresh encoder per call rather than a static ThreadLocal initialized with a lambda
+ // (UTF_8::newEncoder): that lambda's class is loaded by the application ClassLoader and can
+ // keep it from being unloaded in long-lived pooled threads, leaking Metaspace (GH-3398).
+ // The encoder also preserves strict CodingErrorAction.REPORT, so malformed UTF-16 fails
+ // fast instead of being silently replaced (as String#getBytes(UTF_8) would).
+ return StandardCharsets.UTF_8.newEncoder().encode(CharBuffer.wrap(value));
} catch (CharacterCodingException e) {
- throw new ParquetEncodingException("UTF-8 not supported.", e);
+ throw new ParquetEncodingException("Failed to encode CharSequence as UTF-8.", e);
}
}
}
diff --git a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java b/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
index a1a83af77..3dcb878d2 100644
--- a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
+++ b/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
@@ -30,7 +30,10 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
+import org.apache.parquet.io.ParquetEncodingException;
import org.apache.parquet.io.api.TestBinary.BinaryFactory.BinaryAndOriginal;
import org.junit.Test;
@@ -314,4 +317,36 @@ public class TestBinary {
// expected
}
}
+
+ @Test
+ public void testFromCharSequenceEncodesValidUtf8() {
+ // Cover ASCII, multi-byte BMP, a supplementary code point (valid surrogate pair) and empty.
+ assertFromCharSequenceEncodesUtf8("test-123-é中"); // ASCII + U+00E9 (2-byte) + U+4E2D (3-byte)
+ assertFromCharSequenceEncodesUtf8("😀"); // U+1F600, valid surrogate pair (4-byte)
+ assertFromCharSequenceEncodesUtf8(""); // empty
+ }
+
+ private static void assertFromCharSequenceEncodesUtf8(String value) {
+ // fromCharSequence routes any CharSequence (here a StringBuilder) through FromCharSequenceBinary.
+ // For valid input the strict encoder must match String#getBytes(UTF_8), so this is a genuine
+ // cross-check, not a circular assertion.
+ Binary binary = Binary.fromCharSequence(new StringBuilder(value));
+ assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), binary.getBytes());
+ }
+
+ @Test
+ public void testFromCharSequenceRejectsMalformedUtf16() {
+ // An unpaired high surrogate is invalid UTF-16. FromCharSequenceBinary must fail fast
+ // rather than silently substituting a replacement byte (as String#getBytes(UTF_8) would).
+ CharSequence value = new StringBuilder().append('a').append('\uD800').append('b');
+ try {
+ Binary.fromCharSequence(value);
+ fail("Should have thrown an exception for malformed UTF-16 input");
+ } catch (ParquetEncodingException e) {
+ // Lock in that the cause is a UTF-8 coding error, not an unrelated failure of the same type.
+ assertTrue(
+ "expected a CharacterCodingException cause but was " + e.getCause(),
+ e.getCause() instanceof CharacterCodingException);
+ }
+ }
}