This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 0a86015cd GH-3249: Fix incorrect Bloom filter data when reading from
ByteArrayInputStream by using readFully() (#3250)
0a86015cd is described below
commit 0a86015cd96e55160c9491ca2dee470c6112e491
Author: Yuming Wang <[email protected]>
AuthorDate: Fri Jul 11 23:12:04 2025 +0800
GH-3249: Fix incorrect Bloom filter data when reading from
ByteArrayInputStream by using readFully() (#3250)
---
.../main/java/org/apache/parquet/hadoop/ParquetFileReader.java | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index b12a819cd..ae2de87cb 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1667,7 +1667,14 @@ public class ParquetFileReader implements Closeable {
byte[] bitset;
if (null == bloomFilterDecryptor) {
bitset = new byte[numBytes];
- in.read(bitset);
+ // For negative bloomFilterLength (files from older versions), use readFully() instead of read().
+ // readFully() guarantees reading exactly numBytes bytes, while read() may read fewer bytes in a single
+ // call. This ensures the entire bitset is properly loaded.
+ if (bloomFilterLength < 0) {
+ f.readFully(bitset);
+ } else {
+ in.read(bitset);
+ }
} else {
bitset = bloomFilterDecryptor.decrypt(in, bloomFilterBitsetAAD);
if (bitset.length != numBytes) {