This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a86015cd GH-3249: Fix incorrect Bloom filter data when reading from ByteArrayInputStream by using readFully() (#3250)
0a86015cd is described below

commit 0a86015cd96e55160c9491ca2dee470c6112e491
Author: Yuming Wang <[email protected]>
AuthorDate: Fri Jul 11 23:12:04 2025 +0800

    GH-3249: Fix incorrect Bloom filter data when reading from ByteArrayInputStream by using readFully() (#3250)
---
 .../main/java/org/apache/parquet/hadoop/ParquetFileReader.java   | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index b12a819cd..ae2de87cb 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1667,7 +1667,14 @@ public class ParquetFileReader implements Closeable {
     byte[] bitset;
     if (null == bloomFilterDecryptor) {
       bitset = new byte[numBytes];
-      in.read(bitset);
+      // For negative bloomFilterLength (files from older versions), use readFully() instead of read().
+      // readFully() guarantees reading exactly numBytes bytes, while read() may read fewer bytes in a single
+      // call. This ensures the entire bitset is properly loaded.
+      if (bloomFilterLength < 0) {
+        f.readFully(bitset);
+      } else {
+        in.read(bitset);
+      }
     } else {
       bitset = bloomFilterDecryptor.decrypt(in, bloomFilterBitsetAAD);
       if (bitset.length != numBytes) {

Reply via email to