mapleFU commented on code in PR #38863:
URL: https://github.com/apache/arrow/pull/38863#discussion_r1404377372


##########
cpp/src/parquet/bloom_filter.cc:
##########
@@ -105,16 +105,24 @@ static ::arrow::Status ValidateBloomFilterHeader(
 }
 
 BlockSplitBloomFilter BlockSplitBloomFilter::Deserialize(
-    const ReaderProperties& properties, ArrowInputStream* input) {
-  // NOTE: we don't know the bloom filter header size upfront, and we can't 
rely on
-  // InputStream::Peek() which isn't always implemented. Therefore, we must 
first
-  // Read() with an upper bound estimate of the header size, then once we know
-  // the bloom filter data size, we can Read() the exact number of remaining 
data bytes.
+    const ReaderProperties& properties, ArrowInputStream* input,
+    std::optional<int64_t> bloom_filter_length) {
   ThriftDeserializer deserializer(properties);
   format::BloomFilterHeader header;
+  int64_t bloom_filter_header_read_size = 0;
+  if (bloom_filter_length.has_value()) {
+    bloom_filter_header_read_size = bloom_filter_length.value();
+  } else {
+    // NOTE: we don't know the bloom filter header size upfront without
+    // bloom_filter_length, and we can't rely on InputStream::Peek() which 
isn't always
+    // implemented. Therefore, we must first Read() with an upper bound 
estimate of the
+    // header size, then once we know the bloom filter data size, we can 
Read() the exact
+    // number of remaining data bytes.
+    bloom_filter_header_read_size = kBloomFilterHeaderSizeGuess;
+  }
 
   // Read and deserialize bloom filter header
-  PARQUET_ASSIGN_OR_THROW(auto header_buf, 
input->Read(kBloomFilterHeaderSizeGuess));
+  PARQUET_ASSIGN_OR_THROW(auto header_buf, 
input->Read(bloom_filter_header_read_size));
   // This gets used, then set by DeserializeThriftMsg
   uint32_t header_size = static_cast<uint32_t>(header_buf->size());
   try {

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to