mapleFU commented on code in PR #37400:
URL: https://github.com/apache/arrow/pull/37400#discussion_r1664364035


##########
cpp/src/parquet/column_writer.cc:
##########
@@ -2363,12 +2390,153 @@ Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
   return Status::OK();
 }
 
+template <typename DType>
+void TypedColumnWriterImpl<DType>::UpdateBloomFilter(const T* values,
+                                                     int64_t num_values) {
+  if (bloom_filter_) {
+    std::array<uint64_t, kHashBatchSize> hashes;
+    for (int64_t i = 0; i < num_values; i += kHashBatchSize) {
+      int64_t current_hash_batch_size = std::min(kHashBatchSize, num_values - i);
+      bloom_filter_->Hashes(values, static_cast<int>(current_hash_batch_size),

Review Comment:
   ```
     void WriteValuesSpaced(const T* values, int64_t num_values, int64_t num_spaced_values,
                            const uint8_t* valid_bits, int64_t valid_bits_offset,
                            int64_t num_levels, int64_t num_nulls) {
       if (num_values != num_spaced_values) {
         current_value_encoder_->PutSpaced(values, static_cast<int>(num_spaced_values),
                                           valid_bits, valid_bits_offset);
         UpdateBloomFilterSpaced(values, num_spaced_values, valid_bits, valid_bits_offset);
       } else {
         current_value_encoder_->Put(values, static_cast<int>(num_values));
         UpdateBloomFilter(values, num_values);
       }
       if (page_statistics_ != nullptr) {
         page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset,
                                        num_spaced_values, num_values, num_nulls);
       }
     }
   ```
   
   At least the other use case doesn't check this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to