mapleFU commented on code in PR #37400:
URL: https://github.com/apache/arrow/pull/37400#discussion_r1539292051


##########
cpp/src/parquet/column_writer.cc:
##########
@@ -2322,12 +2347,133 @@ Status 
TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
   return Status::OK();
 }
 
+template <typename DType>
+void TypedColumnWriterImpl<DType>::UpdateBloomFilter(const T* values,
+                                                     int64_t num_values) {
+  if (bloom_filter_) {
+    std::array<uint64_t, kHashBatchSize> hashes;
+    for (int64_t i = 0; i < num_values; i += kHashBatchSize) {
+      int64_t current_hash_batch_size = std::min(kHashBatchSize, num_values - 
i);
+      bloom_filter_->Hashes(values, static_cast<int>(current_hash_batch_size),
+                            hashes.data());
+      bloom_filter_->InsertHashes(hashes.data(),
+                                  static_cast<int>(current_hash_batch_size));
+    }
+  }
+}
+
+template <>
+void TypedColumnWriterImpl<FLBAType>::UpdateBloomFilter(const FLBA* values,
+                                                        int64_t num_values) {
+  if (bloom_filter_) {
+    std::array<uint64_t, kHashBatchSize> hashes;
+    for (int64_t i = 0; i < num_values; i += kHashBatchSize) {
+      int64_t current_hash_batch_size = std::min(kHashBatchSize, num_values - 
i);
+      bloom_filter_->Hashes(values, descr_->type_length(),
+                            static_cast<int>(current_hash_batch_size), 
hashes.data());
+      bloom_filter_->InsertHashes(hashes.data(),
+                                  static_cast<int>(current_hash_batch_size));
+    }
+  }
+}
+
+template <>
+void TypedColumnWriterImpl<BooleanType>::UpdateBloomFilter(const bool*, 
int64_t) {
+  DCHECK(bloom_filter_ == nullptr);
+}
+
+template <typename DType>
+void TypedColumnWriterImpl<DType>::UpdateBloomFilterSpaced(const T* values,
+                                                           int64_t num_values,
+                                                           const uint8_t* 
valid_bits,
+                                                           int64_t 
valid_bits_offset) {
+  if (bloom_filter_) {
+    std::array<uint64_t, kHashBatchSize> hashes;
+    ::arrow::internal::VisitSetBitRunsVoid(
+        valid_bits, valid_bits_offset, num_values, [&](int64_t position, 
int64_t length) {
+          for (int64_t i = 0; i < length; i += kHashBatchSize) {
+            auto current_hash_batch_size = std::min(kHashBatchSize, length - 
i);
+            bloom_filter_->Hashes(values + i + position,
+                                  static_cast<int>(current_hash_batch_size),
+                                  hashes.data());
+            bloom_filter_->InsertHashes(hashes.data(),
+                                        
static_cast<int>(current_hash_batch_size));
+          }
+        });
+  }
+}
+
+template <>
+void TypedColumnWriterImpl<BooleanType>::UpdateBloomFilterSpaced(const bool*, 
int64_t,
+                                                                 const 
uint8_t*,
+                                                                 int64_t) {
+  DCHECK(bloom_filter_ == nullptr);

Review Comment:
   > It would be enough to make it an empty function.
   
   Thats why it's a DCHECK



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to