mapleFU commented on code in PR #37400:
URL: https://github.com/apache/arrow/pull/37400#discussion_r1313767825
##########
cpp/src/parquet/column_writer.cc:
##########
@@ -2319,12 +2343,119 @@ Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
return Status::OK();
}
+template <typename DType>
+void TypedColumnWriterImpl<DType>::UpdateBloomFilter(const T* values,
+ int64_t num_values) {
+ if (bloom_filter_) {
+ // TODO(mwish): Would it allocate too much memory? Would an std::array<uint64_t, 64>
+ // better here?
+ std::vector<uint64_t> hashes(num_values);
+ bloom_filter_->Hashes(values, static_cast<int>(num_values), hashes.data());
+ bloom_filter_->InsertHashes(hashes.data(), static_cast<int>(num_values));
+ }
+}
+
+template <>
+void TypedColumnWriterImpl<FLBAType>::UpdateBloomFilter(const FLBA* values,
+ int64_t num_values) {
+ if (bloom_filter_) {
+ for (int64_t i = 0; i < num_values; ++i) {
+ bloom_filter_->InsertHash(bloom_filter_->Hash(values + i, descr_->type_length()));
+ }
+ }
+}
+
+template <>
+void TypedColumnWriterImpl<BooleanType>::UpdateBloomFilter(const bool*, int64_t) {
+ DCHECK(bloom_filter_ == nullptr);
+}
+
+template <typename DType>
+void TypedColumnWriterImpl<DType>::UpdateBloomFilterSpaced(const T* values,
+ int64_t num_values,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset) {
+ if (bloom_filter_) {
+ ::arrow::internal::VisitSetBitRunsVoid(
+ valid_bits, valid_bits_offset, num_values, [&](int64_t position, int64_t length) {
+ for (int64_t i = 0; i < length; i++) {
+ bloom_filter_->InsertHash(bloom_filter_->Hash(values + i + position));
+ }
+ });
+ }
+}
+
+template <>
+void TypedColumnWriterImpl<BooleanType>::UpdateBloomFilterSpaced(const bool*, int64_t,
+ const uint8_t*,
+ int64_t) {
+ DCHECK(bloom_filter_ == nullptr);
+}
+
+template <>
+void TypedColumnWriterImpl<FLBAType>::UpdateBloomFilterSpaced(const FLBA* values,
+ int64_t num_values,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset) {
+ if (bloom_filter_) {
+ ::arrow::internal::VisitSetBitRunsVoid(
+ valid_bits, valid_bits_offset, num_values, [&](int64_t position, int64_t length) {
+ for (int64_t i = 0; i < length; i++) {
+ bloom_filter_->InsertHash(
+ bloom_filter_->Hash(values + i + position, descr_->type_length()));
+ }
+ });
+ }
+}
+
+template <typename ArrayType>
+void UpdateBinaryBloomFilter(BloomFilter* bloom_filter, const ArrayType& array) {
+ PARQUET_THROW_NOT_OK(::arrow::VisitArraySpanInline<typename ArrayType::TypeClass>(
+ *array.data(),
+ [&](const std::string_view& view) {
Review Comment:
Oh I got it, will fix it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]