mapleFU commented on code in PR #35691:
URL: https://github.com/apache/arrow/pull/35691#discussion_r1306446774
##########
cpp/src/parquet/bloom_filter_writer.h:
##########
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "bloom_filter.h"
+
+#include <map>
+#include <vector>
+
+namespace parquet {
+
+class PARQUET_EXPORT RowGroupBloomFilterReference {  // Per-row-group bookkeeping of bloom filter references.
+ public:
+ struct Reference {
+ int64_t offset;  // offset of the serialized bloom filter (presumably a byte position in the file -- confirm)
+ int32_t length;  // length of the serialized bloom filter (presumably in bytes -- confirm)
+ };
+
+ /// Append a new row group to host all incoming bloom filter references.
+ void AppendRowGroup();
+
+ /// Add a reference (`offset`, `length`) to the serialized bloom filter of column `column_id`.
+ void AddBloomFilter(int32_t column_id, int64_t offset, int32_t length);
+
+ /// Get the bloom filter references of row group `row_group_ordinal` through `out`.
+ bool GetBloomFilterOffsets(size_t row_group_ordinal,  // NOTE(review): meaning of the bool result is not visible here -- confirm
+ const std::map<int32_t, Reference>** out) const;
+
+ bool empty() const { return references_.empty(); }  // true when `references_` holds no entries
+
+ private:
+ std::vector<std::map<int32_t, Reference>> references_;  // indexed by row group (presumably); map key is presumably the column id -- confirm
+};
+
+class InternalFileEncryptor;
+
+namespace schema {
+class ColumnPath;
+}
+
+class PARQUET_EXPORT BloomFilterWriter {
+ public:
+ explicit BloomFilterWriter(const WriterProperties& properties)
+ : properties_(properties) {}
+ /// Append a new row group to host all incoming bloom filters.
+ void AppendRowGroup();
+
+ /// Return a BloomFilter defined by `col_path`.
+ ///
+ /// * If the col_path has a bloom filter, create a BloomFilter in
+ /// `row_group_bloom_filters_` and return.
+ /// * Otherwise, return nullptr.
+ BloomFilter* GetOrCreateBloomFilter(
+ const std::shared_ptr<schema::ColumnPath>& col_path,
Review Comment:
I've changed this to `column_ordinal` for leaf columns.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]