wgtmac commented on code in PR #49880:
URL: https://github.com/apache/arrow/pull/49880#discussion_r3248969477


##########
cpp/src/parquet/bloom_filter_writer.cc:
##########
@@ -225,14 +230,47 @@ IndexLocations 
BloomFilterBuilderImpl::WriteTo(::arrow::io::OutputStream* sink)
   }
   finished_ = true;
 
+  // Bloom filter ordinals are encoded as int16 in the AAD when encryption is 
enabled.
+  constexpr size_t kEncryptedOrdinalLimit = 
std::numeric_limits<int16_t>::max();  // 32767
+
   IndexLocations locations;
 
   for (size_t i = 0; i != bloom_filters_.size(); ++i) {
     auto& row_group_bloom_filters = bloom_filters_[i];
     for (const auto& [column_id, filter] : row_group_bloom_filters) {
       // TODO(GH-43138): Determine the quality of bloom filter before writing 
it.
       PARQUET_ASSIGN_OR_THROW(int64_t offset, sink->Tell());
-      filter->WriteTo(sink);
+
+      const auto column_path = 
schema_->Column(column_id)->path()->ToDotString();
+      std::shared_ptr<Encryptor> meta_encryptor =
+          file_encryptor_ != nullptr
+              ? file_encryptor_->GetColumnMetaEncryptor(column_path)
+              : nullptr;
+      if (meta_encryptor != nullptr) {
+        const auto& column_props = 
properties_->column_encryption_properties(column_path);
+        if (column_props != nullptr && column_props->is_encrypted() &&
+            !column_props->is_encrypted_with_footer_key()) {
+          ParquetException::NYI("Bloom filter writing with a dedicated column 
key");
+        }
+        if (ARROW_PREDICT_FALSE(i > kEncryptedOrdinalLimit)) {
+          throw ParquetException(
+              "Encrypted files cannot contain more than 32767 row groups");
+        }
+        if (ARROW_PREDICT_FALSE(static_cast<size_t>(column_id) >
+                                kEncryptedOrdinalLimit)) {
+          throw ParquetException(
+              "Encrypted files cannot contain more than 32767 columns");
+        }
+        auto* block_filter = 
dynamic_cast<BlockSplitBloomFilter*>(filter.get());
+        if (block_filter == nullptr) {
+          throw ParquetException(
+              "Only BlockSplitBloomFilter is supported for encrypted bloom 
filters");
+        }
+        block_filter->WriteEncrypted(sink, meta_encryptor.get(), 
static_cast<int16_t>(i),

Review Comment:
   Column metadata is encrypted when the column is closed (metadata.cc:1765), 
   but the bloom filter offset/length are only set later (metadata.cc:2066). 
   For columns encrypted with a column key, readers use the decrypted 
   encrypted_column_metadata, so they would miss the bloom filter offset/length 
   unless the metadata is encrypted (or re-encrypted) after those fields are set.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to