wgtmac commented on code in PR #36574:
URL: https://github.com/apache/arrow/pull/36574#discussion_r1292913520


##########
cpp/src/parquet/page_index.h:
##########
@@ -38,10 +41,10 @@ class RowGroupPageIndexReader;
 class PARQUET_EXPORT ColumnIndex {
  public:
   /// \brief Create a ColumnIndex from a serialized thrift message.
-  static std::unique_ptr<ColumnIndex> Make(const ColumnDescriptor& descr,
-                                           const void* serialized_index,
-                                           uint32_t index_len,
-                                           const ReaderProperties& properties);
+  static std::unique_ptr<ColumnIndex> Make(
+      const ColumnDescriptor& descr, const void* serialized_index, uint32_t 
index_len,
+      const ReaderProperties& properties,
+      const std::shared_ptr<Decryptor>& decryptor = NULLPTR);

Review Comment:
   Most of encryptor & decryptor input parameters in other places are default 
to nullptr. But yes, removing default parameters here looks better.



##########
cpp/src/parquet/page_index.h:
##########
@@ -38,10 +41,10 @@ class RowGroupPageIndexReader;
 class PARQUET_EXPORT ColumnIndex {
  public:
   /// \brief Create a ColumnIndex from a serialized thrift message.
-  static std::unique_ptr<ColumnIndex> Make(const ColumnDescriptor& descr,
-                                           const void* serialized_index,
-                                           uint32_t index_len,
-                                           const ReaderProperties& properties);
+  static std::unique_ptr<ColumnIndex> Make(
+      const ColumnDescriptor& descr, const void* serialized_index, uint32_t 
index_len,
+      const ReaderProperties& properties,
+      const std::shared_ptr<Decryptor>& decryptor = NULLPTR);

Review Comment:
   The decryptor will not be a member of returned `ColumnIndex` object. 
Instead, it is passed to ThriftDeserializer::DeserializeMessage as an input 
parameter:
   ```
     template <class T>
     void DeserializeMessage(const uint8_t* buf, uint32_t* len, T* 
deserialized_msg,
                             const std::shared_ptr<Decryptor>& decryptor = 
NULLPTR);
   ```
   So I think it is better to use `const std::shared_ptr<Decryptor>&`. Let me 
remove the default value.



##########
cpp/src/parquet/page_index.cc:
##########
@@ -744,9 +802,13 @@ class PageIndexBuilderImpl final : public PageIndexBuilder 
{
       for (size_t column = 0; column < num_columns; ++column) {
         const auto& column_page_index_builder = 
row_group_page_index_builders[column];
         if (column_page_index_builder != nullptr) {
+          /// Get encryptor if encryption is enabled.
+          auto encryptor = 
GetColumnMetaEncryptor<Builder>(static_cast<int>(row_group),

Review Comment:
   Do you mean `static_cast<int>`? I think this can avoid two lines in the 
function call.



##########
cpp/src/parquet/page_index.cc:
##########
@@ -309,6 +313,33 @@ class RowGroupPageIndexReaderImpl : public 
RowGroupPageIndexReader {
     }
   }
 
+  /// Get decryptor if the file is encrypted
+  std::shared_ptr<Decryptor> GetColumnMetaDecryptor(
+      const ColumnCryptoMetaData* crypto_metadata, int column_ordinal,
+      int8_t module_type) const {
+    std::shared_ptr<Decryptor> decryptor;
+    if (crypto_metadata != nullptr) {
+      if (file_decryptor_ == nullptr) {
+        throw ParquetException("RowGroup is noted as encrypted but no file 
decryptor");
+      }
+      if (crypto_metadata->encrypted_with_footer_key()) {
+        // The column is encrypted with footer key
+        decryptor = file_decryptor_->GetFooterDecryptorForColumnMeta();
+      } else {
+        // The column is encrypted with its own key
+        const std::string column_key_metadata = 
crypto_metadata->key_metadata();

Review Comment:
   Yes, `column_key_metadata` can but `column_path` below cannot.



##########
cpp/src/parquet/page_index.cc:
##########
@@ -309,6 +313,33 @@ class RowGroupPageIndexReaderImpl : public 
RowGroupPageIndexReader {
     }
   }
 
+  /// Get decryptor if the file is encrypted
+  std::shared_ptr<Decryptor> GetColumnMetaDecryptor(
+      const ColumnCryptoMetaData* crypto_metadata, int column_ordinal,
+      int8_t module_type) const {
+    std::shared_ptr<Decryptor> decryptor;
+    if (crypto_metadata != nullptr) {

Review Comment:
   Changed.



##########
cpp/src/parquet/page_index.cc:
##########
@@ -309,6 +313,33 @@ class RowGroupPageIndexReaderImpl : public 
RowGroupPageIndexReader {
     }
   }
 
+  /// Get decryptor if the file is encrypted
+  std::shared_ptr<Decryptor> GetColumnMetaDecryptor(
+      const ColumnCryptoMetaData* crypto_metadata, int column_ordinal,
+      int8_t module_type) const {
+    std::shared_ptr<Decryptor> decryptor;
+    if (crypto_metadata != nullptr) {
+      if (file_decryptor_ == nullptr) {
+        throw ParquetException("RowGroup is noted as encrypted but no file 
decryptor");
+      }
+      if (crypto_metadata->encrypted_with_footer_key()) {
+        // The column is encrypted with footer key
+        decryptor = file_decryptor_->GetFooterDecryptorForColumnMeta();
+      } else {
+        // The column is encrypted with its own key
+        const std::string column_key_metadata = 
crypto_metadata->key_metadata();
+        const std::string column_path = 
crypto_metadata->path_in_schema()->ToDotString();
+        decryptor =
+            file_decryptor_->GetColumnMetaDecryptor(column_path, 
column_key_metadata);
+      }
+      ARROW_DCHECK(!decryptor->file_aad().empty());

Review Comment:
   This should be an internal bug and other places are already using DCHECK for 
this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to