pitrou commented on code in PR #36574:
URL: https://github.com/apache/arrow/pull/36574#discussion_r1335920339
##########
cpp/src/parquet/page_index.h:
##########
@@ -25,14 +25,13 @@
namespace parquet {
-class ColumnDescriptor;
-class EncodedStatistics;
-class FileMetaData;
+class Decryptor;
+class Encryptor;
class InternalFileDecryptor;
+class InternalFileEncryptor;
Review Comment:
It would probably be useful to have a `encryption/type_fwd.h` to centralize
all such forward declarations?
##########
cpp/src/parquet/encryption/internal_file_decryptor.cc:
##########
@@ -215,4 +217,53 @@ std::shared_ptr<Decryptor>
InternalFileDecryptor::GetColumnDecryptor(
return column_data_map_[column_path];
}
+namespace {
+
+std::shared_ptr<Decryptor> GetColumnDecryptor(
+ const ColumnCryptoMetaData* crypto_metadata, InternalFileDecryptor*
file_decryptor,
+ const std::function<std::shared_ptr<Decryptor>(
+ InternalFileDecryptor* file_decryptor, const std::string& column_path,
+ const std::string& column_key_metadata, const std::string& aad)>&
func) {
+ if (crypto_metadata == nullptr) {
+ return nullptr;
+ }
+
+ if (file_decryptor == nullptr) {
+ throw ParquetException("RowGroup is noted as encrypted but no file
decryptor");
+ }
+
+ if (crypto_metadata->encrypted_with_footer_key()) {
+ return file_decryptor->GetFooterDecryptorForColumnMeta();
Review Comment:
Hmm... it should be `GetFooterDecryptorForColumnData` if called from
`GetColumnDataDecryptor`, shouldn't it?
##########
cpp/src/parquet/encryption/internal_file_decryptor.cc:
##########
@@ -215,4 +217,53 @@ std::shared_ptr<Decryptor>
InternalFileDecryptor::GetColumnDecryptor(
return column_data_map_[column_path];
}
+namespace {
+
+std::shared_ptr<Decryptor> GetColumnDecryptor(
+ const ColumnCryptoMetaData* crypto_metadata, InternalFileDecryptor*
file_decryptor,
+ const std::function<std::shared_ptr<Decryptor>(
+ InternalFileDecryptor* file_decryptor, const std::string& column_path,
+ const std::string& column_key_metadata, const std::string& aad)>&
func) {
+ if (crypto_metadata == nullptr) {
+ return nullptr;
+ }
+
+ if (file_decryptor == nullptr) {
+ throw ParquetException("RowGroup is noted as encrypted but no file
decryptor");
+ }
+
+ if (crypto_metadata->encrypted_with_footer_key()) {
+ return file_decryptor->GetFooterDecryptorForColumnMeta();
Review Comment:
I'm curious why this doesn't fail the tests, though.
##########
cpp/src/parquet/encryption/internal_file_decryptor.cc:
##########
@@ -215,4 +217,53 @@ std::shared_ptr<Decryptor>
InternalFileDecryptor::GetColumnDecryptor(
return column_data_map_[column_path];
}
+namespace {
+
+std::shared_ptr<Decryptor> GetColumnDecryptor(
+ const ColumnCryptoMetaData* crypto_metadata, InternalFileDecryptor*
file_decryptor,
+ const std::function<std::shared_ptr<Decryptor>(
+ InternalFileDecryptor* file_decryptor, const std::string& column_path,
+ const std::string& column_key_metadata, const std::string& aad)>&
func) {
+ if (crypto_metadata == nullptr) {
+ return nullptr;
+ }
+
+ if (file_decryptor == nullptr) {
+ throw ParquetException("RowGroup is noted as encrypted but no file
decryptor");
+ }
+
+ if (crypto_metadata->encrypted_with_footer_key()) {
+ return file_decryptor->GetFooterDecryptorForColumnMeta();
Review Comment:
Ah, this might be because the only distinction between data and metadata is
when selecting the actual cipher, and only if the selected algorithm id is not
already `AES_GCM_V1`. Here is for example `AesDecryptor`:
```c++
AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id,
int key_len,
bool metadata, bool
contains_length) {
ctx_ = nullptr;
length_buffer_length_ = contains_length ? kBufferSizeLength : 0;
ciphertext_size_delta_ = length_buffer_length_ + kNonceLength;
if (metadata || (ParquetCipher::AES_GCM_V1 == alg_id)) {
aes_mode_ = kGcmMode;
ciphertext_size_delta_ += kGcmTagLength;
} else {
aes_mode_ = kCtrMode;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]