EnricoMi commented on code in PR #45462:
URL: https://github.com/apache/arrow/pull/45462#discussion_r2131632441


##########
cpp/src/parquet/encryption/encryption.cc:
##########
@@ -163,6 +163,76 @@ FileEncryptionProperties::Builder* 
FileEncryptionProperties::Builder::encrypted_
   return this;
 }
 
+void FileEncryptionProperties::encrypt_schema(const SchemaDescriptor& schema) {
+  // Check that all columns in columnEncryptionProperties exist in the schema.
+  // Copy the encrypted_columns map as we are going to modify it while 
iterating it
+  auto encrypted_columns = 
ColumnPathToEncryptionPropertiesMap(encrypted_columns_);
+  // if columnEncryptionProperties is empty, every column in file schema will 
be
+  // encrypted with footer key.
+  if (!encrypted_columns.empty()) {
+    std::vector<std::pair<std::string, std::string>> column_path_vec;
+    // First, memorize all column or schema paths of the schema as dot-strings.
+    for (int i = 0; i < schema.num_columns(); i++) {
+      auto column = schema.Column(i);
+      auto column_path = column->path()->ToDotString();
+      auto schema_path = column->schema_path()->ToDotString();
+      column_path_vec.emplace_back(column_path, column_path);
+      if (schema_path != column_path) {
+        column_path_vec.emplace_back(schema_path, column_path);
+      }
+    }
+    // Sort them alphabetically, so that we can use binary-search and look up 
parent
+    // columns.
+    std::sort(column_path_vec.begin(), column_path_vec.end());
+
+    // Check if encrypted column exists in schema, or if it is a parent field 
of a column.
+    for (const auto& elem : encrypted_columns) {
+      auto& encrypted_column = elem.first;
+      auto encrypted_column_prefix = encrypted_column + ".";
+      auto encrypted_column_prefix_len = encrypted_column_prefix.size();
+
+      // First, binary-search the sorted paths for the first entry that is not
+      // less than encrypted_column, i.e. the first candidate that could equal
+      // encrypted_column or start with it.
+      auto it = std::lower_bound(
+          column_path_vec.begin(), column_path_vec.end(), encrypted_column,
+          [&](const std::pair<std::string, std::string>& item, const 
std::string& term) {
+            return item.first < term;
+          });
+      bool matches = false;
+
+      // encrypted_column encrypts column 'it' when 'it' is either equal to
+      // encrypted_column, or 'it' starts with encrypted_column_prefix,
+      // i.e. encrypted_column followed by a '.'
+      while (
+          it != column_path_vec.end() &&
+          (it->first == encrypted_column ||
+           // C++20: can be replaced with 
it->first.starts_with(encrypted_column_prefix)

Review Comment:
   Thanks for the pointer, applied.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to