lidavidm commented on a change in pull request #12162:
URL: https://github.com/apache/arrow/pull/12162#discussion_r786888157



##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +429,274 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                   {"*args"},
                                   "MakeStructOptions"};
 
+struct MapArrayLookupFunctor {
+  static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch, 
Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+    const MapArray map_array(batch[0].array());
+
+    std::shared_ptr<arrow::Array> keys = map_array.keys();
+    std::shared_ptr<arrow::Array> items = map_array.items();
+
+    const auto& query_key = options.query_key;
+    const auto& occurence = options.occurrence;
+
+    if (occurence == MapArrayLookupOptions::Occurrence::FIRST) {
+      std::unique_ptr<ArrayBuilder> builder;
+      RETURN_NOT_OK(
+          MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(), 
&builder));
+
+      int32_t last_key_idx_checked = 0;
+
+      // aka, number of {key, value} pairs in the current map
+      int32_t list_struct_len;
+      bool found_one_key;
+
+      for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        // Number of Struct('s) = {key, value} in the list at the current index
+        list_struct_len = map_array.value_length(map_array_idx);
+        found_one_key = false;
+
+        for (int32_t key_idx_to_check = last_key_idx_checked;
+             key_idx_to_check < last_key_idx_checked + list_struct_len;
+             ++key_idx_to_check) {
+          ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+                                keys->GetScalar(key_idx_to_check));
+
+          if (key->Equals(*query_key)) {
+            found_one_key = true;
+            RETURN_NOT_OK(builder->AppendArraySlice(*items->data(), 
key_idx_to_check, 1));
+            break;
+          }
+        }
+        if (!found_one_key) {
+          RETURN_NOT_OK(builder->AppendNull());
+        }
+        // new index from where to start checking
+        last_key_idx_checked += list_struct_len;
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+      out->value = result->data();
+
+    } else if (occurence == MapArrayLookupOptions::Occurrence::LAST) {
+      std::unique_ptr<ArrayBuilder> builder;
+      RETURN_NOT_OK(
+          MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(), 
&builder));
+
+      int32_t last_key_idx_checked = 0;
+
+      // aka, number of {key, value} pairs in the current map
+      int32_t list_struct_len;
+      int32_t last_key_idx_match;
+
+      for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        // Number of Struct('s) = {key, value} in the list at the current index
+        list_struct_len = map_array.value_length(map_array_idx);
+        last_key_idx_match = -1;
+
+        for (int32_t key_idx_to_check = last_key_idx_checked;
+             key_idx_to_check < last_key_idx_checked + list_struct_len;
+             ++key_idx_to_check) {
+          ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+                                keys->GetScalar(key_idx_to_check));
+
+          if (key->Equals(*query_key)) {
+            last_key_idx_match = key_idx_to_check;
+          }
+        }
+        if (last_key_idx_match == -1) {
+          RETURN_NOT_OK(builder->AppendNull());
+        } else {
+          RETURN_NOT_OK(builder->AppendArraySlice(*items->data(), 
last_key_idx_match, 1));
+        }
+        // new index from where to start checking
+        last_key_idx_checked += list_struct_len;
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+      out->value = result->data();
+
+    } else /* occurrence == MapArrayLookupOptions::Occurrence::All) */ {
+      std::unique_ptr<ArrayBuilder> builder;
+      std::unique_ptr<ArrayBuilder> list_builder;
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
+                                list(map_array.map_type()->item_type()), 
&builder));
+
+      int32_t last_key_idx_checked = 0;
+
+      // aka, number of {key, value} pairs in the current map
+      int32_t list_struct_len;
+      bool found_one_key;
+
+      for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        // Number of Struct('s) = {key, value} in the list at the current index
+        list_struct_len = map_array.value_length(map_array_idx);
+        found_one_key = false;
+
+        if (list_struct_len > 0) {
+          RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), 
map_array.map_type()->item_type(),
+                                    &list_builder));
+        }
+
+        for (int32_t key_idx_to_check = last_key_idx_checked;
+             key_idx_to_check < last_key_idx_checked + list_struct_len;
+             ++key_idx_to_check) {
+          ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+                                keys->GetScalar(key_idx_to_check));
+
+          if (key->Equals(*query_key)) {
+            found_one_key = true;
+            RETURN_NOT_OK(
+                list_builder->AppendArraySlice(*items->data(), 
key_idx_to_check, 1));
+          }
+        }
+        if (!found_one_key) {
+          RETURN_NOT_OK(builder->AppendNull());
+        } else {
+          ARROW_ASSIGN_OR_RAISE(auto list_result, list_builder->Finish());
+          RETURN_NOT_OK(builder->AppendScalar(ListScalar(list_result)));
+        }
+        list_builder->Reset();
+
+        // new index from where to start checking
+        last_key_idx_checked += list_struct_len;
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+      out->value = result->data();
+    }
+    return Status::OK();
+  }
+
+  static Status ExecMapScalar(KernelContext* ctx, const ExecBatch& batch, 
Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+    const auto& map_scalar = batch[0].scalar_as<MapScalar>();
+    const auto& struct_array = checked_cast<const 
StructArray&>(*map_scalar.value);
+    const std::shared_ptr<Array> keys = struct_array.field(0);
+    const std::shared_ptr<Array> items = struct_array.field(1);
+
+    if (ARROW_PREDICT_FALSE(!map_scalar.is_valid)) {
+      if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+        out->value = MakeNullScalar(list(items->type()));
+      } else {
+        out->value = MakeNullScalar(items->type());

Review comment:
       I would suggest looking at this in a debugger if you haven't already,
just to see what this case (a map scalar whose `is_valid` is false) looks like.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to