dhruv9vats commented on a change in pull request #12162:
URL: https://github.com/apache/arrow/pull/12162#discussion_r789861567



##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +429,169 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                   {"*args"},
                                   "MakeStructOptions"};
 
+struct MapArrayLookupFunctor {
+  static Result<int64_t> FindOneMapValueIndex(const Array& keys, const Scalar& 
query_key,
+                                              const int64_t start, const 
int64_t end,
+                                              const bool from_back = false) {
+    if (!from_back) {
+      for (int64_t idx = start; idx < end; ++idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    } else {
+      for (int64_t idx = end - 1; idx >= start; --idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    }
+    return -1;
+  }
+
+  static Result<std::shared_ptr<Scalar>> GetScalarOutput(KernelContext* ctx,
+                                                         const MapScalar 
map_scalar) {

Review comment:
       Is this even remotely close to what you had in mind, @lidavidm?
   Also, is there a rough example I could follow for the `VisitArrayValuesInline` 
and `UnboxScalar` approach you mentioned for templating the kernel?

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +429,169 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                   {"*args"},
                                   "MakeStructOptions"};
 
+struct MapArrayLookupFunctor {
+  static Result<int64_t> FindOneMapValueIndex(const Array& keys, const Scalar& 
query_key,
+                                              const int64_t start, const 
int64_t end,
+                                              const bool from_back = false) {
+    if (!from_back) {
+      for (int64_t idx = start; idx < end; ++idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    } else {
+      for (int64_t idx = end - 1; idx >= start; --idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    }
+    return -1;
+  }
+
+  static Result<std::shared_ptr<Scalar>> GetScalarOutput(KernelContext* ctx,
+                                                         const MapScalar 
map_scalar) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const std::shared_ptr<Scalar>& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+
+    const auto& struct_array = checked_cast<const 
StructArray&>(*map_scalar.value);
+    const std::shared_ptr<Array> keys = struct_array.field(0);
+    const std::shared_ptr<Array> items = struct_array.field(1);

Review comment:
       Note how we no longer have to deal with `offsets()` as before: 
`map_scalar.value` does seem to hold only the required part of the `Array`.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +429,169 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                   {"*args"},
                                   "MakeStructOptions"};
 
+struct MapArrayLookupFunctor {
+  static Result<int64_t> FindOneMapValueIndex(const Array& keys, const Scalar& 
query_key,
+                                              const int64_t start, const 
int64_t end,
+                                              const bool from_back = false) {
+    if (!from_back) {
+      for (int64_t idx = start; idx < end; ++idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    } else {
+      for (int64_t idx = end - 1; idx >= start; --idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    }
+    return -1;
+  }
+
+  static Result<std::shared_ptr<Scalar>> GetScalarOutput(KernelContext* ctx,
+                                                         const MapScalar 
map_scalar) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const std::shared_ptr<Scalar>& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+
+    const auto& struct_array = checked_cast<const 
StructArray&>(*map_scalar.value);
+    const std::shared_ptr<Array> keys = struct_array.field(0);
+    const std::shared_ptr<Array> items = struct_array.field(1);
+
+    std::shared_ptr<Scalar> output;
+
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      std::unique_ptr<ArrayBuilder> builder;
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), items->type(), &builder));
+
+      bool found_at_least_one_key = false;
+      for (int64_t idx = 0; idx < struct_array.length(); ++idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys->GetScalar(idx));
+
+        if (key->Equals(*query_key)) {
+          found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items->data(), idx, 1));
+        }
+      }
+      if (!found_at_least_one_key) {
+        output = MakeNullScalar(list(items->type()));
+      } else {
+        ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+        ARROW_ASSIGN_OR_RAISE(output, MakeScalar(list(items->type()), result));
+      }
+    }
+
+    else { /* occurrence == FIRST || LAST */
+      bool from_back = (occurrence == MapArrayLookupOptions::LAST);
+
+      ARROW_ASSIGN_OR_RAISE(
+          int64_t key_match_idx,
+          FindOneMapValueIndex(*keys, *query_key, 0, struct_array.length(), 
from_back));
+      if (key_match_idx != -1) {
+        ARROW_ASSIGN_OR_RAISE(output, items->GetScalar(key_match_idx));
+      } else {
+        output = MakeNullScalar(items->type());
+      }
+    }
+    return output;
+  }
+
+  static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch, 
Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+    const MapArray map_array(batch[0].array());
+
+    std::unique_ptr<ArrayBuilder> builder;
+    if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
+                                list(map_array.map_type()->item_type()), 
&builder));
+    } else {
+      RETURN_NOT_OK(
+          MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(), 
&builder));
+    }
+    for (int64_t map_array_idx = 0; map_array_idx < map_array.length(); 
++map_array_idx) {
+      if (!map_array.IsValid(map_array_idx)) {
+        RETURN_NOT_OK(builder->AppendNull());
+        continue;
+      } else {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
+                              map_array.GetScalar(map_array_idx));
+        auto map_scalar = std::static_pointer_cast<MapScalar>(scalar);
+        ARROW_ASSIGN_OR_RAISE(auto scalar_output, GetScalarOutput(ctx, 
*map_scalar));
+        RETURN_NOT_OK(builder->AppendScalar(*scalar_output));
+      }
+    }
+    ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+    out->value = result->data();
+    return Status::OK();
+  }
+
+  static Status ExecMapScalar(KernelContext* ctx, const ExecBatch& batch, 
Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    std::shared_ptr<DataType> item_type =
+        checked_cast<const MapType&>(*batch[0].type()).item_type();
+
+    const auto& map_scalar = batch[0].scalar_as<MapScalar>();
+
+    if (ARROW_PREDICT_FALSE(!map_scalar.is_valid)) {
+      if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+        out->value = MakeNullScalar(list(item_type));
+      } else {
+        out->value = MakeNullScalar(item_type);
+      }
+      return Status::OK();
+    }

Review comment:
       I have kept the null check outside the helper function, since dealing 
with the return types seemed easier this way.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +429,169 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a 
StructArray",
                                   {"*args"},
                                   "MakeStructOptions"};
 
+struct MapArrayLookupFunctor {
+  static Result<int64_t> FindOneMapValueIndex(const Array& keys, const Scalar& 
query_key,
+                                              const int64_t start, const 
int64_t end,
+                                              const bool from_back = false) {
+    if (!from_back) {
+      for (int64_t idx = start; idx < end; ++idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    } else {
+      for (int64_t idx = end - 1; idx >= start; --idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys.GetScalar(idx));
+
+        if (key->Equals(query_key)) return idx;
+      }
+    }
+    return -1;
+  }
+
+  static Result<std::shared_ptr<Scalar>> GetScalarOutput(KernelContext* ctx,
+                                                         const MapScalar 
map_scalar) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const std::shared_ptr<Scalar>& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+
+    const auto& struct_array = checked_cast<const 
StructArray&>(*map_scalar.value);
+    const std::shared_ptr<Array> keys = struct_array.field(0);
+    const std::shared_ptr<Array> items = struct_array.field(1);
+
+    std::shared_ptr<Scalar> output;
+
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      std::unique_ptr<ArrayBuilder> builder;
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), items->type(), &builder));
+
+      bool found_at_least_one_key = false;
+      for (int64_t idx = 0; idx < struct_array.length(); ++idx) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key, 
keys->GetScalar(idx));
+
+        if (key->Equals(*query_key)) {
+          found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items->data(), idx, 1));
+        }
+      }
+      if (!found_at_least_one_key) {
+        output = MakeNullScalar(list(items->type()));
+      } else {
+        ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+        ARROW_ASSIGN_OR_RAISE(output, MakeScalar(list(items->type()), result));
+      }
+    }
+
+    else { /* occurrence == FIRST || LAST */
+      bool from_back = (occurrence == MapArrayLookupOptions::LAST);
+
+      ARROW_ASSIGN_OR_RAISE(
+          int64_t key_match_idx,
+          FindOneMapValueIndex(*keys, *query_key, 0, struct_array.length(), 
from_back));
+      if (key_match_idx != -1) {
+        ARROW_ASSIGN_OR_RAISE(output, items->GetScalar(key_match_idx));
+      } else {
+        output = MakeNullScalar(items->type());
+      }
+    }
+    return output;
+  }
+
+  static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch, 
Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+    const MapArray map_array(batch[0].array());
+
+    std::unique_ptr<ArrayBuilder> builder;
+    if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
+                                list(map_array.map_type()->item_type()), 
&builder));
+    } else {
+      RETURN_NOT_OK(
+          MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(), 
&builder));
+    }
+    for (int64_t map_array_idx = 0; map_array_idx < map_array.length(); 
++map_array_idx) {
+      if (!map_array.IsValid(map_array_idx)) {
+        RETURN_NOT_OK(builder->AppendNull());
+        continue;
+      } else {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
+                              map_array.GetScalar(map_array_idx));
+        auto map_scalar = std::static_pointer_cast<MapScalar>(scalar);
+        ARROW_ASSIGN_OR_RAISE(auto scalar_output, GetScalarOutput(ctx, 
*map_scalar));
+        RETURN_NOT_OK(builder->AppendScalar(*scalar_output));

Review comment:
       What is the correct way to do this, especially the 
`static_pointer_cast` part?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to