dhruv9vats commented on a change in pull request #12162:
URL: https://github.com/apache/arrow/pull/12162#discussion_r785455620
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
+
+ if (occurence == MapArrayLookupOptions::First) {
+ found_one_key = true;
+ break;
+ }
+ }
+ }
+ if (found_one_key && occurence == MapArrayLookupOptions::First) break;
Review comment:
I'm using this in conjunction with the condition above to break out of the
nested `for` loops when `Occurence::First` is used. As currently written, this
will break out as soon as the first matching key is encountered, regardless of
whether there are matching keys in _other_ `Lists`. Is this what we want?
If instead the `First` matching key (and its item) from *each* `List` is
needed, removing this condition will (I hope) suffice.
There was also a mention of returning a different `DataType` when `First` is
used as opposed to `All`; how is that supposed to work?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]