lidavidm commented on a change in pull request #12162:
URL: https://github.com/apache/arrow/pull/12162#discussion_r786060176
##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -573,6 +605,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kUtf8NormalizeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kWeekOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRandomOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kMapArrayLookupOptionsType));
Review comment:
ditto here
##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -344,6 +364,9 @@ static auto kRandomOptionsType =
GetFunctionOptionsType<RandomOptions>(
DataMember("length", &RandomOptions::length),
DataMember("initializer", &RandomOptions::initializer),
DataMember("seed", &RandomOptions::seed));
+static auto kMapArrayLookupOptionsType =
GetFunctionOptionsType<MapArrayLookupOptions>(
Review comment:
Nit: try to keep these organized alphabetically (though I see the
`RandomOptions` entry already broke that).
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
+
+ if (occurence == MapArrayLookupOptions::First) {
+ found_one_key = true;
+ break;
+ }
+ }
+ }
+ if (found_one_key && occurence == MapArrayLookupOptions::First) break;
+
+ // new index from where to start checking
+ last_key_idx_checked += list_struct_len;
+ }
+ // For now, handling 'Last' and 'All' occurence options as same
+ // TODO: Handle 'Last' option.
+ ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+ out->value = result->data();
+ return Status::OK();
+ }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+ const std::vector<ValueDescr>&
descrs) {
+ std::shared_ptr<DataType> type = descrs.front().type;
+ std::shared_ptr<DataType> value_type;
+ std::shared_ptr<DataType> key_type;
+ if (type->id() == Type::MAP) {
Review comment:
No need for this check. We're only registering kernels that accept
MapType parameters so this should never _not_ be MAP. (You can DCHECK_EQ if you
really want to be sure.)
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,30 @@ TEST(TestScalarNested, StructField) {
}
}
+TEST(TestScalarNested, MapArrayLookup) {
+ MapArrayLookupOptions foo_all(MakeScalar("foo"), MapArrayLookupOptions::All);
+ MapArrayLookupOptions foo_first(MakeScalar("foo"),
MapArrayLookupOptions::First);
+ auto type = map(utf8(), int32());
+
+ auto keys = ArrayFromJSON(utf8(), R"([
+ "foo", "bar", "hello", "foo", "lesgo", "whatnow",
+ "nothing", "hat", "foo", "sorry", "dip", "foo"
+ ])");
+ auto items = ArrayFromJSON(int16(), R"([
+ 99, 1, 2, 3, 5, 8,
+ null, null, 101, 1, null, 22
+ ])");
+ auto offsets = ArrayFromJSON(int32(), "[0, 6, 6, 12,
12]")->data()->buffers[1];
+ auto null_bitmap = ArrayFromJSON(boolean(), "[1, 0, 1,
1]")->data()->buffers[1];
+
+ MapArray map_array(type, 4, offsets, keys, items, null_bitmap, 1, 0);
+
+ CheckScalarNonRecursive("map_array_lookup", {map_array},
+ ArrayFromJSON(int32(), "[99, 3, 101, 22]"),
&foo_all);
Review comment:
This output seems wrong; I would expect
`[[99, 3], null, [101, 22], null]`.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
+
+ if (occurence == MapArrayLookupOptions::First) {
+ found_one_key = true;
+ break;
+ }
+ }
+ }
+ if (found_one_key && occurence == MapArrayLookupOptions::First) break;
+
+ // new index from where to start checking
+ last_key_idx_checked += list_struct_len;
+ }
+ // For now, handling 'Last' and 'All' occurence options as same
+ // TODO: Handle 'Last' option.
+ ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+ out->value = result->data();
+ return Status::OK();
+ }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+ const std::vector<ValueDescr>&
descrs) {
+ std::shared_ptr<DataType> type = descrs.front().type;
+ std::shared_ptr<DataType> value_type;
+ std::shared_ptr<DataType> key_type;
+ if (type->id() == Type::MAP) {
+ std::cout << "map type found!\n";
+ key_type = type->field(0)->type()->field(0)->type();
+ value_type = type->field(0)->type()->field(1)->type();
+
+ std::cout << "Value type: " << value_type->ToString() << "\n";
+ }
+ return ValueDescr(value_type, descrs.front().shape);
Review comment:
Eventually if Occurrence == ALL then this should return
`ValueDescr(list(value_type), descrs.front().shape)`.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
Review comment:
There's no one "correct" structure. This is probably okay.
##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -545,6 +568,15 @@ RandomOptions::RandomOptions(int64_t length, Initializer
initializer, uint64_t s
RandomOptions::RandomOptions() : RandomOptions(0, SystemRandom, 0) {}
constexpr char RandomOptions::kTypeName[];
+MapArrayLookupOptions::MapArrayLookupOptions(std::shared_ptr<Scalar> query_key,
+ Occurence occurence)
+ : FunctionOptions(internal::kMapArrayLookupOptionsType),
+ query_key(query_key),
Review comment:
nit, but `std::move(query_key)`
##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -470,6 +470,30 @@ class ARROW_EXPORT RandomOptions : public FunctionOptions {
uint64_t seed;
};
+/// Options for map_array_lookup function
+class ARROW_EXPORT MapArrayLookupOptions : public FunctionOptions {
+ public:
+ enum Occurence {
+ /// Return the first matching value
+ First,
Review comment:
For these enums we use UPPER_SNAKE_CASE naming.
##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -545,6 +568,15 @@ RandomOptions::RandomOptions(int64_t length, Initializer
initializer, uint64_t s
RandomOptions::RandomOptions() : RandomOptions(0, SystemRandom, 0) {}
constexpr char RandomOptions::kTypeName[];
+MapArrayLookupOptions::MapArrayLookupOptions(std::shared_ptr<Scalar> query_key,
Review comment:
ditto here
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,30 @@ TEST(TestScalarNested, StructField) {
}
}
+TEST(TestScalarNested, MapArrayLookup) {
+ MapArrayLookupOptions foo_all(MakeScalar("foo"), MapArrayLookupOptions::All);
+ MapArrayLookupOptions foo_first(MakeScalar("foo"),
MapArrayLookupOptions::First);
+ auto type = map(utf8(), int32());
+
+ auto keys = ArrayFromJSON(utf8(), R"([
+ "foo", "bar", "hello", "foo", "lesgo", "whatnow",
+ "nothing", "hat", "foo", "sorry", "dip", "foo"
+ ])");
+ auto items = ArrayFromJSON(int16(), R"([
+ 99, 1, 2, 3, 5, 8,
+ null, null, 101, 1, null, 22
+ ])");
+ auto offsets = ArrayFromJSON(int32(), "[0, 6, 6, 12,
12]")->data()->buffers[1];
+ auto null_bitmap = ArrayFromJSON(boolean(), "[1, 0, 1,
1]")->data()->buffers[1];
Review comment:
`ArrayFromJSON` directly supports MapType; use that. For an example, see:
https://github.com/apache/arrow/blob/e12a4545bdc5a8683c8dfdbb0468922d444c0500/cpp/src/arrow/ipc/json_simple_test.cc#L757
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,30 @@ TEST(TestScalarNested, StructField) {
}
}
+TEST(TestScalarNested, MapArrayLookup) {
+ MapArrayLookupOptions foo_all(MakeScalar("foo"), MapArrayLookupOptions::All);
+ MapArrayLookupOptions foo_first(MakeScalar("foo"),
MapArrayLookupOptions::First);
+ auto type = map(utf8(), int32());
+
+ auto keys = ArrayFromJSON(utf8(), R"([
+ "foo", "bar", "hello", "foo", "lesgo", "whatnow",
+ "nothing", "hat", "foo", "sorry", "dip", "foo"
+ ])");
+ auto items = ArrayFromJSON(int16(), R"([
Review comment:
This is why you're seeing int16 in your implementation above.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,30 @@ TEST(TestScalarNested, StructField) {
}
}
+TEST(TestScalarNested, MapArrayLookup) {
+ MapArrayLookupOptions foo_all(MakeScalar("foo"), MapArrayLookupOptions::All);
+ MapArrayLookupOptions foo_first(MakeScalar("foo"),
MapArrayLookupOptions::First);
+ auto type = map(utf8(), int32());
+
+ auto keys = ArrayFromJSON(utf8(), R"([
+ "foo", "bar", "hello", "foo", "lesgo", "whatnow",
+ "nothing", "hat", "foo", "sorry", "dip", "foo"
+ ])");
+ auto items = ArrayFromJSON(int16(), R"([
+ 99, 1, 2, 3, 5, 8,
+ null, null, 101, 1, null, 22
+ ])");
+ auto offsets = ArrayFromJSON(int32(), "[0, 6, 6, 12,
12]")->data()->buffers[1];
+ auto null_bitmap = ArrayFromJSON(boolean(), "[1, 0, 1,
1]")->data()->buffers[1];
+
+ MapArray map_array(type, 4, offsets, keys, items, null_bitmap, 1, 0);
+
+ CheckScalarNonRecursive("map_array_lookup", {map_array},
+ ArrayFromJSON(int32(), "[99, 3, 101, 22]"),
&foo_all);
+ CheckScalarNonRecursive("map_array_lookup", {map_array},
ArrayFromJSON(int32(), "[99]"),
Review comment:
A scalar kernel is elementwise. There should be exactly one output
element per input element. The output here should be `[99, null, 101, null]`.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -225,6 +225,30 @@ TEST(TestScalarNested, StructField) {
}
}
+TEST(TestScalarNested, MapArrayLookup) {
+ MapArrayLookupOptions foo_all(MakeScalar("foo"), MapArrayLookupOptions::All);
+ MapArrayLookupOptions foo_first(MakeScalar("foo"),
MapArrayLookupOptions::First);
+ auto type = map(utf8(), int32());
+
+ auto keys = ArrayFromJSON(utf8(), R"([
+ "foo", "bar", "hello", "foo", "lesgo", "whatnow",
+ "nothing", "hat", "foo", "sorry", "dip", "foo"
+ ])");
+ auto items = ArrayFromJSON(int16(), R"([
+ 99, 1, 2, 3, 5, 8,
+ null, null, 101, 1, null, 22
+ ])");
+ auto offsets = ArrayFromJSON(int32(), "[0, 6, 6, 12,
12]")->data()->buffers[1];
+ auto null_bitmap = ArrayFromJSON(boolean(), "[1, 0, 1,
1]")->data()->buffers[1];
+
+ MapArray map_array(type, 4, offsets, keys, items, null_bitmap, 1, 0);
Review comment:
This is not validating the types/values, hence the Cast above. Use
ArrayFromJSON.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
+
+ if (occurence == MapArrayLookupOptions::First) {
+ found_one_key = true;
+ break;
+ }
+ }
+ }
+ if (found_one_key && occurence == MapArrayLookupOptions::First) break;
+
+ // new index from where to start checking
+ last_key_idx_checked += list_struct_len;
+ }
+ // For now, handling 'Last' and 'All' occurence options as same
+ // TODO: Handle 'Last' option.
+ ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+ out->value = result->data();
+ return Status::OK();
+ }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+ const std::vector<ValueDescr>&
descrs) {
+ std::shared_ptr<DataType> type = descrs.front().type;
+ std::shared_ptr<DataType> value_type;
+ std::shared_ptr<DataType> key_type;
+ if (type->id() == Type::MAP) {
+ std::cout << "map type found!\n";
+ key_type = type->field(0)->type()->field(0)->type();
+ value_type = type->field(0)->type()->field(1)->type();
+
+ std::cout << "Value type: " << value_type->ToString() << "\n";
+ }
+ return ValueDescr(value_type, descrs.front().shape);
+}
+
+void AddMapArrayLookupKernels(ScalarFunction* func) {
+ ScalarKernel kernel(
+ {InputType(Type::MAP, ValueDescr::ARRAY)},
OutputType(ResolveMapArrayLookupType),
+ MapArrayLookupFunctor::ExecMapArray,
OptionsWrapper<MapArrayLookupOptions>::Init);
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+}
+
+const FunctionDoc map_array_lookup_doc(
+ "Find the items corresponding to a given key in a MapArray", ("More doc"),
+ {"container"}, "MapArrayLookupOptions");
Review comment:
```suggestion
{"container"}, "MapArrayLookupOptions", /*options_required=*/true);
```
##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -470,6 +470,30 @@ class ARROW_EXPORT RandomOptions : public FunctionOptions {
uint64_t seed;
};
+/// Options for map_array_lookup function
+class ARROW_EXPORT MapArrayLookupOptions : public FunctionOptions {
+ public:
+ enum Occurence {
+ /// Return the first matching value
+ First,
+ /// Return the last matching value
+ Last,
+ /// Return all matching values
+ All
+ };
+
+ MapArrayLookupOptions(std::shared_ptr<Scalar> query_key, Occurence occurence
= All);
+ MapArrayLookupOptions();
+
+ constexpr static char const kTypeName[] = "MapArrayLookupOptions";
+
+ /// The key to lookup in the map
+ std::shared_ptr<Scalar> query_key;
Review comment:
Shape does not apply to Scalar. (Scalar, Array, etc. are Shapes.)
For `MapType[K, V]`, `query_key` should be of type `K`.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
+
+ if (occurence == MapArrayLookupOptions::First) {
+ found_one_key = true;
+ break;
+ }
+ }
+ }
+ if (found_one_key && occurence == MapArrayLookupOptions::First) break;
+
+ // new index from where to start checking
+ last_key_idx_checked += list_struct_len;
+ }
+ // For now, handling 'Last' and 'All' occurence options as same
+ // TODO: Handle 'Last' option.
+ ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+ out->value = result->data();
+ return Status::OK();
+ }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+ const std::vector<ValueDescr>&
descrs) {
+ std::shared_ptr<DataType> type = descrs.front().type;
+ std::shared_ptr<DataType> value_type;
+ std::shared_ptr<DataType> key_type;
+ if (type->id() == Type::MAP) {
+ std::cout << "map type found!\n";
+ key_type = type->field(0)->type()->field(0)->type();
Review comment:
Yes, you can check them here and return an error; you could do it in the
kernel as well.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
+
+ if (occurence == MapArrayLookupOptions::First) {
+ found_one_key = true;
+ break;
+ }
+ }
+ }
+ if (found_one_key && occurence == MapArrayLookupOptions::First) break;
+
+ // new index from where to start checking
+ last_key_idx_checked += list_struct_len;
+ }
+ // For now, handling 'Last' and 'All' occurence options as same
+ // TODO: Handle 'Last' option.
+ ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+ out->value = result->data();
+ return Status::OK();
+ }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+ const std::vector<ValueDescr>&
descrs) {
+ std::shared_ptr<DataType> type = descrs.front().type;
+ std::shared_ptr<DataType> value_type;
+ std::shared_ptr<DataType> key_type;
+ if (type->id() == Type::MAP) {
+ std::cout << "map type found!\n";
+ key_type = type->field(0)->type()->field(0)->type();
Review comment:
Instead of using `type->field...`, try
`checked_cast<const MapType&>(*type).key_type()`.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -429,6 +430,97 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a
StructArray",
{"*args"},
"MakeStructOptions"};
+struct MapArrayLookupFunctor {
+ static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch,
Datum* out) {
+ const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+
+ MapArray map_array(batch[0].array());
+
+ // Offset differences will tell the number of Strcut = {key, value} pairs
+ // present in the current list.
+ // const std::shared_ptr<arrow::Buffer> offsets =
map_array.value_offsets();
+
+ std::shared_ptr<arrow::Array> keys = map_array.keys();
+ std::shared_ptr<arrow::Array> items = map_array.items();
+
+ const auto& query_key = options.query_key;
+ const auto& occurence = options.occurence;
+
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(),
&builder));
+
+ int32_t last_key_idx_checked = 0;
+
+ // aka, number of {key, value} pairs in the current map
+ int32_t list_struct_len;
+ bool found_one_key = false;
+ for (int32_t map_array_idx = 0; map_array_idx < map_array.length();
++map_array_idx) {
+ // Number of Struct('s) = {key, value} in the list at the current index
+ list_struct_len = map_array.value_length(map_array_idx);
+ for (int32_t key_idx_to_check = last_key_idx_checked;
+ key_idx_to_check < last_key_idx_checked + list_struct_len;
+ ++key_idx_to_check) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> key,
+ keys->GetScalar(key_idx_to_check));
+ if (key->Equals(*query_key)) {
+ std::cout << "Key being checked: " << key->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> item,
+ items->GetScalar(key_idx_to_check));
+ std::cout << "Value at key: " << item->ToString() << "\n";
+ ARROW_ASSIGN_OR_RAISE(auto value,
+
item->CastTo(map_array.map_type()->item_type()));
+
+ std::cout << "Item being appended: " << value->ToString() << "\n";
+ RETURN_NOT_OK(builder->AppendScalar(*value));
Review comment:
Look below. You have int16, not int32, in the tests. Use ArrayFromJSON
directly instead of trying to build up the array from its parts.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]