pitrou commented on a change in pull request #9606: URL: https://github.com/apache/arrow/pull/9606#discussion_r587386055
########## File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc ########## @@ -280,6 +320,28 @@ class TestIndexInKernel : public ::testing::Test { ASSERT_OK(actual.chunked_array()->ValidateFull()); AssertChunkedEqual(*expected, *actual.chunked_array()); } + + void CheckIndexInDictionary(const std::shared_ptr<DataType>& type, + const std::shared_ptr<DataType>& index_type, + const std::string& input_dictionary_json, + const std::string& input_index_json, + const std::string& value_set_json, + const std::string& expected_json, bool skip_nulls = false) { + auto dict_type = dictionary(index_type, type); + auto indices = ArrayFromJSON(index_type, input_index_json); + auto dict = ArrayFromJSON(type, value_set_json); Review comment: Should be `input_dictionary_json` as well. ########## File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc ########## @@ -72,6 +72,27 @@ void CheckIsInChunked(const std::shared_ptr<ChunkedArray>& input, AssertChunkedEqual(*expected, *actual); } +void CheckIsInDictionary(const std::shared_ptr<DataType>& type, + const std::shared_ptr<DataType>& index_type, + const std::string& input_dictionary_json, + const std::string& input_index_json, + const std::string& value_set_json, + const std::string& expected_json, bool skip_nulls = false) { + auto dict_type = dictionary(index_type, type); + auto indices = ArrayFromJSON(index_type, input_index_json); + auto dict = ArrayFromJSON(type, value_set_json); Review comment: This should use `input_dictionary_json`, not `value_set_json`. 
########## File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc ########## @@ -231,6 +252,25 @@ TEST_F(TestIsInKernel, Decimal) { /*skip_nulls=*/true); } +TEST_F(TestIsInKernel, DictionaryArray) { + for (auto index_ty : all_dictionary_index_types()) { + CheckIsInDictionary(/*type=*/utf8(), + /*index_type=*/index_ty, + /*input_dictionary_json=*/R"(["A", "B", "C", "D"])", + /*input_index_json=*/"[1, 2, null, 0]", + /*value_set_json=*/R"(["A", "B", "C"])", + /*expected_json=*/"[true, true, false, true]", + /*skip_nulls=*/false); + CheckIsInDictionary(/*type=*/float32(), Review comment: Consider adding the following tests: ```c++ // With nulls and skip_nulls=false CheckIsInDictionary(/*type=*/utf8(), /*index_type=*/index_ty, /*input_dictionary_json=*/R"(["A", "B", "C", "D"])", /*input_index_json=*/"[1, 3, null, 0, 1]", /*value_set_json=*/R"(["C", "B", "A", null])", /*expected_json=*/"[true, false, true, true, true]", /*skip_nulls=*/false); CheckIsInDictionary(/*type=*/utf8(), /*index_type=*/index_ty, /*input_dictionary_json=*/R"(["A", null, "C", "D"])", /*input_index_json=*/"[1, 3, null, 0, 1]", /*value_set_json=*/R"(["C", "B", "A", null])", /*expected_json=*/"[true, false, true, true, true]", /*skip_nulls=*/false); CheckIsInDictionary(/*type=*/utf8(), /*index_type=*/index_ty, /*input_dictionary_json=*/R"(["A", null, "C", "D"])", /*input_index_json=*/"[1, 3, null, 0, 1]", /*value_set_json=*/R"(["C", "B", "A"])", /*expected_json=*/"[false, false, false, true, false]", /*skip_nulls=*/false); // With nulls and skip_nulls=true CheckIsInDictionary(/*type=*/utf8(), /*index_type=*/index_ty, /*input_dictionary_json=*/R"(["A", "B", "C", "D"])", /*input_index_json=*/"[1, 3, null, 0, 1]", /*value_set_json=*/R"(["C", "B", "A", null])", /*expected_json=*/"[true, false, false, true, true]", /*skip_nulls=*/true); CheckIsInDictionary(/*type=*/utf8(), /*index_type=*/index_ty, /*input_dictionary_json=*/R"(["A", null, "C", "D"])", /*input_index_json=*/"[1, 3, null, 0, 1]", 
/*value_set_json=*/R"(["C", "B", "A", null])", /*expected_json=*/"[false, false, false, true, false]", /*skip_nulls=*/true); CheckIsInDictionary(/*type=*/utf8(), /*index_type=*/index_ty, /*input_dictionary_json=*/R"(["A", null, "C", "D"])", /*input_index_json=*/"[1, 3, null, 0, 1]", /*value_set_json=*/R"(["C", "B", "A"])", /*expected_json=*/"[false, false, false, true, false]", /*skip_nulls=*/true); ``` ########## File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc ########## @@ -583,6 +645,25 @@ TEST_F(TestIndexInKernel, Decimal) { /*skip_nulls=*/true); } +TEST_F(TestIndexInKernel, DictionaryArray) { + for (auto index_ty : all_dictionary_index_types()) { + CheckIndexInDictionary(/*type=*/utf8(), + /*index_type=*/index_ty, + /*input_dictionary_json=*/R"(["A", "B", "C", "D"])", + /*input_index_json=*/"[1, 2, null, 0]", + /*value_set_json=*/R"(["A", "B", "C"])", + /*expected_json=*/"[1, 2, null, 0]", + /*skip_nulls=*/false); + CheckIndexInDictionary(/*type=*/float32(), Review comment: As above, can you add non-trivial tests with nulls? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org