pitrou commented on a change in pull request #9606:
URL: https://github.com/apache/arrow/pull/9606#discussion_r587386055



##########
File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
##########
@@ -280,6 +320,28 @@ class TestIndexInKernel : public ::testing::Test {
     ASSERT_OK(actual.chunked_array()->ValidateFull());
     AssertChunkedEqual(*expected, *actual.chunked_array());
   }
+
+  void CheckIndexInDictionary(const std::shared_ptr<DataType>& type,
+                              const std::shared_ptr<DataType>& index_type,
+                              const std::string& input_dictionary_json,
+                              const std::string& input_index_json,
+                              const std::string& value_set_json,
+                              const std::string& expected_json, bool skip_nulls = false) {
+    auto dict_type = dictionary(index_type, type);
+    auto indices = ArrayFromJSON(index_type, input_index_json);
+    auto dict = ArrayFromJSON(type, value_set_json);

Review comment:
       This should use `input_dictionary_json` as well, not `value_set_json`.

##########
File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
##########
@@ -72,6 +72,27 @@ void CheckIsInChunked(const std::shared_ptr<ChunkedArray>& input,
   AssertChunkedEqual(*expected, *actual);
 }
 
+void CheckIsInDictionary(const std::shared_ptr<DataType>& type,
+                         const std::shared_ptr<DataType>& index_type,
+                         const std::string& input_dictionary_json,
+                         const std::string& input_index_json,
+                         const std::string& value_set_json,
+                         const std::string& expected_json, bool skip_nulls = false) {
+  auto dict_type = dictionary(index_type, type);
+  auto indices = ArrayFromJSON(index_type, input_index_json);
+  auto dict = ArrayFromJSON(type, value_set_json);

Review comment:
       This should use `input_dictionary_json`, not `value_set_json`.

##########
File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
##########
@@ -231,6 +252,25 @@ TEST_F(TestIsInKernel, Decimal) {
             /*skip_nulls=*/true);
 }
 
+TEST_F(TestIsInKernel, DictionaryArray) {
+  for (auto index_ty : all_dictionary_index_types()) {
+    CheckIsInDictionary(/*type=*/utf8(),
+                        /*index_type=*/index_ty,
+                        /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
+                        /*input_index_json=*/"[1, 2, null, 0]",
+                        /*value_set_json=*/R"(["A", "B", "C"])",
+                        /*expected_json=*/"[true, true, false, true]",
+                        /*skip_nulls=*/false);
+    CheckIsInDictionary(/*type=*/float32(),

Review comment:
       Consider adding the following tests:
   ```c++
       // With nulls and skip_nulls=false
       CheckIsInDictionary(/*type=*/utf8(),
                           /*index_type=*/index_ty,
                           /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
                           /*input_index_json=*/"[1, 3, null, 0, 1]",
                           /*value_set_json=*/R"(["C", "B", "A", null])",
                           /*expected_json=*/"[true, false, true, true, true]",
                           /*skip_nulls=*/false);
       CheckIsInDictionary(/*type=*/utf8(),
                           /*index_type=*/index_ty,
                           /*input_dictionary_json=*/R"(["A", null, "C", "D"])",
                           /*input_index_json=*/"[1, 3, null, 0, 1]",
                           /*value_set_json=*/R"(["C", "B", "A", null])",
                           /*expected_json=*/"[true, false, true, true, true]",
                           /*skip_nulls=*/false);
       CheckIsInDictionary(/*type=*/utf8(),
                           /*index_type=*/index_ty,
                           /*input_dictionary_json=*/R"(["A", null, "C", "D"])",
                           /*input_index_json=*/"[1, 3, null, 0, 1]",
                           /*value_set_json=*/R"(["C", "B", "A"])",
                           /*expected_json=*/"[false, false, false, true, false]",
                           /*skip_nulls=*/false);
   
       // With nulls and skip_nulls=true
       CheckIsInDictionary(/*type=*/utf8(),
                           /*index_type=*/index_ty,
                           /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
                           /*input_index_json=*/"[1, 3, null, 0, 1]",
                           /*value_set_json=*/R"(["C", "B", "A", null])",
                           /*expected_json=*/"[true, false, false, true, true]",
                           /*skip_nulls=*/true);
       CheckIsInDictionary(/*type=*/utf8(),
                           /*index_type=*/index_ty,
                           /*input_dictionary_json=*/R"(["A", null, "C", "D"])",
                           /*input_index_json=*/"[1, 3, null, 0, 1]",
                           /*value_set_json=*/R"(["C", "B", "A", null])",
                           /*expected_json=*/"[false, false, false, true, false]",
                           /*skip_nulls=*/true);
       CheckIsInDictionary(/*type=*/utf8(),
                           /*index_type=*/index_ty,
                           /*input_dictionary_json=*/R"(["A", null, "C", "D"])",
                           /*input_index_json=*/"[1, 3, null, 0, 1]",
                           /*value_set_json=*/R"(["C", "B", "A"])",
                           /*expected_json=*/"[false, false, false, true, false]",
                           /*skip_nulls=*/true);
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
##########
@@ -583,6 +645,25 @@ TEST_F(TestIndexInKernel, Decimal) {
                /*skip_nulls=*/true);
 }
 
+TEST_F(TestIndexInKernel, DictionaryArray) {
+  for (auto index_ty : all_dictionary_index_types()) {
+    CheckIndexInDictionary(/*type=*/utf8(),
+                           /*index_type=*/index_ty,
+                           /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
+                           /*input_index_json=*/"[1, 2, null, 0]",
+                           /*value_set_json=*/R"(["A", "B", "C"])",
+                           /*expected_json=*/"[1, 2, null, 0]",
+                           /*skip_nulls=*/false);
+    CheckIndexInDictionary(/*type=*/float32(),

Review comment:
       As above, can you add non-trivial tests with nulls?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to