pitrou commented on code in PR #48915:
URL: https://github.com/apache/arrow/pull/48915#discussion_r2727653655


##########
cpp/src/arrow/c/bridge_test.cc:
##########
@@ -1080,15 +1074,15 @@ TEST_F(TestArrayExport, Union) {
   TestNested(type, data);
 }
 
-#ifdef ARROW_COMPUTE
+// Helper to create a RunEndEncoded array from JSON for testing

Review Comment:
   Can we just add REE support to `ArrayFromJSON`?



##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -20,15 +20,53 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <cstring>
+#include <string_view>
 
 #include "arrow/array/data.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/logging_internal.h"

Review Comment:
   Shouldn't we avoid including an internal header here? (not including 
`logging.h` itself would be preferable as well)



##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -580,5 +632,408 @@ class MergedRunsIterator {
   int64_t logical_pos_;
 };
 
+namespace internal {

Review Comment:
   Hmm, is it useful to expose these in `ree_util.h`? Do they even need to be 
in a header? If so, perhaps a separate `ree_util_internal.h`.



##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -20,15 +20,53 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <cstring>
+#include <string_view>
 
 #include "arrow/array/data.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/logging_internal.h"
 #include "arrow/util/macros.h"
 
 namespace arrow {
 namespace ree_util {
 
+// Macro listing all value types supported by run-end encoding.

Review Comment:
   Looks like you added some of them :) But basically any non-nested type is 
supported except Null, right? Can we rearrange our code around that instead of 
trying to exhaustively list all supported value types?



##########
cpp/src/arrow/compute/kernels/vector_run_end_encode.cc:
##########
@@ -606,28 +411,9 @@ void RegisterVectorRunEndDecode(FunctionRegistry* 
registry) {
   };
 
   add_kernel(Type::NA);
-  add_kernel(Type::BOOL);
-  for (const auto& ty : NumericTypes()) {
-    add_kernel(ty->id());
-  }
-  add_kernel(Type::HALF_FLOAT);
-  add_kernel(Type::DATE32);
-  add_kernel(Type::DATE64);
-  add_kernel(Type::TIME32);
-  add_kernel(Type::TIME64);
-  add_kernel(Type::TIMESTAMP);
-  add_kernel(Type::DURATION);
-  for (const auto& ty : IntervalTypes()) {
-    add_kernel(ty->id());
-  }
-  for (const auto& type_id : DecimalTypeIds()) {
-    add_kernel(type_id);
-  }
-  add_kernel(Type::FIXED_SIZE_BINARY);
-  add_kernel(Type::STRING);
-  add_kernel(Type::BINARY);
-  add_kernel(Type::LARGE_STRING);
-  add_kernel(Type::LARGE_BINARY);
+#define ADD_REE_KERNEL(TYPE_CLASS, TYPE_ENUM) add_kernel(Type::TYPE_ENUM);
+  ARROW_REE_SUPPORTED_TYPES(ADD_REE_KERNEL)
+#undef ADD_REE_KERNEL

Review Comment:
   Instead of using exact match ids in `add_kernel`, we could have a dedicated 
flexible matcher for supported REE types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to