pitrou commented on code in PR #48915:
URL: https://github.com/apache/arrow/pull/48915#discussion_r2727653655
##########
cpp/src/arrow/c/bridge_test.cc:
##########
@@ -1080,15 +1074,15 @@ TEST_F(TestArrayExport, Union) {
TestNested(type, data);
}
-#ifdef ARROW_COMPUTE
+// Helper to create a RunEndEncoded array from JSON for testing
Review Comment:
Can we just add REE support to `ArrayFromJSON`?
##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -20,15 +20,53 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <cstring>
+#include <string_view>
#include "arrow/array/data.h"
#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/logging_internal.h"
Review Comment:
Shouldn't we avoid including an internal header here? (not including
`logging.h` itself would be preferable as well)
##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -580,5 +632,408 @@ class MergedRunsIterator {
int64_t logical_pos_;
};
+namespace internal {
Review Comment:
Hmm, is it useful to expose these in `ree_util.h`? Do they even need to be
in a header? If so, perhaps a separate `ree_util_internal.h`.
##########
cpp/src/arrow/util/ree_util.h:
##########
@@ -20,15 +20,53 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <cstring>
+#include <string_view>
#include "arrow/array/data.h"
#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/logging_internal.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace ree_util {
+// Macro listing all value types supported by run-end encoding.
Review Comment:
Looks like you added some of them :) But basically any non-nested type is
supported except Null, right? Can we rearrange our code around that instead of
trying to exhaustively list all supported value types?
##########
cpp/src/arrow/compute/kernels/vector_run_end_encode.cc:
##########
@@ -606,28 +411,9 @@ void RegisterVectorRunEndDecode(FunctionRegistry*
registry) {
};
add_kernel(Type::NA);
- add_kernel(Type::BOOL);
- for (const auto& ty : NumericTypes()) {
- add_kernel(ty->id());
- }
- add_kernel(Type::HALF_FLOAT);
- add_kernel(Type::DATE32);
- add_kernel(Type::DATE64);
- add_kernel(Type::TIME32);
- add_kernel(Type::TIME64);
- add_kernel(Type::TIMESTAMP);
- add_kernel(Type::DURATION);
- for (const auto& ty : IntervalTypes()) {
- add_kernel(ty->id());
- }
- for (const auto& type_id : DecimalTypeIds()) {
- add_kernel(type_id);
- }
- add_kernel(Type::FIXED_SIZE_BINARY);
- add_kernel(Type::STRING);
- add_kernel(Type::BINARY);
- add_kernel(Type::LARGE_STRING);
- add_kernel(Type::LARGE_BINARY);
+#define ADD_REE_KERNEL(TYPE_CLASS, TYPE_ENUM) add_kernel(Type::TYPE_ENUM);
+ ARROW_REE_SUPPORTED_TYPES(ADD_REE_KERNEL)
+#undef ADD_REE_KERNEL
Review Comment:
Instead of using exact match ids in `add_kernel`, we could have a dedicated
flexible matcher for supported REE types.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]