westonpace commented on code in PR #38147:
URL: https://github.com/apache/arrow/pull/38147#discussion_r1358388917
##########
cpp/src/arrow/compute/light_array.cc:
##########
@@ -80,27 +80,32 @@ KeyColumnArray KeyColumnArray::Slice(int64_t offset,
int64_t length) const {
KeyColumnArray sliced;
sliced.metadata_ = metadata_;
sliced.length_ = length;
- uint32_t fixed_size =
- !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length;
+ uint32_t fixed_size = metadata_.fixed_length;
sliced.buffers_[0] =
buffers_[0] ? buffers_[0] + (bit_offset_[0] + offset) / 8 : nullptr;
sliced.mutable_buffers_[0] =
mutable_buffers_[0] ? mutable_buffers_[0] + (bit_offset_[0] + offset) /
8 : nullptr;
sliced.bit_offset_[0] = (bit_offset_[0] + offset) % 8;
- if (fixed_size == 0 && !metadata_.is_null_type) {
+ if (metadata_.fixed_length == 0 && !metadata_.is_null_type) {
+ ARROW_DCHECK(is_bool_type() || is_other_fixed_width_types())
Review Comment:
How would `is_other_fixed_width_types` be true here if
`metadata_.fixed_length == 0`? It seems we can only be here if bool.
##########
cpp/src/arrow/compute/light_array.h:
##########
@@ -183,6 +183,36 @@ class ARROW_EXPORT KeyColumnArray {
// Starting bit offset within the first byte (between 0 and 7)
// to be used when accessing buffers that store bit vectors.
int bit_offset_[kMaxBuffers - 1];
+
+ bool is_dictionary_type() const {
Review Comment:
Does the `KeyColumnArray` even support storing dictionary encoded data? I
didn't think it did but I could be remembering incorrectly.
##########
cpp/src/arrow/compute/light_array.h:
##########
@@ -183,6 +183,36 @@ class ARROW_EXPORT KeyColumnArray {
// Starting bit offset within the first byte (between 0 and 7)
// to be used when accessing buffers that store bit vectors.
int bit_offset_[kMaxBuffers - 1];
+
+ bool is_dictionary_type() const {
+ return metadata_.is_fixed_length && metadata_.fixed_length != 0 &&
+ !metadata_.is_null_type;
+ }
+
+ bool is_bool_type() const {
+ return metadata_.is_fixed_length && metadata_.fixed_length == 0 &&
+ !metadata_.is_null_type;
+ }
+
+ bool is_other_fixed_width_types() const {
+ return metadata_.is_fixed_length && metadata_.fixed_length != 0 &&
+ !metadata_.is_null_type;
+ }
Review Comment:
Isn't this checking the exact same things as `is_dictionary_type`?
##########
cpp/src/arrow/compute/light_array.h:
##########
@@ -183,6 +183,36 @@ class ARROW_EXPORT KeyColumnArray {
// Starting bit offset within the first byte (between 0 and 7)
// to be used when accessing buffers that store bit vectors.
int bit_offset_[kMaxBuffers - 1];
+
+ bool is_dictionary_type() const {
+ return metadata_.is_fixed_length && metadata_.fixed_length != 0 &&
+ !metadata_.is_null_type;
Review Comment:
I'm not sure these conditions imply dictionary. This could be any regular
fixed-width column (e.g. int32) could it?
##########
cpp/src/arrow/compute/light_array.cc:
##########
@@ -80,27 +80,32 @@ KeyColumnArray KeyColumnArray::Slice(int64_t offset,
int64_t length) const {
KeyColumnArray sliced;
sliced.metadata_ = metadata_;
sliced.length_ = length;
- uint32_t fixed_size =
- !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length;
+ uint32_t fixed_size = metadata_.fixed_length;
sliced.buffers_[0] =
buffers_[0] ? buffers_[0] + (bit_offset_[0] + offset) / 8 : nullptr;
sliced.mutable_buffers_[0] =
mutable_buffers_[0] ? mutable_buffers_[0] + (bit_offset_[0] + offset) /
8 : nullptr;
sliced.bit_offset_[0] = (bit_offset_[0] + offset) % 8;
- if (fixed_size == 0 && !metadata_.is_null_type) {
+ if (metadata_.fixed_length == 0 && !metadata_.is_null_type) {
+ ARROW_DCHECK(is_bool_type() || is_other_fixed_width_types())
+ << "Expected BOOL type or OTHER FIXED WIDTH type but got a different
type.";
sliced.buffers_[1] =
buffers_[1] ? buffers_[1] + (bit_offset_[1] + offset) / 8 : nullptr;
sliced.mutable_buffers_[1] = mutable_buffers_[1]
? mutable_buffers_[1] + (bit_offset_[1] +
offset) / 8
: nullptr;
sliced.bit_offset_[1] = (bit_offset_[1] + offset) % 8;
- } else {
+ } else if (metadata_.fixed_length > 0) {
+ ARROW_DCHECK(is_dictionary_type() || is_binary_type() ||
is_large_binary_type())
Review Comment:
I am confused. If the type is `int32` then it seems we would reach this
branch. `metadata_.fixed_length` would be `4` and `metadata_.is_null_type`
would be `false` and `metadata._is_fixed_length` would be `true`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]