zagto commented on code in PR #13330:
URL: https://github.com/apache/arrow/pull/13330#discussion_r953961609


##########
cpp/src/arrow/compute/kernels/vector_run_length_encode.cc:
##########
@@ -0,0 +1,393 @@
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/rle_util.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+template <typename ArrowType, bool has_validity_buffer>
+struct EncodeDecodeCommonExec {
+  using CType = typename ArrowType::c_type;
+
+  struct Element {
+    bool valid;
+    CType value;
+
+    bool operator!=(const Element& other) const {
+      return valid != other.valid || value != other.value;
+    }
+  };
+
+  EncodeDecodeCommonExec(KernelContext* kernel_context, const ExecSpan& span,
+                         ExecResult* result)
+      : kernel_context{kernel_context},
+        input_array{span.values[0].array},
+        exec_result{result} {
+    ARROW_DCHECK(span.num_values() == 1);
+  }
+
+  Element ReadValue() {
+    Element result;
+    if (has_validity_buffer) {
+      result.valid = bit_util::GetBit(input_validity, read_offset);
+    } else {
+      result.valid = true;
+    }
+    result.value = (reinterpret_cast<const CType*>(input_values))[read_offset];
+    return result;
+  }
+
+  void WriteValue(Element element) {
+    if (has_validity_buffer) {
+      bit_util::SetBitsTo(output_validity, write_offset, 1, element.valid);
+    }
+    (reinterpret_cast<CType*>(output_values))[write_offset] = element.value;
+  }
+
+  KernelContext* kernel_context;
+  const ArraySpan input_array;
+  ExecResult* exec_result;
+  const uint8_t* input_validity;
+  const void* input_values;
+  uint8_t* output_validity;
+  void* output_values;
+  // read offset is a physical index into the values buffer, including array 
offsets
+  int64_t read_offset;
+  int64_t write_offset;
+};
+
+template <>
+EncodeDecodeCommonExec<BooleanType, true>::Element
+EncodeDecodeCommonExec<BooleanType, true>::ReadValue() {
+  Element result;
+  result.valid = bit_util::GetBit(input_validity, read_offset);
+  if (result.valid) {
+    result.value =
+        bit_util::GetBit(reinterpret_cast<const uint8_t*>(input_values), 
read_offset);
+  }
+  return result;
+}
+
+template <>
+EncodeDecodeCommonExec<BooleanType, false>::Element
+EncodeDecodeCommonExec<BooleanType, false>::ReadValue() {
+  return {
+      .valid = true,
+      .value =
+          bit_util::GetBit(reinterpret_cast<const uint8_t*>(input_values), 
read_offset),
+  };
+}
+
+template <>
+void EncodeDecodeCommonExec<BooleanType, true>::WriteValue(Element element) {
+  bit_util::SetBitTo(output_validity, write_offset, element.valid);
+  if (element.valid) {
+    bit_util::SetBitTo(reinterpret_cast<uint8_t*>(output_values), write_offset,
+                       element.value);
+  }
+}
+
+template <>
+void EncodeDecodeCommonExec<BooleanType, false>::WriteValue(Element element) {
+  bit_util::SetBitTo(reinterpret_cast<uint8_t*>(output_values), write_offset,
+                     element.value);
+}
+
+template <typename ArrowType, bool has_validity_buffer>
+struct RunLengthEncodeExec
+    : public EncodeDecodeCommonExec<ArrowType, has_validity_buffer> {
+  using EncodeDecodeCommonExec<ArrowType, 
has_validity_buffer>::EncodeDecodeCommonExec;
+  using typename EncodeDecodeCommonExec<ArrowType, has_validity_buffer>::CType;
+  using typename EncodeDecodeCommonExec<ArrowType, 
has_validity_buffer>::Element;
+
+  Status Exec() {
+    ArrayData* output_array_data = this->exec_result->array_data().get();
+    if (this->input_array.length == 0) {
+      output_array_data->length = 0;
+      output_array_data->offset = 0;
+      output_array_data->buffers = {NULLPTR};
+      output_array_data->child_data[0] =
+          ArrayData::Make(this->input_array.type->GetSharedPtr(),
+                          /*length =*/0,
+                          /*buffers =*/{NULLPTR, NULLPTR});
+      return Status::OK();
+    }
+    if (this->input_array.length > std::numeric_limits<int32_t>::max()) {
+      return Status::Invalid(
+          "run-length encoded arrays can only have a number of elements that 
can be "
+          "represented as a 32-bit signed integer");
+    }
+    this->input_validity = this->input_array.buffers[0].data;
+    this->input_values = this->input_array.buffers[1].data;
+    int64_t input_offset = this->input_array.offset;
+
+    this->read_offset = input_offset;
+    Element element = this->ReadValue();
+    int64_t num_values_output = 1;
+
+    // calculate input null count by ourselves. The input span likely got 
sliced by an
+    // ExecSpanIterator using SetOffset right before this code executes. 
SetOffset leaves
+    // the null_count value of the ArraySpan as kUnknownNullCount.

Review Comment:
   Yes, it sets it to kUnknownNullCount; getting the null count after that would 
cause it to iterate over the validity buffer once more.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to