lidavidm commented on a change in pull request #11542:
URL: https://github.com/apache/arrow/pull/11542#discussion_r741888371



##########
File path: cpp/src/arrow/util/byte_size.cc
##########
@@ -102,6 +108,282 @@ int64_t TotalBufferSize(const Table& table) {
   return DoTotalBufferSize(table, &seen_buffers);
 }
 
+namespace {
+
+struct GetByteRangesArray {
+  const ArrayData& input;
+  int64_t offset;
+  int64_t length;
+  UInt64Builder* range_starts;
+  UInt64Builder* range_offsets;
+  UInt64Builder* range_lengths;
+
+  Status VisitBitmap(const std::shared_ptr<Buffer>& buffer) const {
+    if (buffer) {
+      uint64_t data_start = reinterpret_cast<uint64_t>(buffer->data());
+      RETURN_NOT_OK(range_starts->Append(data_start));
+      RETURN_NOT_OK(range_offsets->Append(BitUtil::RoundDown(offset, 8) / 8));
+      RETURN_NOT_OK(range_lengths->Append(BitUtil::CoveringBytes(offset, 
length)));
+    }
+    return Status::OK();
+  }
+
+  Status VisitFixedWidthArray(const Buffer& buffer, const FixedWidthType& 
type) const {
+    uint64_t data_start = reinterpret_cast<uint64_t>(buffer.data());
+    uint64_t offset_bits = offset * type.bit_width();
+    uint64_t offset_bytes = 
BitUtil::RoundDown(static_cast<int64_t>(offset_bits), 8) / 8;
+    uint64_t end_byte =
+        BitUtil::RoundUp(static_cast<int64_t>(offset_bits + (length * 
type.bit_width())),
+                         8) /
+        8;
+    uint64_t length_bytes = (end_byte - offset_bytes);
+    RETURN_NOT_OK(range_starts->Append(data_start));
+    RETURN_NOT_OK(range_offsets->Append(offset_bytes));
+    return range_lengths->Append(length_bytes);
+  }
+
+  Status Visit(const FixedWidthType& type) const {
+    static_assert(sizeof(uint8_t*) <= sizeof(uint64_t),
+                  "Undefined behavior if pointer larger than uint64_t");
+    RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+    RETURN_NOT_OK(VisitFixedWidthArray(*input.buffers[1], type));
+    if (input.dictionary) {
+      // This is slightly imprecise because we always assume the entire 
dictionary is
+      // referenced.  If this array has an offset it may only be referencing a 
portion of
+      // the dictionary

Review comment:
       Even if there's no offset, the array may only reference a portion of the dictionary — so I don't think it's imprecise that we always count the entire dictionary.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to