This is an automated email from the ASF dual-hosted git repository.
chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1eb109d1ba [MINOR] Lazy set the numBytes of ColumnarBatch (#11759)
1eb109d1ba is described below
commit 1eb109d1baaa11b790fc640a58a84b7aa77ef5a2
Author: Chengcheng Jin <[email protected]>
AuthorDate: Fri Mar 13 22:14:20 2026 +0800
[MINOR] Lazy set the numBytes of ColumnarBatch (#11759)
---
cpp/core/memory/ColumnarBatch.cc | 11 +++++++----
cpp/core/memory/ColumnarBatch.h | 3 +++
cpp/velox/memory/GpuBufferColumnarBatch.cc | 11 +++++++----
3 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/cpp/core/memory/ColumnarBatch.cc b/cpp/core/memory/ColumnarBatch.cc
index d2c5749ecf..d9881596d5 100644
--- a/cpp/core/memory/ColumnarBatch.cc
+++ b/cpp/core/memory/ColumnarBatch.cc
@@ -106,11 +106,14 @@ std::string ArrowCStructColumnarBatch::getType() const {
}
int64_t ArrowCStructColumnarBatch::numBytes() {
- int64_t bytes = cArray_->n_buffers;
- for (int64_t i = 0; i < cArray_->n_children; ++i) {
- bytes += cArray_->children[i]->n_buffers;
+ if (!numBytes_.has_value()) {
+ int64_t bytes = cArray_->n_buffers;
+ for (int64_t i = 0; i < cArray_->n_children; ++i) {
+ bytes += cArray_->children[i]->n_buffers;
+ }
+ numBytes_ = bytes;
}
- return bytes;
+ return numBytes_.value();
}
std::shared_ptr<ArrowSchema> ArrowCStructColumnarBatch::exportArrowSchema() {
diff --git a/cpp/core/memory/ColumnarBatch.h b/cpp/core/memory/ColumnarBatch.h
index be487f871e..9e9ae06983 100644
--- a/cpp/core/memory/ColumnarBatch.h
+++ b/cpp/core/memory/ColumnarBatch.h
@@ -18,6 +18,7 @@
#pragma once
#include <memory>
+#include <optional>
#include "arrow/c/bridge.h"
#include "arrow/c/helpers.h"
@@ -59,6 +60,8 @@ class ColumnarBatch {
protected:
int64_t exportNanos_;
+ // If the batch is immutable batch, the numBytes_ should be a fixed value, lazy set it.
+ std::optional<int64_t> numBytes_;
};
class ArrowColumnarBatch final : public ColumnarBatch {
diff --git a/cpp/velox/memory/GpuBufferColumnarBatch.cc b/cpp/velox/memory/GpuBufferColumnarBatch.cc
index f73873bab3..4b4a5ee58f 100644
--- a/cpp/velox/memory/GpuBufferColumnarBatch.cc
+++ b/cpp/velox/memory/GpuBufferColumnarBatch.cc
@@ -51,11 +51,14 @@ std::vector<char> GpuBufferColumnarBatch::toUnsafeRow(int32_t rowId) const {
}
int64_t GpuBufferColumnarBatch::numBytes() {
- int64_t numBytes = 0;
- for (const auto& buffer : buffers_) {
- numBytes += buffer->size();
+ if (!numBytes_.has_value()) {
+ int64_t bytes = 0;
+ for (const auto& buffer : buffers_) {
+ bytes += buffer->size();
+ }
+ numBytes_ = bytes;
}
- return numBytes;
+ return numBytes_.value();
}
// Optimize to release the previous buffer after merge it.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]