This is an automated email from the ASF dual-hosted git repository.

chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 1eb109d1ba [MINOR] Lazy set the numBytes of ColumnarBatch (#11759)
1eb109d1ba is described below

commit 1eb109d1baaa11b790fc640a58a84b7aa77ef5a2
Author: Chengcheng Jin <[email protected]>
AuthorDate: Fri Mar 13 22:14:20 2026 +0800

    [MINOR] Lazy set the numBytes of ColumnarBatch (#11759)
---
 cpp/core/memory/ColumnarBatch.cc           | 11 +++++++----
 cpp/core/memory/ColumnarBatch.h            |  3 +++
 cpp/velox/memory/GpuBufferColumnarBatch.cc | 11 +++++++----
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/cpp/core/memory/ColumnarBatch.cc b/cpp/core/memory/ColumnarBatch.cc
index d2c5749ecf..d9881596d5 100644
--- a/cpp/core/memory/ColumnarBatch.cc
+++ b/cpp/core/memory/ColumnarBatch.cc
@@ -106,11 +106,14 @@ std::string ArrowCStructColumnarBatch::getType() const {
 }
 
 int64_t ArrowCStructColumnarBatch::numBytes() {
-  int64_t bytes = cArray_->n_buffers;
-  for (int64_t i = 0; i < cArray_->n_children; ++i) {
-    bytes += cArray_->children[i]->n_buffers;
+  if (!numBytes_.has_value()) {
+    int64_t bytes = cArray_->n_buffers;
+    for (int64_t i = 0; i < cArray_->n_children; ++i) {
+      bytes += cArray_->children[i]->n_buffers;
+    }
+    numBytes_ = bytes;
   }
-  return bytes;
+  return numBytes_.value();
 }
 
 std::shared_ptr<ArrowSchema> ArrowCStructColumnarBatch::exportArrowSchema() {
diff --git a/cpp/core/memory/ColumnarBatch.h b/cpp/core/memory/ColumnarBatch.h
index be487f871e..9e9ae06983 100644
--- a/cpp/core/memory/ColumnarBatch.h
+++ b/cpp/core/memory/ColumnarBatch.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <memory>
+#include <optional>
 
 #include "arrow/c/bridge.h"
 #include "arrow/c/helpers.h"
@@ -59,6 +60,8 @@ class ColumnarBatch {
 
  protected:
   int64_t exportNanos_;
+  // If the batch is immutable batch, the numBytes_ should be a fixed value, lazy set it.
+  std::optional<int64_t> numBytes_;
 };
 
 class ArrowColumnarBatch final : public ColumnarBatch {
diff --git a/cpp/velox/memory/GpuBufferColumnarBatch.cc b/cpp/velox/memory/GpuBufferColumnarBatch.cc
index f73873bab3..4b4a5ee58f 100644
--- a/cpp/velox/memory/GpuBufferColumnarBatch.cc
+++ b/cpp/velox/memory/GpuBufferColumnarBatch.cc
@@ -51,11 +51,14 @@ std::vector<char> GpuBufferColumnarBatch::toUnsafeRow(int32_t rowId) const {
 }
 
 int64_t GpuBufferColumnarBatch::numBytes() {
-  int64_t numBytes = 0;
-  for (const auto& buffer : buffers_) {
-    numBytes += buffer->size();
+  if (!numBytes_.has_value()) {
+    int64_t bytes = 0;
+    for (const auto& buffer : buffers_) {
+      bytes += buffer->size();
+    }
+    numBytes_ = bytes;
   }
-  return numBytes;
+  return numBytes_.value();
 }
 
 // Optimize to release the previous buffer after merge it.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to