This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b48bcdae fix!: Change `ArrowBufferResize()` and `ArrowBitmapResize()` 
to update `size_bytes` (#459)
b48bcdae is described below

commit b48bcdaef1b7bcfa446e2dd1b9ac92c648e7fef4
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 17:50:23 2024 -0300

    fix!: Change `ArrowBufferResize()` and `ArrowBitmapResize()` to update 
`size_bytes` (#459)
    
    Currently, `ArrowBufferResize()` and `ArrowBitmapResize()` update the
    *capacity* of the underlying buffer but do not update the *size*. This
    was documented and is consistent with Arrow C++'s `BufferBuilder` but
    inconsistent with `std::vector`, Arrow C++'s `PoolBuffer`, and user
    expectations given the name of the function.
    
    After this PR, `ArrowBufferResize()` and `ArrowBitmapResize()` update
    the `size_bytes` of the underlying buffer.
    
    This is a breaking change.
---
 src/nanoarrow/buffer_inline.h | 67 +++++++++++++++++++++++++++----------------
 src/nanoarrow/buffer_test.cc  | 52 +++++++++++++++++++++++++++++++--
 src/nanoarrow/nanoarrow.h     | 21 ++++++--------
 3 files changed, 100 insertions(+), 40 deletions(-)

diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index d6d74d82..54a00a92 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -91,29 +91,29 @@ static inline void ArrowBufferMove(struct ArrowBuffer* src, 
struct ArrowBuffer*
 }
 
 static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
-                                               int64_t new_capacity_bytes,
+                                               int64_t new_size_bytes,
                                                char shrink_to_fit) {
-  if (new_capacity_bytes < 0) {
+  if (new_size_bytes < 0) {
     return EINVAL;
   }
 
-  if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) {
-    buffer->data = buffer->allocator.reallocate(
-        &buffer->allocator, buffer->data, buffer->capacity_bytes, 
new_capacity_bytes);
-    if (buffer->data == NULL && new_capacity_bytes > 0) {
+  int needs_reallocation = new_size_bytes > buffer->capacity_bytes ||
+                           (shrink_to_fit && new_size_bytes < 
buffer->capacity_bytes);
+
+  if (needs_reallocation) {
+    buffer->data = buffer->allocator.reallocate(&buffer->allocator, 
buffer->data,
+                                                buffer->capacity_bytes, 
new_size_bytes);
+
+    if (buffer->data == NULL && new_size_bytes > 0) {
       buffer->capacity_bytes = 0;
       buffer->size_bytes = 0;
       return ENOMEM;
     }
 
-    buffer->capacity_bytes = new_capacity_bytes;
-  }
-
-  // Ensures that when shrinking that size <= capacity
-  if (new_capacity_bytes < buffer->size_bytes) {
-    buffer->size_bytes = new_capacity_bytes;
+    buffer->capacity_bytes = new_size_bytes;
   }
 
+  buffer->size_bytes = new_size_bytes;
   return NANOARROW_OK;
 }
 
@@ -124,8 +124,19 @@ static inline ArrowErrorCode ArrowBufferReserve(struct 
ArrowBuffer* buffer,
     return NANOARROW_OK;
   }
 
-  return ArrowBufferResize(
-      buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), 
0);
+  int64_t new_capacity_bytes =
+      _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes);
+  buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data,
+                                              buffer->capacity_bytes, 
new_capacity_bytes);
+
+  if (buffer->data == NULL && new_capacity_bytes > 0) {
+    buffer->capacity_bytes = 0;
+    buffer->size_bytes = 0;
+    return ENOMEM;
+  }
+
+  buffer->capacity_bytes = new_capacity_bytes;
+  return NANOARROW_OK;
 }
 
 static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const 
void* data,
@@ -468,32 +479,38 @@ static inline void ArrowBitmapMove(struct ArrowBitmap* 
src, struct ArrowBitmap*
 static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
                                                 int64_t additional_size_bits) {
   int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits;
-  if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) {
+  int64_t min_capacity_bytes = _ArrowBytesForBits(min_capacity_bits);
+  int64_t current_size_bytes = bitmap->buffer.size_bytes;
+  int64_t current_capacity_bytes = bitmap->buffer.capacity_bytes;
+
+  if (min_capacity_bytes <= current_capacity_bytes) {
     return NANOARROW_OK;
   }
 
-  NANOARROW_RETURN_NOT_OK(
-      ArrowBufferReserve(&bitmap->buffer, 
_ArrowBytesForBits(additional_size_bits)));
+  int64_t additional_capacity_bytes = min_capacity_bytes - current_size_bytes;
+  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&bitmap->buffer, 
additional_capacity_bytes));
 
+  // Zero out the last byte for deterministic output in the common case
+  // of reserving a known remaining size. We should have returned above
+  // if there was not at least one additional byte to allocate; however,
+  // DCHECK() just to be sure.
+  NANOARROW_DCHECK(bitmap->buffer.capacity_bytes > current_capacity_bytes);
   bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
   return NANOARROW_OK;
 }
 
 static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
-                                               int64_t new_capacity_bits,
+                                               int64_t new_size_bits,
                                                char shrink_to_fit) {
-  if (new_capacity_bits < 0) {
+  if (new_size_bits < 0) {
     return EINVAL;
   }
 
-  int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits);
+  int64_t new_size_bytes = _ArrowBytesForBits(new_size_bits);
   NANOARROW_RETURN_NOT_OK(
-      ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit));
-
-  if (new_capacity_bits < bitmap->size_bits) {
-    bitmap->size_bits = new_capacity_bits;
-  }
+      ArrowBufferResize(&bitmap->buffer, new_size_bytes, shrink_to_fit));
 
+  bitmap->size_bits = new_size_bits;
   return NANOARROW_OK;
 }
 
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 5c14161d..6403298b 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -84,9 +84,22 @@ TEST(BufferTest, BufferTestBasic) {
   EXPECT_EQ(buffer.size_bytes, 12);
   EXPECT_STREQ(reinterpret_cast<char*>(buffer.data), "12345678901");
 
+  // Resize larger without a reallocation
+  uint8_t* second_data = buffer.data;
+  EXPECT_EQ(ArrowBufferResize(&buffer, 13, false), NANOARROW_OK);
+  EXPECT_EQ(buffer.data, second_data);
+  EXPECT_EQ(buffer.capacity_bytes, 20);
+  EXPECT_EQ(buffer.size_bytes, 13);
+
+  // Resize larger with a reallocation
+  EXPECT_EQ(ArrowBufferResize(&buffer, 21, false), NANOARROW_OK);
+  EXPECT_NE(buffer.data, second_data);
+  EXPECT_EQ(buffer.capacity_bytes, 21);
+  EXPECT_EQ(buffer.size_bytes, 21);
+
   // Resize smaller without shrinking
   EXPECT_EQ(ArrowBufferResize(&buffer, 5, false), NANOARROW_OK);
-  EXPECT_EQ(buffer.capacity_bytes, 20);
+  EXPECT_EQ(buffer.capacity_bytes, 21);
   EXPECT_EQ(buffer.size_bytes, 5);
   EXPECT_EQ(strncmp(reinterpret_cast<char*>(buffer.data), "12345", 5), 0);
 
@@ -509,13 +522,48 @@ TEST(BitmapTest, BitmapTestMove) {
   ArrowBitmapReset(&bitmap2);
 }
 
+TEST(BitmapTest, BitmapTestReserve) {
+  struct ArrowBitmap bitmap;
+  ArrowBitmapInit(&bitmap);
+
+  // Reserve zero bits
+  ASSERT_EQ(ArrowBitmapReserve(&bitmap, 0), NANOARROW_OK);
+  EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+  EXPECT_EQ(bitmap.buffer.capacity_bytes, 0);
+  EXPECT_EQ(bitmap.size_bits, 0);
+
+  // Reserve <=8 bits
+  ASSERT_EQ(ArrowBitmapReserve(&bitmap, 2), NANOARROW_OK);
+  EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+  EXPECT_EQ(bitmap.buffer.capacity_bytes, 1);
+  EXPECT_EQ(bitmap.size_bits, 0);
+
+  // Reserve <=8 bits and ensure last byte is not zeroed
+  bitmap.buffer.data[0] = 0xff;
+  ASSERT_EQ(ArrowBitmapReserve(&bitmap, 8), NANOARROW_OK);
+  EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+  EXPECT_EQ(bitmap.buffer.capacity_bytes, 1);
+  EXPECT_EQ(bitmap.size_bits, 0);
+  EXPECT_EQ(bitmap.buffer.data[0], 0xff);
+
+  // Reserve >8 bits and ensure last byte *is* zeroed
+  ASSERT_EQ(ArrowBitmapReserve(&bitmap, 9), NANOARROW_OK);
+  EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+  EXPECT_EQ(bitmap.buffer.capacity_bytes, 2);
+  EXPECT_EQ(bitmap.size_bits, 0);
+  EXPECT_EQ(bitmap.buffer.data[0], 0xff);
+  EXPECT_EQ(bitmap.buffer.data[1], 0x00);
+
+  ArrowBitmapReset(&bitmap);
+}
+
 TEST(BitmapTest, BitmapTestResize) {
   struct ArrowBitmap bitmap;
   ArrowBitmapInit(&bitmap);
 
   // Check normal usage, which is resize to the final length
   // after appending a bunch of values
-  ASSERT_EQ(ArrowBitmapResize(&bitmap, 200, false), NANOARROW_OK);
+  ASSERT_EQ(ArrowBitmapReserve(&bitmap, 200), NANOARROW_OK);
   EXPECT_EQ(bitmap.buffer.size_bytes, 0);
   EXPECT_EQ(bitmap.buffer.capacity_bytes, 200 / 8);
   EXPECT_EQ(bitmap.size_bits, 0);
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 57f9cc12..b213db47 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -620,14 +620,12 @@ static inline void ArrowBufferReset(struct ArrowBuffer* 
buffer);
 /// address and resets buffer.
 static inline void ArrowBufferMove(struct ArrowBuffer* src, struct 
ArrowBuffer* dst);
 
-/// \brief Grow or shrink a buffer to a given capacity
+/// \brief Grow or shrink a buffer to a given size
 ///
-/// When shrinking the capacity of the buffer, the buffer is only reallocated
-/// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not
-/// adjust the buffer's size member except to ensure that the invariant
-/// capacity >= size remains true.
+/// When shrinking the size of the buffer, the buffer is only reallocated
+/// if shrink_to_fit is non-zero.
 static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
-                                               int64_t new_capacity_bytes,
+                                               int64_t new_size_bytes,
                                                char shrink_to_fit);
 
 /// \brief Ensure a buffer has at least a given additional capacity
@@ -757,15 +755,12 @@ static inline void ArrowBitmapMove(struct ArrowBitmap* 
src, struct ArrowBitmap*
 static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
                                                 int64_t additional_size_bits);
 
-/// \brief Grow or shrink a bitmap to a given capacity
+/// \brief Grow or shrink a bitmap to a given size
 ///
-/// When shrinking the capacity of the bitmap, the bitmap is only reallocated
-/// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not
-/// adjust the buffer's size member except when shrinking new_capacity_bits
-/// to a value less than the current number of bits in the bitmap.
+/// When shrinking the size of the bitmap, the bitmap is only reallocated
+/// if shrink_to_fit is non-zero.
 static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
-                                               int64_t new_capacity_bits,
-                                               char shrink_to_fit);
+                                               int64_t new_size_bits, char 
shrink_to_fit);
 
 /// \brief Reserve space for and append zero or more of the same boolean value 
to a bitmap
 static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,

Reply via email to