This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new b48bcdae fix!: Change `ArrowBufferResize()` and `ArrowBitmapResize()`
to update `size_bytes` (#459)
b48bcdae is described below
commit b48bcdaef1b7bcfa446e2dd1b9ac92c648e7fef4
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 10 17:50:23 2024 -0300
fix!: Change `ArrowBufferResize()` and `ArrowBitmapResize()` to update
`size_bytes` (#459)
Currently, `ArrowBufferResize()` and `ArrowBitmapResize()` update the
*capacity* of the underlying buffer but do not update the *size*. This
was documented and is consistent with Arrow C++'s `BufferBuilder` but
inconsistent with `std::vector`, Arrow C++'s `PoolBuffer`, and user
expectations given the name of the function.
After this PR, `ArrowBufferResize()` and `ArrowBitmapResize()` update
the `size_bytes` of the underlying buffer.
This is a breaking change.
---
src/nanoarrow/buffer_inline.h | 67 +++++++++++++++++++++++++++----------------
src/nanoarrow/buffer_test.cc | 52 +++++++++++++++++++++++++++++++--
src/nanoarrow/nanoarrow.h | 21 ++++++--------
3 files changed, 100 insertions(+), 40 deletions(-)
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index d6d74d82..54a00a92 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -91,29 +91,29 @@ static inline void ArrowBufferMove(struct ArrowBuffer* src,
struct ArrowBuffer*
}
static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
- int64_t new_capacity_bytes,
+ int64_t new_size_bytes,
char shrink_to_fit) {
- if (new_capacity_bytes < 0) {
+ if (new_size_bytes < 0) {
return EINVAL;
}
- if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) {
- buffer->data = buffer->allocator.reallocate(
- &buffer->allocator, buffer->data, buffer->capacity_bytes,
new_capacity_bytes);
- if (buffer->data == NULL && new_capacity_bytes > 0) {
+ int needs_reallocation = new_size_bytes > buffer->capacity_bytes ||
+ (shrink_to_fit && new_size_bytes <
buffer->capacity_bytes);
+
+ if (needs_reallocation) {
+ buffer->data = buffer->allocator.reallocate(&buffer->allocator,
buffer->data,
+ buffer->capacity_bytes,
new_size_bytes);
+
+ if (buffer->data == NULL && new_size_bytes > 0) {
buffer->capacity_bytes = 0;
buffer->size_bytes = 0;
return ENOMEM;
}
- buffer->capacity_bytes = new_capacity_bytes;
- }
-
- // Ensures that when shrinking that size <= capacity
- if (new_capacity_bytes < buffer->size_bytes) {
- buffer->size_bytes = new_capacity_bytes;
+ buffer->capacity_bytes = new_size_bytes;
}
+ buffer->size_bytes = new_size_bytes;
return NANOARROW_OK;
}
@@ -124,8 +124,19 @@ static inline ArrowErrorCode ArrowBufferReserve(struct
ArrowBuffer* buffer,
return NANOARROW_OK;
}
- return ArrowBufferResize(
- buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes),
0);
+ int64_t new_capacity_bytes =
+ _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes);
+ buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data,
+ buffer->capacity_bytes,
new_capacity_bytes);
+
+ if (buffer->data == NULL && new_capacity_bytes > 0) {
+ buffer->capacity_bytes = 0;
+ buffer->size_bytes = 0;
+ return ENOMEM;
+ }
+
+ buffer->capacity_bytes = new_capacity_bytes;
+ return NANOARROW_OK;
}
static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const
void* data,
@@ -468,32 +479,38 @@ static inline void ArrowBitmapMove(struct ArrowBitmap*
src, struct ArrowBitmap*
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
int64_t additional_size_bits) {
int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits;
- if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) {
+ int64_t min_capacity_bytes = _ArrowBytesForBits(min_capacity_bits);
+ int64_t current_size_bytes = bitmap->buffer.size_bytes;
+ int64_t current_capacity_bytes = bitmap->buffer.capacity_bytes;
+
+ if (min_capacity_bytes <= current_capacity_bytes) {
return NANOARROW_OK;
}
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferReserve(&bitmap->buffer,
_ArrowBytesForBits(additional_size_bits)));
+ int64_t additional_capacity_bytes = min_capacity_bytes - current_size_bytes;
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&bitmap->buffer,
additional_capacity_bytes));
+ // Zero out the last byte for deterministic output in the common case
+ // of reserving a known remaining size. We should have returned above
+ // if there was not at least one additional byte to allocate; however,
+ // DCHECK() just to be sure.
+ NANOARROW_DCHECK(bitmap->buffer.capacity_bytes > current_capacity_bytes);
bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
return NANOARROW_OK;
}
static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
- int64_t new_capacity_bits,
+ int64_t new_size_bits,
char shrink_to_fit) {
- if (new_capacity_bits < 0) {
+ if (new_size_bits < 0) {
return EINVAL;
}
- int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits);
+ int64_t new_size_bytes = _ArrowBytesForBits(new_size_bits);
NANOARROW_RETURN_NOT_OK(
- ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit));
-
- if (new_capacity_bits < bitmap->size_bits) {
- bitmap->size_bits = new_capacity_bits;
- }
+ ArrowBufferResize(&bitmap->buffer, new_size_bytes, shrink_to_fit));
+ bitmap->size_bits = new_size_bits;
return NANOARROW_OK;
}
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 5c14161d..6403298b 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -84,9 +84,22 @@ TEST(BufferTest, BufferTestBasic) {
EXPECT_EQ(buffer.size_bytes, 12);
EXPECT_STREQ(reinterpret_cast<char*>(buffer.data), "12345678901");
+ // Resize larger without a reallocation
+ uint8_t* second_data = buffer.data;
+ EXPECT_EQ(ArrowBufferResize(&buffer, 13, false), NANOARROW_OK);
+ EXPECT_EQ(buffer.data, second_data);
+ EXPECT_EQ(buffer.capacity_bytes, 20);
+ EXPECT_EQ(buffer.size_bytes, 13);
+
+ // Resize larger with a reallocation
+ EXPECT_EQ(ArrowBufferResize(&buffer, 21, false), NANOARROW_OK);
+ EXPECT_NE(buffer.data, second_data);
+ EXPECT_EQ(buffer.capacity_bytes, 21);
+ EXPECT_EQ(buffer.size_bytes, 21);
+
// Resize smaller without shrinking
EXPECT_EQ(ArrowBufferResize(&buffer, 5, false), NANOARROW_OK);
- EXPECT_EQ(buffer.capacity_bytes, 20);
+ EXPECT_EQ(buffer.capacity_bytes, 21);
EXPECT_EQ(buffer.size_bytes, 5);
EXPECT_EQ(strncmp(reinterpret_cast<char*>(buffer.data), "12345", 5), 0);
@@ -509,13 +522,48 @@ TEST(BitmapTest, BitmapTestMove) {
ArrowBitmapReset(&bitmap2);
}
+TEST(BitmapTest, BitmapTestReserve) {
+ struct ArrowBitmap bitmap;
+ ArrowBitmapInit(&bitmap);
+
+ // Reserve zero bits
+ ASSERT_EQ(ArrowBitmapReserve(&bitmap, 0), NANOARROW_OK);
+ EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+ EXPECT_EQ(bitmap.buffer.capacity_bytes, 0);
+ EXPECT_EQ(bitmap.size_bits, 0);
+
+ // Reserve <=8 bits
+ ASSERT_EQ(ArrowBitmapReserve(&bitmap, 2), NANOARROW_OK);
+ EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+ EXPECT_EQ(bitmap.buffer.capacity_bytes, 1);
+ EXPECT_EQ(bitmap.size_bits, 0);
+
+ // Reserve <=8 bits and ensure last byte is not zeroed
+ bitmap.buffer.data[0] = 0xff;
+ ASSERT_EQ(ArrowBitmapReserve(&bitmap, 8), NANOARROW_OK);
+ EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+ EXPECT_EQ(bitmap.buffer.capacity_bytes, 1);
+ EXPECT_EQ(bitmap.size_bits, 0);
+ EXPECT_EQ(bitmap.buffer.data[0], 0xff);
+
+ // Reserve >8 bits and ensure last byte *is* zeroed
+ ASSERT_EQ(ArrowBitmapReserve(&bitmap, 9), NANOARROW_OK);
+ EXPECT_EQ(bitmap.buffer.size_bytes, 0);
+ EXPECT_EQ(bitmap.buffer.capacity_bytes, 2);
+ EXPECT_EQ(bitmap.size_bits, 0);
+ EXPECT_EQ(bitmap.buffer.data[0], 0xff);
+ EXPECT_EQ(bitmap.buffer.data[1], 0x00);
+
+ ArrowBitmapReset(&bitmap);
+}
+
TEST(BitmapTest, BitmapTestResize) {
struct ArrowBitmap bitmap;
ArrowBitmapInit(&bitmap);
// Check normal usage, which is resize to the final length
// after appending a bunch of values
- ASSERT_EQ(ArrowBitmapResize(&bitmap, 200, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowBitmapReserve(&bitmap, 200), NANOARROW_OK);
EXPECT_EQ(bitmap.buffer.size_bytes, 0);
EXPECT_EQ(bitmap.buffer.capacity_bytes, 200 / 8);
EXPECT_EQ(bitmap.size_bits, 0);
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 57f9cc12..b213db47 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -620,14 +620,12 @@ static inline void ArrowBufferReset(struct ArrowBuffer*
buffer);
/// address and resets buffer.
static inline void ArrowBufferMove(struct ArrowBuffer* src, struct
ArrowBuffer* dst);
-/// \brief Grow or shrink a buffer to a given capacity
+/// \brief Grow or shrink a buffer to a given size
///
-/// When shrinking the capacity of the buffer, the buffer is only reallocated
-/// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not
-/// adjust the buffer's size member except to ensure that the invariant
-/// capacity >= size remains true.
+/// When shrinking the size of the buffer, the buffer is only reallocated
+/// if shrink_to_fit is non-zero.
static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
- int64_t new_capacity_bytes,
+ int64_t new_size_bytes,
char shrink_to_fit);
/// \brief Ensure a buffer has at least a given additional capacity
@@ -757,15 +755,12 @@ static inline void ArrowBitmapMove(struct ArrowBitmap*
src, struct ArrowBitmap*
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
int64_t additional_size_bits);
-/// \brief Grow or shrink a bitmap to a given capacity
+/// \brief Grow or shrink a bitmap to a given size
///
-/// When shrinking the capacity of the bitmap, the bitmap is only reallocated
-/// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not
-/// adjust the buffer's size member except when shrinking new_capacity_bits
-/// to a value less than the current number of bits in the bitmap.
+/// When shrinking the size of the bitmap, the bitmap is only reallocated
+/// if shrink_to_fit is non-zero.
static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
- int64_t new_capacity_bits,
- char shrink_to_fit);
+ int64_t new_size_bits, char
shrink_to_fit);
/// \brief Reserve space for and append zero or more of the same boolean value
to a bitmap
static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,