This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new e21cc98 feat: Add `ArrowBitmapUnpackInt8Unsafe()` (#276)
e21cc98 is described below
commit e21cc98b6e02feccd85ebce9bc54508305eea847
Author: William Ayd <[email protected]>
AuthorDate: Thu Aug 17 08:52:31 2023 -0400
feat: Add `ArrowBitmapUnpackInt8Unsafe()` (#276)
---
src/nanoarrow/buffer_inline.h | 51 +++++++++++++++++++++++++++
src/nanoarrow/buffer_test.cc | 80 +++++++++++++++++++++++++++++++++++++++++++
src/nanoarrow/nanoarrow.h | 4 +++
3 files changed, 135 insertions(+)
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index b270b33..efb5ca7 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -222,6 +222,17 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) {
return (bits >> 3) + ((bits & 7) != 0);
}
+static inline void _ArrowBitmapUnpackInt8(const uint8_t word, int8_t* out) {  // Expand the 8 bits of word into out[0..7], one 0/1 value per bit
+ out[0] = (word >> 0) & 1;  // out[0] receives the least-significant bit
+ out[1] = (word >> 1) & 1;
+ out[2] = (word >> 2) & 1;
+ out[3] = (word >> 3) & 1;
+ out[4] = (word >> 4) & 1;
+ out[5] = (word >> 5) & 1;
+ out[6] = (word >> 6) & 1;
+ out[7] = (word >> 7) & 1;  // out[7] receives the most-significant bit
+}
+
static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
*out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 |
values[4] << 4 |
values[5] << 5 | values[6] << 6 | values[7] << 7);
@@ -236,6 +247,46 @@ static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
return (bits[i >> 3] >> (i & 0x07)) & 1;
}
+static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t
start_offset,
+ int64_t length, int8_t* out) {  // Writes bits [start_offset, start_offset + length) of bits to out[0..length) as 0/1; no bounds checks are performed
+ if (length == 0) {
+ return;  // nothing to unpack; out is left untouched
+ }
+
+ const int64_t i_begin = start_offset;  // first bit index (inclusive)
+ const int64_t i_end = start_offset + length;  // one past the last bit index
+ const int64_t i_last_valid = i_end - 1;  // last bit index (inclusive)
+
+ const int64_t bytes_begin = i_begin / 8;  // byte holding the first bit
+ const int64_t bytes_last_valid = i_last_valid / 8;  // byte holding the last bit
+
+ if (bytes_begin == bytes_last_valid) {
+ // the whole range lies within a single byte: unpack it bit by bit
+ for (int i = 0; i < length; i++) {
+ out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);
+ }
+
+ return;
+ }
+
+ // first byte: from the starting bit position through bit 7
+ for (int i = 0; i < 8 - (i_begin % 8); i++) {
+ *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);
+ }
+
+ // middle bytes: fully covered by the range, so unpack 8 values at a time
+ for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) {
+ _ArrowBitmapUnpackInt8(bits[i], out);
+ out += 8;
+ }
+
+ // last byte: i_end % 8 == 0 means the range ends on a byte boundary, so all 8 bits are used
+ const int bits_remaining = i_end % 8 == 0 ? 8 : i_end % 8;
+ for (int i = 0; i < bits_remaining; i++) {
+ *out++ = ArrowBitGet(&bits[bytes_last_valid], i);
+ }
+}
+
static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
bits[i / 8] |= _ArrowkBitmask[i % 8];
}
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 9d5032c..1907d37 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -271,6 +271,86 @@ TEST(BitmapTest, BitmapTestElement) {
EXPECT_EQ(ArrowBitGet(bitmap, 16 + 7), 0);
}
+template <int offset, int length>
+void TestArrowBitmapUnpackInt8Unsafe(const uint8_t* bitmap, int8_t* out,
+ std::vector<uint8_t> expected) {  // Unpacks length bits starting at bit offset into out, then checks out against expected
+ ArrowBitmapUnpackInt8Unsafe(bitmap, offset, length, out);
+ for (int i = 0; i < expected.size(); i++) {  // compares exactly expected.size() entries, independent of length
+ EXPECT_EQ(out[i], expected[i]);
+ }
+}
+
+TEST(BitmapTest, BitmapTestBitmapUnpackInt8Unsafe) {
+ uint8_t bitmap[3];
+ int8_t result[sizeof(bitmap) * 8];  // one slot per bit; reused without clearing by every case below
+
+ memset(bitmap, 0xff, sizeof(bitmap));
+ ArrowBitmapUnpackInt8Unsafe(bitmap, 0, sizeof(result), result);
+ for (int i = 0; i < sizeof(result); i++) {
+ EXPECT_EQ(result[i], 1);
+ }
+
+ bitmap[0] = 0x93; // 10010011 (written MSB first; unpacked LSB first)
+ bitmap[1] = 0x55; // 01010101
+ bitmap[2] = 0xaa; // 10101010
+
+ // offset 0, length boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<0, 8>(bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1});
+
+ // offset 0, length boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<0, 16>(
+ bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
+
+ // offset 0, length non-boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<0, 5>(bitmap, result, {1, 1, 0, 0, 1});
+
+ // offset boundary, length boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<8, 8>(bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0});
+
+ // offset boundary, length boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<8, 16>(
+ bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+
+ // offset boundary, length non-boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<8, 5>(bitmap, result, {1, 0, 1, 0, 1});
+
+ // offset boundary, length non-boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<8, 13>(bitmap, result,
+ {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0});
+
+ // offset non-boundary, length boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<3, 5>(bitmap, result, {0, 1, 0, 0, 1});
+
+ // offset non-boundary, length boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<3, 13>(bitmap, result,
+ {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
+
+ // offset non-boundary, length non-boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<3, 3>(bitmap, result, {0, 1, 0});
+
+ // offset non-boundary, length non-boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<3, 11>(bitmap, result,
+ {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0});
+
+ // offset non-boundary non-first byte, length boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<11, 5>(bitmap, result, {0, 1, 0, 1, 0});
+
+ // offset non-boundary non-first byte, length boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<11, 13>(bitmap, result,
+ {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+
+ // offset non-boundary non-first byte, length non-boundary, one byte
+ TestArrowBitmapUnpackInt8Unsafe<11, 3>(bitmap, result, {0, 1, 0});
+
+ // offset non-boundary non-first byte, length non-boundary, different bytes
+ TestArrowBitmapUnpackInt8Unsafe<11, 11>(bitmap, result,
+ {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1});  // exactly 11 values: bits 11..21
+
+ // non-boundary, three byte span
+ TestArrowBitmapUnpackInt8Unsafe<7, 11>(bitmap, result,
+ {1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1});
+}
+
TEST(BitmapTest, BitmapTestSetTo) {
uint8_t bitmap[10];
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 9308411..09a80a8 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -716,6 +716,10 @@ static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
uint8_t bits_are_set, int64_t
length);
+/// \brief Unpack length bits of a bitmap, starting at bit start_offset, into out as int8_t values of 0 or 1
+static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t
start_offset,
+ int64_t length, int8_t* out);
+
/// \brief Append boolean values encoded as int8_t to a bitmap
///
/// The values must all be 0 or 1.