This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e21cc98  feat: Add `ArrowBitmapUnpackInt8Unsafe()` (#276)
e21cc98 is described below

commit e21cc98b6e02feccd85ebce9bc54508305eea847
Author: William Ayd <[email protected]>
AuthorDate: Thu Aug 17 08:52:31 2023 -0400

    feat: Add `ArrowBitmapUnpackInt8Unsafe()` (#276)
---
 src/nanoarrow/buffer_inline.h | 51 +++++++++++++++++++++++++++
 src/nanoarrow/buffer_test.cc  | 80 +++++++++++++++++++++++++++++++++++++++++++
 src/nanoarrow/nanoarrow.h     |  4 +++
 3 files changed, 135 insertions(+)

diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index b270b33..efb5ca7 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -222,6 +222,17 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) {
   return (bits >> 3) + ((bits & 7) != 0);
 }
 
+static inline void _ArrowBitmapUnpackInt8(const uint8_t word, int8_t* out) {  // expand the 8 bits of word into out[0..7], one 0/1 value per bit
+  out[0] = (word >> 0) & 1;  // least-significant bit is written first
+  out[1] = (word >> 1) & 1;
+  out[2] = (word >> 2) & 1;
+  out[3] = (word >> 3) & 1;
+  out[4] = (word >> 4) & 1;
+  out[5] = (word >> 5) & 1;
+  out[6] = (word >> 6) & 1;
+  out[7] = (word >> 7) & 1;  // most-significant bit is written last
+}
+
 static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
   *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | 
values[4] << 4 |
           values[5] << 5 | values[6] << 6 | values[7] << 7);
@@ -236,6 +247,46 @@ static inline int8_t ArrowBitGet(const uint8_t* bits, 
int64_t i) {
   return (bits[i >> 3] >> (i & 0x07)) & 1;
 }
 
+static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t 
start_offset,
+                                               int64_t length, int8_t* out) {  // writes length values (0 or 1) to out, one per bit of bits starting at bit start_offset; no bounds checks are done here
+  if (length == 0) {
+    return;  // nothing requested; out is not written
+  }
+
+  const int64_t i_begin = start_offset;           // index of the first bit to read
+  const int64_t i_end = start_offset + length;    // one past the last bit to read
+  const int64_t i_last_valid = i_end - 1;         // index of the last bit to read
+
+  const int64_t bytes_begin = i_begin / 8;            // byte holding the first bit
+  const int64_t bytes_last_valid = i_last_valid / 8;  // byte holding the last bit
+
+  if (bytes_begin == bytes_last_valid) {
+    // the whole range lives in a single byte: read it bit by bit
+    for (int i = 0; i < length; i++) {
+      out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);  // i_begin % 8 is the bit position within that byte
+    }
+
+    return;
+  }
+
+  // first byte: read from the starting bit position through bit 7
+  for (int i = 0; i < 8 - (i_begin % 8); i++) {
+    *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);
+  }
+
+  // middle bytes: fully inside the range, so unpack all 8 bits at once
+  for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) {
+    _ArrowBitmapUnpackInt8(bits[i], out);
+    out += 8;
+  }
+
+  // last byte: read bit 0 up to the final requested bit
+  const int bits_remaining = i_end % 8 == 0 ? 8 : i_end % 8;  // i_end % 8 == 0 means the range ends on a byte boundary, so all 8 bits are used
+  for (int i = 0; i < bits_remaining; i++) {
+    *out++ = ArrowBitGet(&bits[bytes_last_valid], i);
+  }
+}
+
 static inline void ArrowBitSet(uint8_t* bits, int64_t i) {  // modifies byte i / 8 of bits in place
   bits[i / 8] |= _ArrowkBitmask[i % 8];  // ORs in the table entry for position i % 8; presumably a single-bit mask -- table is defined outside this view, confirm
 }
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 9d5032c..1907d37 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -271,6 +271,86 @@ TEST(BitmapTest, BitmapTestElement) {
   EXPECT_EQ(ArrowBitGet(bitmap, 16 + 7), 0);
 }
 
+template <int offset, int length>  // unpacks `length` bits starting at bit `offset`, then checks out[] against expected
+void TestArrowBitmapUnpackInt8Unsafe(const uint8_t* bitmap, int8_t* out,
+                                     const std::vector<uint8_t>& expected) {  // by reference: no copy per call
+  ArrowBitmapUnpackInt8Unsafe(bitmap, offset, length, out);
+  for (size_t i = 0; i < expected.size(); i++) {  // size_t matches expected.size() (no signed/unsigned compare)
+    EXPECT_EQ(out[i], expected[i]);  // compares expected.size() entries, so expected must hold exactly `length` values
+  }
+}
+
+TEST(BitmapTest, BitmapTestBitmapUnpackInt8Unsafe) {
+  uint8_t bitmap[3];
+  int8_t result[sizeof(bitmap) * 8];  // one output slot per bit in bitmap
+
+  memset(bitmap, 0xff, sizeof(bitmap));  // all bits set: every unpacked value must be 1
+  ArrowBitmapUnpackInt8Unsafe(bitmap, 0, sizeof(result), result);
+  for (size_t i = 0; i < sizeof(result); i++) {
+    EXPECT_EQ(result[i], 1);
+  }
+
+  bitmap[0] = 0x93;  // 10010011 (unpacks least-significant bit first: 1,1,0,0,1,0,0,1)
+  bitmap[1] = 0x55;  // 01010101 (unpacks as 1,0,1,0,1,0,1,0)
+  bitmap[2] = 0xaa;  // 10101010 (unpacks as 0,1,0,1,0,1,0,1)
+
+  // offset 0, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<0, 8>(bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1});
+
+  // offset 0, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<0, 16>(
+      bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
+
+  // offset 0, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<0, 5>(bitmap, result, {1, 1, 0, 0, 1});
+
+  // offset boundary, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<8, 8>(bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0});
+
+  // offset boundary, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<8, 16>(
+      bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+
+  // offset boundary, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<8, 5>(bitmap, result, {1, 0, 1, 0, 1});
+
+  // offset boundary, length non-boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<8, 13>(bitmap, result,
+                                         {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0});
+
+  // offset non-boundary, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<3, 5>(bitmap, result, {0, 1, 0, 0, 1});
+
+  // offset non-boundary, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<3, 13>(bitmap, result,
+                                         {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
+
+  // offset non-boundary, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<3, 3>(bitmap, result, {0, 1, 0});
+
+  // offset non-boundary, length non-boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<3, 11>(bitmap, result,
+                                         {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0});
+
+  // offset non-boundary non-first byte, length boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<11, 5>(bitmap, result, {0, 1, 0, 1, 0});
+
+  // offset non-boundary non-first byte, length boundary, different bytes
+  TestArrowBitmapUnpackInt8Unsafe<11, 13>(bitmap, result,
+                                          {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
+
+  // offset non-boundary non-first byte, length non-boundary, one byte
+  TestArrowBitmapUnpackInt8Unsafe<11, 3>(bitmap, result, {0, 1, 0});
+
+  // offset non-boundary non-first byte, length non-boundary, different bytes (bits 11..21)
+  TestArrowBitmapUnpackInt8Unsafe<11, 11>(bitmap, result,
+                                          {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1});
+
+  // non-boundary, three byte span
+  TestArrowBitmapUnpackInt8Unsafe<7, 11>(bitmap, result,
+                                         {1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1});
+}
+
 TEST(BitmapTest, BitmapTestSetTo) {
   uint8_t bitmap[10];
 
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 9308411..09a80a8 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -716,6 +716,10 @@ static inline ArrowErrorCode ArrowBitmapAppend(struct 
ArrowBitmap* bitmap,
 static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
                                            uint8_t bits_are_set, int64_t 
length);
 
+/// \brief Extract boolean values from a range in a bitmap, one int8_t (0 or 1) per bit
+static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t 
start_offset,
+                                               int64_t length, int8_t* out);  // reads length bits starting at bit start_offset; writes length values to out
+
 /// \brief Append boolean values encoded as int8_t to a bitmap
 ///
 /// The values must all be 0 or 1.

Reply via email to