(arrow-nanoarrow) branch main updated: feat: Add support for appending values to half float `ArrowArray` (#499)

paleolimbot Fri, 31 May 2024 07:55:38 -0700

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git



The following commit(s) were added to refs/heads/main by this push:
     new e6ac17ea feat: Add support for appending values to half float 
`ArrowArray` (#499)
e6ac17ea is described below

commit e6ac17ea66d3ccfc21e481ad54e7e2f75332dc1e
Author: Cocoa <[email protected]>
AuthorDate: Fri May 31 15:54:56 2024 +0100

    feat: Add support for appending values to half float `ArrowArray` (#499)
    
    Hi, thanks for this great library! While playing with it, I found that
    the `ArrowArrayAppendXX` functions won't append a value to a half float
    `ArrowArray`. So I decided to add this support, which I believe should
    make this library easier to use.
    
    The conversion function is adapted from Arrow Go,
    https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go,
    and the test cases are taken from
    https://github.com/apache/arrow/blob/main/go/arrow/float16/float16_test.go.
    
    I'm not sure whether I should also update the files in `dist`, though
    (I guess that might be automated when doing a release, but I didn't check).
    I'll be happy to update them if these files are not updated
    automatically, and I'm happy to do any other changes. :)
---
 src/nanoarrow/array_inline.h  | 12 ++++++++++
 src/nanoarrow/array_test.cc   | 41 ++++++++++++++++++++++++++++++++++
 src/nanoarrow/buffer_inline.h | 51 +++++++++++++++++++++++++++++++++++++++++++
 src/nanoarrow/nanoarrow.h     |  6 +++++
 src/nanoarrow/utils_test.cc   | 19 ++++++++++++++++
 5 files changed, 129 insertions(+)

diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 16e4c52e..2f606edf 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -364,6 +364,10 @@ static inline ArrowErrorCode ArrowArrayAppendInt(struct 
ArrowArray* array,
     case NANOARROW_TYPE_FLOAT:
       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, 
(float)value));
       break;
+    case NANOARROW_TYPE_HALF_FLOAT:
+      NANOARROW_RETURN_NOT_OK(
+          ArrowBufferAppendUInt16(data_buffer, 
ArrowFloatToHalfFloat((float)value)));
+      break;
     case NANOARROW_TYPE_BOOL:
       NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
       break;
@@ -414,6 +418,10 @@ static inline ArrowErrorCode ArrowArrayAppendUInt(struct 
ArrowArray* array,
     case NANOARROW_TYPE_FLOAT:
       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, 
(float)value));
       break;
+    case NANOARROW_TYPE_HALF_FLOAT:
+      NANOARROW_RETURN_NOT_OK(
+          ArrowBufferAppendUInt16(data_buffer, 
ArrowFloatToHalfFloat((float)value)));
+      break;
     case NANOARROW_TYPE_BOOL:
       NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
       break;
@@ -443,6 +451,10 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct 
ArrowArray* array,
     case NANOARROW_TYPE_FLOAT:
       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, 
(float)value));
       break;
+    case NANOARROW_TYPE_HALF_FLOAT:
+      NANOARROW_RETURN_NOT_OK(
+          ArrowBufferAppendUInt16(data_buffer, 
ArrowFloatToHalfFloat((float)value)));
+      break;
     default:
       return EINVAL;
   }
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index abd0ee6e..d0fc510e 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -787,6 +787,47 @@ TEST(ArrayTest, ArrayTestAppendToFloatArray) {
   EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe(), 
options));
 }
 
+TEST(ArrayTest, ArrayTestAppendToHalfFloatArray) {
+  struct ArrowArray array;
+
+  ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_HALF_FLOAT), 
NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendUInt(&array, 3), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, 3.14), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, std::numeric_limits<float>::max()),
+            NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, NAN), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, INFINITY), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, -INFINITY), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, -1), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendDouble(&array, 0), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+  EXPECT_EQ(array.length, 11);
+  EXPECT_EQ(array.null_count, 2);
+  auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+  auto data_buffer = reinterpret_cast<const uint16_t*>(array.buffers[1]);
+  EXPECT_EQ(validity_buffer[0], 0b11111001);
+  EXPECT_EQ(validity_buffer[1], 0b00000111);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[0]), 1);
+  EXPECT_EQ(data_buffer[1], 0);
+  EXPECT_EQ(data_buffer[2], 0);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[3]), 3.0);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[4]), 3.138672);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[5]),
+                  std::numeric_limits<float>::max());
+  EXPECT_TRUE(std::isnan(ArrowHalfFloatToFloat(data_buffer[6])));
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[7]), INFINITY);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[8]), -INFINITY);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[9]), -1);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[10]), 0);
+
+  auto arrow_array = ImportArray(&array, float16());
+  ARROW_EXPECT_OK(arrow_array);
+}
+
 TEST(ArrayTest, ArrayTestAppendToBoolArray) {
   struct ArrowArray array;
 
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index e68de587..4de8fa72 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -80,6 +80,57 @@ static inline int64_t _ArrowGrowByFactor(int64_t 
current_capacity, int64_t new_c
   }
 }
 
+// float to half float conversion, adapted from Arrow Go
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go
+static inline uint16_t ArrowFloatToHalfFloat(float value) {
+  union {
+    float f;
+    uint32_t b;
+  } u;
+  u.f = value;
+
+  uint16_t sn = (uint16_t)((u.b >> 31) & 0x1);
+  uint16_t exp = (u.b >> 23) & 0xff;
+  int16_t res = (int16_t)exp - 127 + 15;
+  uint16_t fc = (uint16_t)(u.b >> 13) & 0x3ff;
+
+  if (exp == 0) {
+    res = 0;
+  } else if (exp == 0xff) {
+    res = 0x1f;
+  } else if (res > 0x1e) {
+    res = 0x1f;
+    fc = 0;
+  } else if (res < 0x01) {
+    res = 0;
+    fc = 0;
+  }
+
+  return (uint16_t)((sn << 15) | (uint16_t)(res << 10) | fc);
+}
+
+// half float to float conversion, adapted from Arrow Go
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go
+static inline float ArrowHalfFloatToFloat(uint16_t value) {
+  uint32_t sn = (uint32_t)((value >> 15) & 0x1);
+  uint32_t exp = (value >> 10) & 0x1f;
+  uint32_t res = exp + 127 - 15;
+  uint32_t fc = value & 0x3ff;
+
+  if (exp == 0) {
+    res = 0;
+  } else if (exp == 0x1f) {
+    res = 0xff;
+  }
+
+  union {
+    float f;
+    uint32_t b;
+  } u;
+  u.b = (uint32_t)(sn << 31) | (uint32_t)(res << 23) | (uint32_t)(fc << 13);
+  return u.f;
+}
+
 static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
   buffer->data = NULL;
   buffer->size_bytes = 0;
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index b213db47..4831a40a 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -289,6 +289,12 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* 
decimal,
 ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* 
decimal,
                                                 struct ArrowBuffer* buffer);
 
+/// \brief Get the half float value of a float
+static inline uint16_t ArrowFloatToHalfFloat(float value);
+
+/// \brief Get the float value of a half float
+static inline float ArrowHalfFloatToFloat(uint16_t value);
+
 /// \brief Resolve a chunk index from increasing int64_t offsets
 ///
 /// Given a buffer of increasing int64_t offsets that begin with 0 (e.g., 
offset buffer
diff --git a/src/nanoarrow/utils_test.cc b/src/nanoarrow/utils_test.cc
index e2eef993..7ff55941 100644
--- a/src/nanoarrow/utils_test.cc
+++ b/src/nanoarrow/utils_test.cc
@@ -543,6 +543,25 @@ TEST(DecimalTest, DecimalRoundtripBitshiftTest) {
   ArrowBufferReset(&buffer);
 }
 
+// test case adapted from
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16_test.go
+TEST(HalfFloatTest, FloatAndHalfFloatRoundTrip) {
+  uint16_t cases_bits[] = {
+      0x8000,  0x7c00, 0xfc00, 0x3c00, 0x4000, 0xc000,
+      +0x0000, 0x5b8f, 0xdb8f, 0x48c8, 0xc8c8,
+  };
+  float cases_float[] = {
+      -0.0, INFINITY, -INFINITY, 1, 2, -2, 0, 241.875, -241.875, 9.5625, 
-9.5625,
+  };
+
+  for (size_t i = 0; i < sizeof(cases_float) / sizeof(float); i++) {
+    uint16_t bits = ArrowFloatToHalfFloat(cases_float[i]);
+    EXPECT_EQ(bits, cases_bits[i]);
+    float floats = ArrowHalfFloatToFloat(bits);
+    EXPECT_FLOAT_EQ(floats, cases_float[i]);
+  }
+}
+
 TEST(UtilsTest, ArrowResolveChunk64Test) {
   int64_t offsets[] = {0, 2, 3, 6};
   int64_t n_offsets = 4;

(arrow-nanoarrow) branch main updated: feat: Add support for appending values to half float `ArrowArray` (#499)

Reply via email to