This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new e6ac17ea feat: Add support for appending values to half float
`ArrowArray` (#499)
e6ac17ea is described below
commit e6ac17ea66d3ccfc21e481ad54e7e2f75332dc1e
Author: Cocoa <[email protected]>
AuthorDate: Fri May 31 15:54:56 2024 +0100
feat: Add support for appending values to half float `ArrowArray` (#499)
Hi, thanks for this great library! While playing with it, I found that the
`ArrowArrayAppendXX` functions won't append a value to a half float
`ArrowArray`. So I decided to add this support, which I believe should
make this library easier to use.
The conversion function is adapted from Arrow Go,
https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go,
and the test cases are taken from
https://github.com/apache/arrow/blob/main/go/arrow/float16/float16_test.go.
I'm not sure whether I should also update the files in `dist`, though
(I guess that might be automated when doing a release, but I didn't check).
I'll be happy to update them if these files are not updated
automatically, and I'm happy to make any other changes. :)
---
src/nanoarrow/array_inline.h | 12 ++++++++++
src/nanoarrow/array_test.cc | 41 ++++++++++++++++++++++++++++++++++
src/nanoarrow/buffer_inline.h | 51 +++++++++++++++++++++++++++++++++++++++++++
src/nanoarrow/nanoarrow.h | 6 +++++
src/nanoarrow/utils_test.cc | 19 ++++++++++++++++
5 files changed, 129 insertions(+)
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 16e4c52e..2f606edf 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -364,6 +364,10 @@ static inline ArrowErrorCode ArrowArrayAppendInt(struct
ArrowArray* array,
case NANOARROW_TYPE_FLOAT:
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer,
(float)value));
break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendUInt16(data_buffer,
ArrowFloatToHalfFloat((float)value)));
+ break;
case NANOARROW_TYPE_BOOL:
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
break;
@@ -414,6 +418,10 @@ static inline ArrowErrorCode ArrowArrayAppendUInt(struct
ArrowArray* array,
case NANOARROW_TYPE_FLOAT:
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer,
(float)value));
break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendUInt16(data_buffer,
ArrowFloatToHalfFloat((float)value)));
+ break;
case NANOARROW_TYPE_BOOL:
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
break;
@@ -443,6 +451,10 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct
ArrowArray* array,
case NANOARROW_TYPE_FLOAT:
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer,
(float)value));
break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendUInt16(data_buffer,
ArrowFloatToHalfFloat((float)value)));
+ break;
default:
return EINVAL;
}
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index abd0ee6e..d0fc510e 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -787,6 +787,47 @@ TEST(ArrayTest, ArrayTestAppendToFloatArray) {
EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe(),
options));
}
+TEST(ArrayTest, ArrayTestAppendToHalfFloatArray) {  // appends ints, nulls and doubles to a half float array, then inspects the raw buffers
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_HALF_FLOAT),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);  // two nulls: elements 1 and 2
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 3), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, 3.14), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, std::numeric_limits<float>::max()),
+ NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, NAN), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, INFINITY), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, -INFINITY), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, -1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, 0), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 11);  // 1 int + 2 nulls + 1 uint + 7 doubles
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const uint16_t*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0b11111001);  // bits 1 and 2 cleared: the two null elements
+ EXPECT_EQ(validity_buffer[1], 0b00000111);  // elements 8..10 are valid
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[0]), 1);
+ EXPECT_EQ(data_buffer[1], 0);  // null slots are expected to hold zero
+ EXPECT_EQ(data_buffer[2], 0);
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[3]), 3.0);
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[4]), 3.138672);  // 3.14 with the mantissa cut to 10 bits
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[5]),
+ std::numeric_limits<float>::max());  // NOTE(review): float max overflows to half infinity in ArrowFloatToHalfFloat; presumably this passes only via EXPECT_FLOAT_EQ's ULP tolerance -- confirm
+ EXPECT_TRUE(std::isnan(ArrowHalfFloatToFloat(data_buffer[6])));
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[7]), INFINITY);
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[8]), -INFINITY);
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[9]), -1);
+ EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[10]), 0);
+
+ auto arrow_array = ImportArray(&array, float16());  // the finished array must also import as a float16 array
+ ARROW_EXPECT_OK(arrow_array);
+}
+
TEST(ArrayTest, ArrayTestAppendToBoolArray) {
struct ArrowArray array;
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index e68de587..4de8fa72 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -80,6 +80,57 @@ static inline int64_t _ArrowGrowByFactor(int64_t
current_capacity, int64_t new_c
}
}
+// float to half float conversion (returns the 16-bit pattern; mantissa is truncated), adapted from Arrow Go
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go
+static inline uint16_t ArrowFloatToHalfFloat(float value) {
+ union {  // used to read the float's bits as a uint32_t
+ float f;
+ uint32_t b;
+ } u;
+ u.f = value;
+
+ uint16_t sn = (uint16_t)((u.b >> 31) & 0x1);  // sign bit
+ uint16_t exp = (u.b >> 23) & 0xff;  // 8-bit biased float exponent
+ int16_t res = (int16_t)exp - 127 + 15;  // re-bias the exponent from 127 (float) to 15 (half)
+ uint16_t fc = (uint16_t)(u.b >> 13) & 0x3ff;  // top 10 mantissa bits; the low 13 bits are dropped, not rounded
+
+ if (exp == 0) {  // float zero or subnormal
+ res = 0;  // NOTE(review): fc is kept here, so a float subnormal with high mantissa bits set yields a nonzero half
+ } else if (exp == 0xff) {  // float infinity or NaN
+ res = 0x1f;  // NOTE(review): a NaN whose payload is only in the low 13 bits has fc == 0 and so encodes infinity
+ } else if (res > 0x1e) {  // exponent too large for a half: signed infinity
+ res = 0x1f;
+ fc = 0;
+ } else if (res < 0x01) {  // exponent too small for a normal half: signed zero
+ res = 0;  // no half subnormal is produced on this path
+ fc = 0;
+ }
+
+ return (uint16_t)((sn << 15) | (uint16_t)(res << 10) | fc);  // sign (1 bit) | exponent (5 bits) | mantissa (10 bits)
+}
+
+// half float (given as its 16-bit pattern) to float conversion, adapted from Arrow Go
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go
+static inline float ArrowHalfFloatToFloat(uint16_t value) {
+ uint32_t sn = (uint32_t)((value >> 15) & 0x1);  // sign bit
+ uint32_t exp = (value >> 10) & 0x1f;  // 5-bit biased half exponent
+ uint32_t res = exp + 127 - 15;  // re-bias the exponent from 15 (half) to 127 (float)
+ uint32_t fc = value & 0x3ff;  // 10-bit mantissa
+
+ if (exp == 0) {  // half zero or subnormal
+ res = 0;  // NOTE(review): fc is not renormalized, so a half subnormal decodes to a float subnormal of a different value -- confirm intended
+ } else if (exp == 0x1f) {  // half infinity or NaN
+ res = 0xff;
+ }
+
+ union {  // used to assemble the bits and read them back as a float
+ float f;
+ uint32_t b;
+ } u;
+ u.b = (uint32_t)(sn << 31) | (uint32_t)(res << 23) | (uint32_t)(fc << 13);  // mantissa moved to the top of the 23-bit field
+ return u.f;
+}
+
static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
buffer->data = NULL;
buffer->size_bytes = 0;
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index b213db47..4831a40a 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -289,6 +289,12 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal*
decimal,
ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal*
decimal,
struct ArrowBuffer* buffer);
+/// \brief Get the half float value of a float, returned as its 16-bit pattern
+static inline uint16_t ArrowFloatToHalfFloat(float value);
+
+/// \brief Get the float value of a half float given as its 16-bit pattern
+static inline float ArrowHalfFloatToFloat(uint16_t value);
+
/// \brief Resolve a chunk index from increasing int64_t offsets
///
/// Given a buffer of increasing int64_t offsets that begin with 0 (e.g.,
offset buffer
diff --git a/src/nanoarrow/utils_test.cc b/src/nanoarrow/utils_test.cc
index e2eef993..7ff55941 100644
--- a/src/nanoarrow/utils_test.cc
+++ b/src/nanoarrow/utils_test.cc
@@ -543,6 +543,25 @@ TEST(DecimalTest, DecimalRoundtripBitshiftTest) {
ArrowBufferReset(&buffer);
}
+// test cases adapted from
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16_test.go
+TEST(HalfFloatTest, FloatAndHalfFloatRoundTrip) {
+ uint16_t cases_bits[] = {
+ 0x8000, 0x7c00, 0xfc00, 0x3c00, 0x4000, 0xc000,
+0x0000, 0x5b8f, 0xdb8f, 0x48c8, 0xc8c8,
+ };  // expected half float bit patterns, index-aligned with cases_float
+ float cases_float[] = {
+ -0.0, INFINITY, -INFINITY, 1, 2, -2, 0, 241.875, -241.875, 9.5625,
-9.5625,
+ };
+
+ for (size_t i = 0; i < sizeof(cases_float) / sizeof(float); i++) {  // one iteration per element of cases_float
+ uint16_t bits = ArrowFloatToHalfFloat(cases_float[i]);
+ EXPECT_EQ(bits, cases_bits[i]);  // float -> half must give the exact expected bits
+ float floats = ArrowHalfFloatToFloat(bits);
+ EXPECT_FLOAT_EQ(floats, cases_float[i]);  // half -> float must give back the input value
+ }
+}
+
TEST(UtilsTest, ArrowResolveChunk64Test) {
int64_t offsets[] = {0, 2, 3, 6};
int64_t n_offsets = 4;