paleolimbot commented on code in PR #499: URL: https://github.com/apache/arrow-nanoarrow/pull/499#discussion_r1621083981
########## src/nanoarrow/utils.c: ########## @@ -450,3 +450,32 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decim return NANOARROW_OK; } + +// float to half float conversion, adapted from Arrow Go +// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go +uint16_t ArrowFloatToHalfFloat(float value) { Review Comment: Can/should this be `static inline`? I think we have some decimal helpers that are similar in scope to this and might be a good home. (This will fix the 'namespaced' CI failure, which is complaining because any non static inline functions need an entry at the top of `nanoarrow.h` to apply `NANOARROW_NAMESPACE`, if defined). ########## src/nanoarrow/buffer_inline.h: ########## @@ -226,6 +226,12 @@ static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, return ArrowBufferAppend(buffer, &value, sizeof(float)); } +static inline ArrowErrorCode ArrowBufferAppendHalfFloat(struct ArrowBuffer* buffer, + float value) { + uint16_t half_value = ArrowFloatToHalfFloat(value); + return ArrowBufferAppend(buffer, &half_value, sizeof(uint16_t)); +} Review Comment: Do we need this helper? If I'm reading this correctly, the workaround is fairly compact (`ArrowBufferAppendUint16(ArrowFloatToHalfFloat(value))`). ########## src/nanoarrow/utils.c: ########## @@ -450,3 +450,32 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decim return NANOARROW_OK; } + +// float to half float conversion, adapted from Arrow Go +// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go +uint16_t ArrowFloatToHalfFloat(float value) { + union { + float f; + uint32_t b; + } u; + u.f = value; + + uint16_t sn = (u.b >> 31) & 0x1; + uint16_t exp = (u.b >> 23) & 0xff; + uint16_t res = (int16_t)exp - 127 + 15; + uint16_t fc = (uint16_t)(u.b >> 13) & 0x3ff; + + if (exp == 0) { + res = 0; + } else if (exp == 0xff) { + res = 0x1f; + } else if (res > 0x1e) { + res = 0x1f; + fc = 0; + } else if (res < 0x01) { + res = 0; + fc = 0; + } + + return (uint16_t)((sn << 15) | (uint16_t)(res << 10) | fc); Review Comment: @zeroshade Could you take a look at this or suggest someone who can? (I don't have any experience with float16). ########## src/nanoarrow/array_test.cc: ########## @@ -787,6 +787,43 @@ TEST(ArrayTest, ArrayTestAppendToFloatArray) { EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe(), options)); } +TEST(ArrayTest, ArrayTestAppendToHalfFloatArray) { + struct ArrowArray array; + + ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_HALF_FLOAT), NANOARROW_OK); + EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendUInt(&array, 3), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, 3.14), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, std::numeric_limits<float>::max()), + NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, NAN), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, INFINITY), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, -INFINITY), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, -1), NANOARROW_OK); + EXPECT_EQ(ArrowArrayAppendDouble(&array, 0), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK); + + EXPECT_EQ(array.length, 11); + EXPECT_EQ(array.null_count, 2); + auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]); + auto data_buffer = reinterpret_cast<const uint16_t*>(array.buffers[1]); + EXPECT_EQ(validity_buffer[0], 0b11111001); + EXPECT_EQ(validity_buffer[1], 0b00000111); + EXPECT_EQ(data_buffer[0], 15360); + EXPECT_EQ(data_buffer[1], 0); + EXPECT_EQ(data_buffer[2], 0); + EXPECT_EQ(data_buffer[3], 16896); + EXPECT_EQ(data_buffer[4], 16967); + EXPECT_EQ(data_buffer[5], 31744); + EXPECT_EQ(data_buffer[6], 32256); + EXPECT_EQ(data_buffer[7], 31744); + EXPECT_EQ(data_buffer[8], 64512); + EXPECT_EQ(data_buffer[9], 48128); + EXPECT_EQ(data_buffer[10], 0); Review Comment: Can we add a note about how to generate these values? (Perhaps numpy can do this?) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
