bkietz commented on code in PR #36073: URL: https://github.com/apache/arrow/pull/36073#discussion_r1307662503
########## cpp/src/arrow/util/float16.h: ########## @@ -0,0 +1,192 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <array> +#include <cstdint> +#include <cstring> +#include <iosfwd> +#include <limits> +#include <type_traits> + +#include "arrow/util/endian.h" +#include "arrow/util/ubsan.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace util { + +/// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t` +/// +/// The exact format is as follows (from LSB to MSB): +/// - bits 0-10: mantissa +/// - bits 10-15: exponent +/// - bit 15: sign +/// +class ARROW_EXPORT Float16 { + public: + Float16() = default; + constexpr explicit Float16(uint16_t value) : value_(value) {} + + /// \brief Create a `Float16` from a 32-bit float (may lose precision) + static Float16 FromFloat(float f); + + /// \brief Read a `Float16` from memory in native-endian byte order + static Float16 FromBytes(const uint8_t* src) { + return Float16(SafeLoadAs<uint16_t>(src)); + } + + /// \brief Read a `Float16` from memory in little-endian byte order + static Float16 FromLittleEndian(const uint8_t* src) { + return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src))); + } 
+ + /// \brief Read a `Float16` from memory in big-endian byte order + static Float16 FromBigEndian(const uint8_t* src) { + return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src))); + } + + /// \brief Return the value's integer representation + constexpr uint16_t bits() const { return value_; } + constexpr explicit operator uint16_t() const { return bits(); } + + /// \brief Return true if the value is negative (sign bit is set) + constexpr bool signbit() const { return (value_ & 0x8000) != 0; } + + /// \brief Return true if the value is NaN + constexpr bool is_nan() const { + return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0; + } + /// \brief Return true if the value is positive/negative infinity + constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } + /// \brief Return true if the value is positive/negative zero + constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } + + /// \brief Convert to a 32-bit float + float ToFloat() const; + + /// \brief Copy the value's bytes in native-endian byte order + void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } + /// \brief Return the value's bytes in native-endian byte order + constexpr std::array<uint8_t, 2> ToBytes() const { +#if ARROW_LITTLE_ENDIAN + return ToLittleEndian(); +#else + return ToBigEndian(); +#endif + } + + /// \brief Copy the value's bytes in little-endian byte order + void ToLittleEndian(uint8_t* dest) const { + Float16{bit_util::ToLittleEndian(value_)}.ToBytes(dest); + } + /// \brief Return the value's bytes in little-endian byte order + constexpr std::array<uint8_t, 2> ToLittleEndian() const { +#if ARROW_LITTLE_ENDIAN + return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; +#else + return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; +#endif + } + + /// \brief Copy the value's bytes in big-endian byte order + void ToBigEndian(uint8_t* dest) const { + Float16{bit_util::ToBigEndian(value_)}.ToBytes(dest); + } + /// 
\brief Return the value's bytes in big-endian byte order + constexpr std::array<uint8_t, 2> ToBigEndian() const { +#if ARROW_LITTLE_ENDIAN + return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; +#else + return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; +#endif + } + + constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } + constexpr Float16 operator+() const { return Float16(value_); } + + friend constexpr bool operator==(Float16 lhs, Float16 rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return Float16::CompareEq(lhs, rhs); + } + friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); } + + friend constexpr bool operator<(Float16 lhs, Float16 rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return Float16::CompareLt(lhs, rhs); + } + friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; } + + friend constexpr bool operator<=(Float16 lhs, Float16 rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return !Float16::CompareLt(rhs, lhs); + } + friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; } + + ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg); + + protected: + uint16_t value_; + + private: + // Comparison helpers that assume neither operand is NaN + static constexpr bool CompareEq(Float16 lhs, Float16 rhs) { + return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero()); + } + static constexpr bool CompareLt(Float16 lhs, Float16 rhs) { + if (lhs.signbit()) { + if (rhs.signbit()) { + // Both are negative + return lhs.bits() > rhs.bits(); + } else { + // Handle +/-0 + return !lhs.is_zero() || rhs.bits() != 0; + } + } else if (rhs.signbit()) { + return false; + } else { + // Both are positive + return lhs.bits() < rhs.bits(); + } + } +}; + +static_assert(std::is_trivial_v<Float16>); + +} // namespace util +} // namespace arrow + +// TODO: Not complete Review Comment: Dropping overloads and 
specializations into std is UB except where specifically allowed, and I don't see that noted for [isnan](https://eel.is/c++draft/cmath.syn). We could get around that a little by having `isnan.h`:

```c++
#include <cmath>
#include "arrow/util/float16.h"

namespace arrow::util {
using std::isnan;
constexpr bool isnan(Float16 f) { return f.is_nan(); }
}
```

... I'm not sure if that's worthwhile though; we'd only get any benefit out of that when we need to call isnan and we happen to be in a sufficiently generic context that we don't wish to have explicitly different code for `float` and `Float16`. I'd guess it's not worth the trouble.

--
This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
