benibus commented on code in PR #36073: URL: https://github.com/apache/arrow/pull/36073#discussion_r1333492541
########## cpp/src/arrow/util/float16.h: ########## @@ -0,0 +1,207 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <array> +#include <cstdint> +#include <cstring> +#include <iosfwd> +#include <limits> +#include <type_traits> + +#include "arrow/util/endian.h" +#include "arrow/util/macros.h" +#include "arrow/util/ubsan.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace util { + +/// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t` +/// +/// The exact format is as follows (from LSB to MSB): +/// - bits 0-9: mantissa +/// - bits 10-14: exponent +/// - bit 15: sign +/// +class ARROW_EXPORT Float16 { + public: + Float16() = default; + constexpr explicit Float16(uint16_t value) : value_(value) {} + + template <typename T, typename std::enable_if_t<std::is_floating_point_v<T>>* = NULLPTR> + explicit Float16(T f) : Float16(FromNative(f)) {} + + /// \brief Create a `Float16` from a 32-bit float (may lose precision) + static Float16 FromFloat(float f); + /// \brief Create a `Float16` from a 64-bit float (may lose precision) + static Float16 FromDouble(double d); + /// \brief Create a `Float16` from a native floating-point value (may lose 
precision) + static Float16 FromNative(float f) { return FromFloat(f); } + static Float16 FromNative(double d) { return FromDouble(d); } + + /// \brief Read a `Float16` from memory in native-endian byte order + static Float16 FromBytes(const uint8_t* src) { + return Float16(SafeLoadAs<uint16_t>(src)); + } + + /// \brief Read a `Float16` from memory in little-endian byte order + static Float16 FromLittleEndian(const uint8_t* src) { + return Float16(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src))); + } + + /// \brief Read a `Float16` from memory in big-endian byte order + static Float16 FromBigEndian(const uint8_t* src) { + return Float16(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src))); + } + + /// \brief Return the value's integer representation + constexpr uint16_t bits() const { return value_; } + + /// \brief Return true if the value is negative (sign bit is set) + constexpr bool signbit() const { return (value_ & 0x8000) != 0; } + + /// \brief Return true if the value is NaN + constexpr bool is_nan() const { + return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0; + } + /// \brief Return true if the value is positive/negative infinity + constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; } + /// \brief Return true if the value is finite and not NaN + constexpr bool is_finite() const { return (value_ & 0x7c00) != 0x7c00; } + /// \brief Return true if the value is positive/negative zero + constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; } + + /// \brief Convert to a 32-bit float + float ToFloat() const; + /// \brief Convert to a 64-bit float + double ToDouble() const; + + explicit operator float() const { return ToFloat(); } + explicit operator double() const { return ToDouble(); } + + /// \brief Copy the value's bytes in native-endian byte order + void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); } + /// \brief Return the value's bytes in native-endian byte order 
+ constexpr std::array<uint8_t, 2> ToBytes() const { +#if ARROW_LITTLE_ENDIAN + return ToLittleEndian(); +#else + return ToBigEndian(); +#endif + } + + /// \brief Copy the value's bytes in little-endian byte order + void ToLittleEndian(uint8_t* dest) const { + Float16{::arrow::bit_util::ToLittleEndian(value_)}.ToBytes(dest); + } + /// \brief Return the value's bytes in little-endian byte order + constexpr std::array<uint8_t, 2> ToLittleEndian() const { +#if ARROW_LITTLE_ENDIAN + return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; +#else + return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; +#endif + } + + /// \brief Copy the value's bytes in big-endian byte order + void ToBigEndian(uint8_t* dest) const { + Float16{::arrow::bit_util::ToBigEndian(value_)}.ToBytes(dest); + } + /// \brief Return the value's bytes in big-endian byte order + constexpr std::array<uint8_t, 2> ToBigEndian() const { +#if ARROW_LITTLE_ENDIAN + return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)}; +#else + return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)}; +#endif + } + + constexpr Float16 operator-() const { return Float16(value_ ^ 0x8000); } + constexpr Float16 operator+() const { return Float16(value_); } + + friend constexpr bool operator==(Float16 lhs, Float16 rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return Float16::CompareEq(lhs, rhs); + } + friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); } + + friend constexpr bool operator<(Float16 lhs, Float16 rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return Float16::CompareLt(lhs, rhs); + } + friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; } + + friend constexpr bool operator<=(Float16 lhs, Float16 rhs) { + if (lhs.is_nan() || rhs.is_nan()) return false; + return !Float16::CompareLt(rhs, lhs); + } + friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; } + + ARROW_FRIEND_EXPORT friend std::ostream& 
operator<<(std::ostream& os, Float16 arg); + + protected: + uint16_t value_; Review Comment: Hmm... I could go either way, but I'm leaning towards keeping it as is - mostly because it mirrors the properties of the native FP types, which seems to be something we're (at least implicitly) aspiring to do here. Plus, if we decide that the compiler/stl optimizations aren't worth the risk, then we could add the default initializer later without as much consequence as the reverse. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
