This is an automated email from the ASF dual-hosted git repository. apitrou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push: new b062998fda GH-46860: [C++] Making HalfFloatBuilder accept Float16 as well as uint16_t (#46981) b062998fda is described below commit b062998fdae7877686cd25b9f4d56db6dbff65d5 Author: Eric Dinse <di...@users.noreply.github.com> AuthorDate: Tue Jul 15 12:27:15 2025 -0400 GH-46860: [C++] Making HalfFloatBuilder accept Float16 as well as uint16_t (#46981) ### Rationale for this change #46860 Adding convenience methods for appending and retrieving Float16 to HalfFloatBuilder. ### What changes are included in this PR? HalfFloatBuilder has functions overloaded to accept Float16, tests, and documentation. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #46860 Lead-authored-by: Eric Dinse <293818+di...@users.noreply.github.com> Co-authored-by: Antoine Pitrou <anto...@python.org> Signed-off-by: Antoine Pitrou <anto...@python.org> --- cpp/src/arrow/array/array_test.cc | 72 ++++++++++++++++++++++ cpp/src/arrow/array/builder_primitive.h | 103 +++++++++++++++++++++++++++++++- cpp/src/arrow/type_fwd.h | 7 ++- cpp/src/arrow/util/float16.h | 1 + 4 files changed, 181 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 0dd75b01f6..c13377d3c3 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -38,6 +38,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/array/builder_decimal.h" #include "arrow/array/builder_dict.h" +#include "arrow/array/builder_primitive.h" #include "arrow/array/builder_run_end.h" #include "arrow/array/builder_time.h" #include "arrow/array/data.h" @@ -60,6 +61,7 @@ #include "arrow/util/bitmap_builders.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/float16.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/macros.h" #include "arrow/util/range.h" @@ -72,6 +74,7 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; +using util::Float16; class TestArray : public ::testing::Test { public: @@ -4099,4 +4102,73 @@ TYPED_TEST(TestPrimitiveArray, IndexOperator) { } } +class TestHalfFloatBuilder : public ::testing::Test { + public: + void VerifyValue(const HalfFloatBuilder& builder, int64_t index, float expected) { + ASSERT_EQ(builder.GetValue(index), Float16(expected).bits()); + ASSERT_EQ(builder.GetValue<Float16>(index), Float16(expected)); + ASSERT_EQ(builder.GetValue<uint16_t>(index), Float16(expected).bits()); + ASSERT_EQ(builder[index], Float16(expected).bits()); + } +}; + +TEST_F(TestHalfFloatBuilder, TestAppend) { + HalfFloatBuilder builder; + ASSERT_OK(builder.Append(Float16(0.0f))); + ASSERT_OK(builder.Append(Float16(1.0f).bits())); + ASSERT_OK(builder.AppendNull()); + ASSERT_OK(builder.Reserve(3)); + builder.UnsafeAppend(Float16(3.0f)); + builder.UnsafeAppend(Float16(4.0f).bits()); + builder.UnsafeAppend(uint16_t{15872}); // 1.5f + + VerifyValue(builder, 0, 0.0f); + VerifyValue(builder, 1, 1.0f); + VerifyValue(builder, 3, 3.0f); + VerifyValue(builder, 4, 4.0f); + VerifyValue(builder, 5, 1.5f); +} + +TEST_F(TestHalfFloatBuilder, TestBulkAppend) { + HalfFloatBuilder builder; + + ASSERT_OK(builder.AppendValues(5, Float16(1.5))); + uint16_t val = Float16(2.0f).bits(); + ASSERT_OK(builder.AppendValues({val, val, val, val}, {0, 1, 0, 1})); + ASSERT_EQ(builder.length(), 9); + for (int i = 0; i < 5; i++) { + VerifyValue(builder, i, 1.5f); + } + + { + ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); + ASSERT_OK(array->ValidateFull()); + ASSERT_EQ(array->null_count(), 2); + ASSERT_EQ(array->length(), 9); + auto comp = ArrayFromJSON(float16(), "[1.5,1.5,1.5,1.5,1.5,null,2,null,2]"); + AssertArraysEqual(*array, *comp); + } + + std::vector<Float16> vals = {Float16(1.0f), Float16(2.0f), Float16(3.0f)}; + std::vector<bool> is_valid = {true, false, true}; + std::vector<uint8_t> valid_bytes = {1, 0, 1}; + std::vector<uint8_t> bitmap = {0b00000101}; + ASSERT_OK(builder.AppendValues(vals)); + ASSERT_OK(builder.AppendValues(vals, is_valid)); + ASSERT_OK(builder.AppendValues(vals.data(), vals.size(), is_valid)); + ASSERT_OK(builder.AppendValues(vals.data(), vals.size())); + ASSERT_OK(builder.AppendValues(vals.data(), vals.size(), valid_bytes.data())); + ASSERT_OK(builder.AppendValues(vals.data(), vals.size(), bitmap.data(), 0)); + + { + ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); + ASSERT_OK(array->ValidateFull()); + ASSERT_EQ(array->null_count(), 4); + ASSERT_EQ(array->length(), 18); + auto comp = + ArrayFromJSON(float16(), "[1,2,3,1,null,3,1,null,3,1,2,3,1,null,3,1,null,3]"); + AssertArraysEqual(*array, *comp); + } +} + } // namespace arrow diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h index 8ee3c260d3..6d79d6e964 100644 --- a/cpp/src/arrow/array/builder_primitive.h +++ b/cpp/src/arrow/array/builder_primitive.h @@ -26,6 +26,7 @@ #include "arrow/result.h" #include "arrow/type.h" #include "arrow/type_traits.h" +#include "arrow/util/float16.h" namespace arrow { @@ -364,7 +365,6 @@ using Int16Builder = NumericBuilder<Int16Type>; using Int32Builder = NumericBuilder<Int32Type>; using Int64Builder = NumericBuilder<Int64Type>; -using HalfFloatBuilder = NumericBuilder<HalfFloatType>; using FloatBuilder = NumericBuilder<FloatType>; using DoubleBuilder = NumericBuilder<DoubleType>; @@ -384,6 +384,107 @@ using DurationBuilder = NumericBuilder<DurationType>; /// @} +/// \addtogroup numeric-builders +/// +/// @{ + +class ARROW_EXPORT HalfFloatBuilder : public NumericBuilder<HalfFloatType> { + public: + using BaseClass = NumericBuilder<HalfFloatType>; + using Float16 = arrow::util::Float16; + + using BaseClass::Append; + using BaseClass::AppendValues; + using BaseClass::BaseClass; + using BaseClass::GetValue; + using BaseClass::UnsafeAppend; + + /// Scalar append a arrow::util::Float16 + Status Append(const Float16 val) { return Append(val.bits()); } + + /// Scalar append a arrow::util::Float16, without checking for capacity + void UnsafeAppend(const Float16 val) { UnsafeAppend(val.bits()); } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous array of arrow::util::Float16 + /// \param[in] length the number of values to append + /// \param[in] valid_bytes an optional sequence of bytes where non-zero + /// indicates a valid (non-null) value + /// \return Status + Status AppendValues(const Float16* values, int64_t length, + const uint8_t* valid_bytes = NULLPTR) { + return BaseClass::AppendValues(reinterpret_cast<const uint16_t*>(values), length, + valid_bytes); + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous array of arrow::util::Float16 + /// \param[in] length the number of values to append + /// \param[in] bitmap a validity bitmap to copy (may be null) + /// \param[in] bitmap_offset an offset into the validity bitmap + /// \return Status + Status AppendValues(const Float16* values, int64_t length, const uint8_t* bitmap, + int64_t bitmap_offset) { + return BaseClass::AppendValues(reinterpret_cast<const uint16_t*>(values), length, + bitmap, bitmap_offset); + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a contiguous array of arrow::util::Float16 + /// \param[in] length the number of values to append + /// \param[in] is_valid a std::vector<bool> indicating valid (1) or null + /// (0). Equal in length to values + /// \return Status + Status AppendValues(const Float16* values, int64_t length, + const std::vector<bool>& is_valid) { + return BaseClass::AppendValues(reinterpret_cast<const uint16_t*>(values), length, + is_valid); + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector<arrow::util::Float16> + /// \param[in] is_valid a std::vector<bool> indicating valid (1) or null + /// (0). Equal in length to values + /// \return Status + Status AppendValues(const std::vector<Float16>& values, + const std::vector<bool>& is_valid) { + return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid); + } + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector<arrow::util::Float16> + /// \return Status + Status AppendValues(const std::vector<Float16>& values) { + return AppendValues(values.data(), static_cast<int64_t>(values.size())); + } + + /// \brief Append one value many times in one shot + /// \param[in] length the number of values to append + /// \param[in] value a arrow::util::Float16 + Status AppendValues(int64_t length, Float16 value) { + RETURN_NOT_OK(Reserve(length)); + data_builder_.UnsafeAppend(length, value.bits()); + ArrayBuilder::UnsafeSetNotNull(length); + return Status::OK(); + } + + /// \brief Get the value at a certain index + /// \param[in] index the zero-based index + /// @tparam T arrow::util::Float16 or value_type (uint16_t) + template <typename T = BaseClass::value_type> + T GetValue(int64_t index) const { + static_assert(std::is_same_v<T, BaseClass::value_type> || + std::is_same_v<T, arrow::util::Float16>); + if constexpr (std::is_same_v<T, BaseClass::value_type>) { + return BaseClass::GetValue(index); + } else { + return Float16::FromBits(BaseClass::GetValue(index)); + } + } +}; + +/// @} + class ARROW_EXPORT BooleanBuilder : public ArrayBuilder, public internal::ArrayBuilderExtraOps<BooleanBuilder, bool> { diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index 5a2fbde023..dc290cd327 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -242,12 +242,17 @@ _NUMERIC_TYPE_DECL(UInt8) _NUMERIC_TYPE_DECL(UInt16) _NUMERIC_TYPE_DECL(UInt32) _NUMERIC_TYPE_DECL(UInt64) -_NUMERIC_TYPE_DECL(HalfFloat) _NUMERIC_TYPE_DECL(Float) _NUMERIC_TYPE_DECL(Double) #undef _NUMERIC_TYPE_DECL +class HalfFloatType; +using HalfFloatArray = NumericArray<HalfFloatType>; +class HalfFloatBuilder; +struct HalfFloatScalar; +using HalfFloatTensor = NumericTensor<HalfFloatType>; + enum class DateUnit : char { DAY = 0, MILLI = 1 }; class DateType; diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h index feead50b1a..b52145cdc0 100644 --- a/cpp/src/arrow/util/float16.h +++ b/cpp/src/arrow/util/float16.h @@ -178,6 +178,7 @@ class ARROW_EXPORT Float16 { } }; +static_assert(std::is_standard_layout_v<Float16>); static_assert(std::is_trivial_v<Float16>); static_assert(sizeof(Float16) == sizeof(uint16_t));