Re: [PR] GH-46611: [Python][C++] Allow building float16 arrays without numpy [arrow]

via GitHub Wed, 28 May 2025 07:33:37 -0700


pitrou commented on code in PR #46618:
URL: https://github.com/apache/arrow/pull/46618#discussion_r2112047914



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  uint16_t value;
+  if (PyFloat_Check(obj)) {
+    // Use Arrow's Float16 implementation instead of NumPy
+    float float_val = static_cast<float>(PyFloat_AsDouble(obj));
+    arrow::util::Float16 half_val = arrow::util::Float16::FromFloat(float_val);
+    value = half_val.bits();
+  } else if (has_numpy() && PyArray_IsScalar(obj, Half)) {
+    value = PyArrayScalar_VAL(obj, Half);
   } else {
-    // XXX: cannot use npy_double_to_half() without linking with Numpy
-    return Status::TypeError("Expected np.float16 instance");
+    return Status::Invalid("Could not convert expected float with type ",

Review Comment:
   Why not `TypeError`?



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  uint16_t value;

Review Comment:
   You don't really need this if you `return` directly from the computed value 
below.



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  uint16_t value;
+  if (PyFloat_Check(obj)) {
+    // Use Arrow's Float16 implementation instead of NumPy

Review Comment:
   No need for this comment IMHO.



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;

Review Comment:
   ```suggestion
     return PyFloat_FromDouble(half_val.ToDouble());
   ```



##########
python/pyarrow/src/arrow/python/type_traits.h:
##########
@@ -87,15 +86,18 @@ NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
 
 template <>
 struct npy_traits<NPY_FLOAT16> {
-  typedef npy_half value_type;
+  typedef uint16_t value_type;

Review Comment:
   Note this could also be `arrow::util::Float16`, if that's easy to do.



##########
python/pyarrow/src/arrow/python/python_to_arrow.cc:
##########
@@ -227,7 +227,16 @@ class PyValue {
 
   static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
     uint16_t value;

Review Comment:
   Here as well, no need for the intermediate variable.



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  uint16_t value;
+  if (PyFloat_Check(obj)) {
+    // Use Arrow's Float16 implementation instead of NumPy
+    float float_val = static_cast<float>(PyFloat_AsDouble(obj));
+    arrow::util::Float16 half_val = arrow::util::Float16::FromFloat(float_val);
+    value = half_val.bits();

Review Comment:
   ```suggestion
       return half_val.bits();
   ```



##########
python/pyarrow/src/arrow/python/helpers.h:
##########
@@ -43,11 +41,11 @@ class OwnedRef;
 // \return A shared pointer to DataType
 ARROW_PYTHON_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type 
type);
 
-// \brief Construct a np.float16 object from a npy_half value.
-ARROW_PYTHON_EXPORT PyObject* PyHalf_FromHalf(npy_half value);
+// \brief Construct a Python float object from a uint16_t value.

Review Comment:
   ```suggestion
   // \brief Construct a Python float object from a half-float uint16_t value.
   ```



##########
python/pyarrow/src/arrow/python/python_to_arrow.cc:
##########
@@ -227,7 +227,16 @@ class PyValue {
 
   static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
     uint16_t value;
-    RETURN_NOT_OK(PyFloat_AsHalf(obj, &value));
+    if (internal::PyFloatScalar_Check(obj)) {
+      ARROW_ASSIGN_OR_RAISE(value, PyFloat_AsHalf(obj));

Review Comment:
   ```suggestion
         return PyFloat_AsHalf(obj);
   ```



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  uint16_t value;
+  if (PyFloat_Check(obj)) {
+    // Use Arrow's Float16 implementation instead of NumPy
+    float float_val = static_cast<float>(PyFloat_AsDouble(obj));
+    arrow::util::Float16 half_val = arrow::util::Float16::FromFloat(float_val);
+    value = half_val.bits();
+  } else if (has_numpy() && PyArray_IsScalar(obj, Half)) {
+    value = PyArrayScalar_VAL(obj, Half);

Review Comment:
   ```suggestion
       return PyArrayScalar_VAL(obj, Half);
   ```



##########
python/pyarrow/src/arrow/python/python_to_arrow.cc:
##########
@@ -227,7 +227,16 @@ class PyValue {
 
   static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
     uint16_t value;
-    RETURN_NOT_OK(PyFloat_AsHalf(obj, &value));
+    if (internal::PyFloatScalar_Check(obj)) {
+      ARROW_ASSIGN_OR_RAISE(value, PyFloat_AsHalf(obj));
+    } else if (internal::PyIntScalar_Check(obj)) {
+      float float_val{};
+      RETURN_NOT_OK(internal::IntegerScalarToFloat32Safe(obj, &float_val));
+      const auto half_val = arrow::util::Float16::FromFloat(float_val);
+      value = half_val.bits();

Review Comment:
   ```suggestion
         return half_val.bits();
   ```



##########
python/pyarrow/src/arrow/python/helpers.cc:
##########
@@ -73,22 +75,27 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) 
{
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Convert the uint16_t Float16 value to a PyFloat object
+  arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
+  PyObject* result = PyFloat_FromDouble(half_val.ToDouble());
   return result;
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  uint16_t value;
+  if (PyFloat_Check(obj)) {
+    // Use Arrow's Float16 implementation instead of NumPy
+    float float_val = static_cast<float>(PyFloat_AsDouble(obj));
+    arrow::util::Float16 half_val = arrow::util::Float16::FromFloat(float_val);
+    value = half_val.bits();
+  } else if (has_numpy() && PyArray_IsScalar(obj, Half)) {
+    value = PyArrayScalar_VAL(obj, Half);
   } else {
-    // XXX: cannot use npy_double_to_half() without linking with Numpy
-    return Status::TypeError("Expected np.float16 instance");
+    return Status::Invalid("Could not convert expected float with type ",

Review Comment:
   Also, let's make the error message a bit better perhaps.
   For example:
   ```
   TypeError: conversion to float16 expects a `float` or `np.float16` object, 
got XXX
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Re: [PR] GH-46611: [Python][C++] Allow building float16 arrays without numpy [arrow]

Reply via email to