This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c0d5a596f3 GH-49003: [C++] Don't consider `out_of_range` an error in 
float parsing (#49095)
c0d5a596f3 is described below

commit c0d5a596f300e25e194343af3766f4ca0e746300
Author: Álvaro Kothe <[email protected]>
AuthorDate: Tue Feb 3 14:59:44 2026 -0300

    GH-49003: [C++] Don't consider `out_of_range` an error in float parsing 
(#49095)
    
    ### Rationale for this change
    This PR restores the behavior from before version 23 for floating-point 
parsing on overflow and subnormal.
    
    `fast_float` didn't assign an error code on overflow in version `3.10.1` 
and assigned `±Inf` on overflow and `0.0` on subnormal. With the update to 
version `8.1`, it started to assign `std::errc::result_out_of_range` in such 
cases.
    
    ### What changes are included in this PR?
    Ignores `std::errc::result_out_of_range` and produces `±Inf` / `0.0` as 
appropriate instead of failing the conversion.
    
    ### Are these changes tested?
    Yes. Created tests for overflow with positive and negative signed mantissa, 
and also created tests for subnormal, all of them for binary{16,32,64}.
    
    ### Are there any user-facing changes?
    It's a user-facing change. The CSV reader in version `libarrow==23` was 
reading such values as strings, while before it was parsing them as `0` or `±inf`.
    
    With this patch, the CSV reader in PyArrow outputs:
    
    ```python
    >>> import pyarrow
    >>> import pyarrow.csv
    >>> import io
    >>> table = 
pyarrow.csv.read_csv(io.BytesIO(f"data\n10E-617\n10E617\n-10E617".encode()))
    >>> print(table)
    pyarrow.Table
    data: double
    ----
    data: [[0,inf,-inf]]
    ```
    
    Closes #49003
    
    * GitHub Issue: #49003
    
    Authored-by: Alvaro-Kothe <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/util/value_parsing.cc      | 15 ++++++++++++---
 cpp/src/arrow/util/value_parsing_test.cc | 12 ++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/util/value_parsing.cc 
b/cpp/src/arrow/util/value_parsing.cc
index 1a8e8066d7..0cc71f276d 100644
--- a/cpp/src/arrow/util/value_parsing.cc
+++ b/cpp/src/arrow/util/value_parsing.cc
@@ -35,7 +35,10 @@ bool StringToFloat(const char* s, size_t length, char 
decimal_point, float* out)
       ::arrow_vendored::fast_float::chars_format::general, decimal_point};
   const auto res =
       ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out, 
options);
-  return res.ec == std::errc() && res.ptr == s + length;
+  const bool is_valid_number =
+      res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+  const bool consumed_entire_string = res.ptr == s + length;
+  return is_valid_number && consumed_entire_string;
 }
 
 bool StringToFloat(const char* s, size_t length, char decimal_point, double* 
out) {
@@ -43,7 +46,10 @@ bool StringToFloat(const char* s, size_t length, char 
decimal_point, double* out
       ::arrow_vendored::fast_float::chars_format::general, decimal_point};
   const auto res =
       ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out, 
options);
-  return res.ec == std::errc() && res.ptr == s + length;
+  const bool is_valid_number =
+      res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+  const bool consumed_entire_string = res.ptr == s + length;
+  return is_valid_number && consumed_entire_string;
 }
 
 // Half float
@@ -53,7 +59,10 @@ bool StringToFloat(const char* s, size_t length, char 
decimal_point, Float16* ou
   float temp_out;
   const auto res =
       ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, 
temp_out, options);
-  const bool ok = res.ec == std::errc() && res.ptr == s + length;
+  const bool is_valid_number =
+      res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+  const bool consumed_entire_string = res.ptr == s + length;
+  const bool ok = is_valid_number && consumed_entire_string;
   if (ok) {
     *out = Float16::FromFloat(temp_out);
   }
diff --git a/cpp/src/arrow/util/value_parsing_test.cc 
b/cpp/src/arrow/util/value_parsing_test.cc
index b9e3b18444..b61f777685 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -141,6 +141,10 @@ TEST(StringConversion, ToFloat) {
   AssertConversion<FloatType>("0", 0.0f);
   AssertConversion<FloatType>("-0.0", -0.0f);
   AssertConversion<FloatType>("-1e20", -1e20f);
+  AssertConversion<FloatType>("4e38", std::numeric_limits<float>::infinity());
+  AssertConversion<FloatType>("-4e38", 
-std::numeric_limits<float>::infinity());
+  AssertConversion<FloatType>("1e-46", 0.0f);
+  AssertConversion<FloatType>("-1e-46", -0.0f);
   AssertConversion<FloatType>("+Infinity", 
std::numeric_limits<float>::infinity());
   AssertConversion<FloatType>("-Infinity", 
-std::numeric_limits<float>::infinity());
   AssertConversion<FloatType>("Infinity", 
std::numeric_limits<float>::infinity());
@@ -166,6 +170,10 @@ TEST(StringConversion, ToDouble) {
   AssertConversion<DoubleType>("0", 0);
   AssertConversion<DoubleType>("-0.0", -0.0);
   AssertConversion<DoubleType>("-1e100", -1e100);
+  AssertConversion<DoubleType>("2e308", 
std::numeric_limits<double>::infinity());
+  AssertConversion<DoubleType>("-2e308", 
-std::numeric_limits<double>::infinity());
+  AssertConversion<DoubleType>("1e-325", 0.0);
+  AssertConversion<DoubleType>("-1e-325", -0.0);
   AssertConversion<DoubleType>("+Infinity", 
std::numeric_limits<double>::infinity());
   AssertConversion<DoubleType>("-Infinity", 
-std::numeric_limits<double>::infinity());
   AssertConversion<DoubleType>("Infinity", 
std::numeric_limits<double>::infinity());
@@ -185,6 +193,10 @@ TEST(StringConversion, ToHalfFloat) {
   AssertConversion<HalfFloatType>("0", Float16(0.0f));
   AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f));
   AssertConversion<HalfFloatType>("-1e15", Float16(-1e15));
+  AssertConversion<HalfFloatType>("7e4", Float16::FromBits(0x7c00));
+  AssertConversion<HalfFloatType>("-7e4", Float16::FromBits(0xfc00));
+  AssertConversion<HalfFloatType>("1e-9", Float16(0.0f));
+  AssertConversion<HalfFloatType>("-1e-9", Float16(-0.0f));
   AssertConversion<HalfFloatType>("+Infinity", Float16::FromBits(0x7c00));
   AssertConversion<HalfFloatType>("-Infinity", Float16::FromBits(0xfc00));
   AssertConversion<HalfFloatType>("Infinity", Float16::FromBits(0x7c00));

Reply via email to