Re: [PATCH] libstdc++: Another merge from fast_float upstream [PR107468]

2022-11-24 Thread Jonathan Wakely via Gcc-patches
On Thu, 24 Nov 2022 at 09:23, Jakub Jelinek wrote:
>
> Hi!
>
> Upstream fast_float came up with a cheaper test for
> fegetround () == FE_TONEAREST using one float addition, one subtraction
> and one comparison.  If we know we are rounding to nearest, we can use
> the fast path in more cases than before.
> The following patch merges those changes into libstdc++.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK, thanks.


>
> 2022-11-24  Jakub Jelinek  
>
> PR libstdc++/107468
> * src/c++17/fast_float/MERGE: Adjust for merge from upstream.
> * src/c++17/fast_float/fast_float.h: Merge from fast_float
> 2ef9abbcf6a11958b6fa685a89d0150022e82e78 commit.
>
> --- libstdc++-v3/src/c++17/fast_float/MERGE.jj  2022-11-07 15:17:14.035071694 
> +0100
> +++ libstdc++-v3/src/c++17/fast_float/MERGE 2022-11-23 17:09:20.940866070 
> +0100
> @@ -1,4 +1,4 @@
> -662497742fea7055f0e0ee27e5a7ddc382c2c38e
> +2ef9abbcf6a11958b6fa685a89d0150022e82e78
>
>  The first line of this file holds the git revision number of the
>  last merge done from the master library sources.
> --- libstdc++-v3/src/c++17/fast_float/fast_float.h.jj   2022-11-07 
> 15:17:14.066071268 +0100
> +++ libstdc++-v3/src/c++17/fast_float/fast_float.h  2022-11-23 
> 17:19:41.735693122 +0100
> @@ -99,11 +99,11 @@ from_chars_result from_chars_advanced(co
> || defined(__MINGW64__)  \
> || defined(__s390x__)\
> || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) 
> || defined(__PPC64LE__)) )
> -#define FASTFLOAT_64BIT
> +#define FASTFLOAT_64BIT 1
>  #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86)   \
>   || defined(__arm__) || defined(_M_ARM)   \
>   || defined(__MINGW32__) || defined(__EMSCRIPTEN__))
> -#define FASTFLOAT_32BIT
> +#define FASTFLOAT_32BIT 1
>  #else
>// Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
>// We can never tell the register width, but the SIZE_MAX is a good 
> approximation.
> @@ -111,9 +111,9 @@ from_chars_result from_chars_advanced(co
>#if SIZE_MAX == 0xffff
>  #error Unknown platform (16-bit, unsupported)
>#elif SIZE_MAX == 0xffffffff
> -#define FASTFLOAT_32BIT
> +#define FASTFLOAT_32BIT 1
>#elif SIZE_MAX == 0xffffffffffffffff
> -#define FASTFLOAT_64BIT
> +#define FASTFLOAT_64BIT 1
>#else
>  #error Unknown platform (not 32-bit, not 64-bit?)
>#endif
> @@ -359,10 +359,12 @@ template <typename T> struct binary_form
>static inline constexpr int minimum_exponent();
>static inline constexpr int infinite_power();
>static inline constexpr int sign_index();
> +  static inline constexpr int min_exponent_fast_path(); // used when 
> fegetround() == FE_TONEAREST
>static inline constexpr int max_exponent_fast_path();
>static inline constexpr int max_exponent_round_to_even();
>static inline constexpr int min_exponent_round_to_even();
>static inline constexpr uint64_t max_mantissa_fast_path(int64_t power);
> +  static inline constexpr uint64_t max_mantissa_fast_path(); // used when 
> fegetround() == FE_TONEAREST
>static inline constexpr int largest_power_of_ten();
>static inline constexpr int smallest_power_of_ten();
>static inline constexpr T exact_power_of_ten(int64_t power);
> @@ -372,6 +374,22 @@ template <typename T> struct binary_form
>static inline constexpr equiv_uint hidden_bit_mask();
>  };
>
> +template <> inline constexpr int 
> binary_format<double>::min_exponent_fast_path() {
> +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
> +  return 0;
> +#else
> +  return -22;
> +#endif
> +}
> +
> +template <> inline constexpr int 
> binary_format<float>::min_exponent_fast_path() {
> +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
> +  return 0;
> +#else
> +  return -10;
> +#endif
> +}
> +
>  template <> inline constexpr int 
> binary_format<double>::mantissa_explicit_bits() {
>return 52;
>  }
> @@ -418,13 +436,18 @@ template <> inline constexpr int binary_
>  template <> inline constexpr int 
> binary_format<float>::max_exponent_fast_path() {
>return 10;
>  }
> -
> +template <> inline constexpr uint64_t 
> binary_format<double>::max_mantissa_fast_path() {
> +  return uint64_t(2) << mantissa_explicit_bits();
> +}
>  template <> inline constexpr uint64_t 
> binary_format<double>::max_mantissa_fast_path(int64_t power) {
>// caller is responsible to ensure that
>// power >= 0 && power <= 22
>//
>return max_mantissa_double[power];
>  }
> +template <> inline constexpr uint64_t 
> binary_format<float>::max_mantissa_fast_path() {
> +  return uint64_t(2) << mantissa_explicit_bits();
> +}
>  template <> inline constexpr uint64_t 
> binary_format<float>::max_mantissa_fast_path(int64_t power) {
>// caller is responsible to ensure that
>// power >= 0 && power <= 10
> @@ -619,10 +642,6 @@ parsed_number_string parse_number_string
>
>uint64_t i = 0; // an 

[PATCH] libstdc++: Another merge from fast_float upstream [PR107468]

2022-11-24 Thread Jakub Jelinek via Gcc-patches
Hi!

Upstream fast_float came up with a cheaper test for
fegetround () == FE_TONEAREST using one float addition, one subtraction
and one comparison.  If we know we are rounding to nearest, we can use
the fast path in more cases than before.
The following patch merges those changes into libstdc++.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-11-24  Jakub Jelinek  

PR libstdc++/107468
* src/c++17/fast_float/MERGE: Adjust for merge from upstream.
* src/c++17/fast_float/fast_float.h: Merge from fast_float
2ef9abbcf6a11958b6fa685a89d0150022e82e78 commit.

--- libstdc++-v3/src/c++17/fast_float/MERGE.jj  2022-11-07 15:17:14.035071694 
+0100
+++ libstdc++-v3/src/c++17/fast_float/MERGE 2022-11-23 17:09:20.940866070 
+0100
@@ -1,4 +1,4 @@
-662497742fea7055f0e0ee27e5a7ddc382c2c38e
+2ef9abbcf6a11958b6fa685a89d0150022e82e78
 
 The first line of this file holds the git revision number of the
 last merge done from the master library sources.
--- libstdc++-v3/src/c++17/fast_float/fast_float.h.jj   2022-11-07 
15:17:14.066071268 +0100
+++ libstdc++-v3/src/c++17/fast_float/fast_float.h  2022-11-23 
17:19:41.735693122 +0100
@@ -99,11 +99,11 @@ from_chars_result from_chars_advanced(co
|| defined(__MINGW64__)  \
|| defined(__s390x__)\
|| (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || 
defined(__PPC64LE__)) )
-#define FASTFLOAT_64BIT
+#define FASTFLOAT_64BIT 1
 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86)   \
  || defined(__arm__) || defined(_M_ARM)   \
  || defined(__MINGW32__) || defined(__EMSCRIPTEN__))
-#define FASTFLOAT_32BIT
+#define FASTFLOAT_32BIT 1
 #else
   // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
   // We can never tell the register width, but the SIZE_MAX is a good 
approximation.
@@ -111,9 +111,9 @@ from_chars_result from_chars_advanced(co
   #if SIZE_MAX == 0xffff
 #error Unknown platform (16-bit, unsupported)
   #elif SIZE_MAX == 0xffffffff
-#define FASTFLOAT_32BIT
+#define FASTFLOAT_32BIT 1
   #elif SIZE_MAX == 0xffffffffffffffff
-#define FASTFLOAT_64BIT
+#define FASTFLOAT_64BIT 1
   #else
 #error Unknown platform (not 32-bit, not 64-bit?)
   #endif
@@ -359,10 +359,12 @@ template <typename T> struct binary_form
   static inline constexpr int minimum_exponent();
   static inline constexpr int infinite_power();
   static inline constexpr int sign_index();
+  static inline constexpr int min_exponent_fast_path(); // used when 
fegetround() == FE_TONEAREST
   static inline constexpr int max_exponent_fast_path();
   static inline constexpr int max_exponent_round_to_even();
   static inline constexpr int min_exponent_round_to_even();
   static inline constexpr uint64_t max_mantissa_fast_path(int64_t power);
+  static inline constexpr uint64_t max_mantissa_fast_path(); // used when 
fegetround() == FE_TONEAREST
   static inline constexpr int largest_power_of_ten();
   static inline constexpr int smallest_power_of_ten();
   static inline constexpr T exact_power_of_ten(int64_t power);
@@ -372,6 +374,22 @@ template <typename T> struct binary_form
   static inline constexpr equiv_uint hidden_bit_mask();
 };
 
+template <> inline constexpr int 
binary_format<double>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;
+#else
+  return -22;
+#endif
+}
+
+template <> inline constexpr int 
binary_format<float>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;
+#else
+  return -10;
+#endif
+}
+
 template <> inline constexpr int 
binary_format<double>::mantissa_explicit_bits() {
   return 52;
 }
@@ -418,13 +436,18 @@ template <> inline constexpr int binary_
 template <> inline constexpr int 
binary_format<float>::max_exponent_fast_path() {
   return 10;
 }
-
+template <> inline constexpr uint64_t 
binary_format<double>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits();
+}
 template <> inline constexpr uint64_t 
binary_format<double>::max_mantissa_fast_path(int64_t power) {
   // caller is responsible to ensure that
   // power >= 0 && power <= 22
   //
   return max_mantissa_double[power];
 }
+template <> inline constexpr uint64_t 
binary_format<float>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits();
+}
 template <> inline constexpr uint64_t 
binary_format<float>::max_mantissa_fast_path(int64_t power) {
   // caller is responsible to ensure that
   // power >= 0 && power <= 10
@@ -619,10 +642,6 @@ parsed_number_string parse_number_string
 
   uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
 
-  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
-i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this 
will overflow, but that's ok
-p += 8;
-  }
   while ((p != pend) && is_integer(*p)) {
 // a