Improve performance of pg_strtointNN functions

David Rowley Wed, 30 Nov 2022 15:42:31 -0800

Over on [1], Dean and I were discussing why both gcc and clang don't
seem to want to optimize the multiplication that we're doing in
pg_strtoint16, pg_strtoint32 and pg_strtoint64 into something more
efficient.  It seems that this is down to the overflow checks.
Removing seems to allow the compiler to better optimize the compiled
code.  See the use of LEA instead of IMUL in [2].


Instead of using the pg_mul_sNN_overflow functions, we can just
accumulate the number in an unsigned version of the type and do an
overflow check by checking if the accumulated value has gone beyond a
10th of the maximum *signed* value for the type.  We then just need to
do some final overflow checks after the accumulation is done.  What
those depend on if it's a negative or positive number.

I ran a few microbenchmarks with the attached str2int.c file and I see
about a 10-20% performance increase:

$ ./str2int -100000000 100000000
n = -100000000, e = 100000000
pg_strtoint32 in 3.207926 seconds
pg_strtoint32_v2 in 2.763062 seconds (16.100399% faster)

v2 is the updated version of the function

On Windows, the gains are generally a bit more. I think this must be
due to the lack of overflow intrinsic functions.

>str2int.exe -100000000 100000000
n = -100000000, e = 100000000
pg_strtoint32 in 9.416000 seconds
pg_strtoint32_v2 in 8.099000 seconds (16.261267% faster)

I was thinking that we should likely apply this before doing the hex
literals, which is the main focus of [1].  The reason being is so that
that patch can immediately have faster conversions by allowing the
compiler to use bit shifting instead of other means of multiplying by
a power-of-2 number. I'm hoping this removes a barrier for Peter from
the small gripe I raised on that thread about the patch having slower
than required hex, octal and binary string parsing.

David

[1] 
https://postgr.es/m/caaphdvrl6_+wkgpqrhr7gh_6xy3hxm6a3qcsz5forurjdff...@mail.gmail.com
[2] https://godbolt.org/z/7YoMT63q1

#include <stdio.h>
#include <time.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define PG_INT32_MIN    (-0x7FFFFFFF-1)
#define PG_INT32_MAX    (0x7FFFFFFF)

#define true 1
#define false 0

#ifndef _MSC_VER
#define HAVE__BUILTIN_CLZ
#define HAVE__BUILTIN_OP_OVERFLOW
#define likely(x)       __builtin_expect((x) != 0, 1)
#define unlikely(x) __builtin_expect((x) != 0, 0)
#else
#define likely(x)       x
#define unlikely(x) x
#endif

typedef char bool;
typedef unsigned int uint32;
typedef int int32;
typedef char int8;
typedef unsigned char uint8;
typedef long long int64;

static inline bool
pg_sub_s32_overflow(int32 a, int32 b, int32 *result)
{
#if defined(HAVE__BUILTIN_OP_OVERFLOW)
        return __builtin_sub_overflow(a, b, result);
#else
        int64           res = (int64) a - (int64) b;

        if (res > PG_INT32_MAX || res < PG_INT32_MIN)
        {
                *result = 0x5EED;               /* to avoid spurious warnings */
                return true;
        }
        *result = (int32) res;
        return false;
#endif
}

static inline bool
pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
{
#if defined(HAVE__BUILTIN_OP_OVERFLOW)
        return __builtin_mul_overflow(a, b, result);
#else
        int64           res = (int64) a * (int64) b;

        if (res > PG_INT32_MAX || res < PG_INT32_MIN)
        {
                *result = 0x5EED;               /* to avoid spurious warnings */
                return true;
        }
        *result = (int32) res;
        return false;
#endif
}

int32
pg_strtoint32(const char *s)
{
        const char *ptr = s;
        int32           tmp = 0;
        bool            neg = false;

        /* skip leading spaces */
        while (likely(*ptr) && isspace((unsigned char) *ptr))
                ptr++;

        /* handle sign */
        if (*ptr == '-')
        {
                ptr++;
                neg = true;
        }
        else if (*ptr == '+')
                ptr++;

        /* require at least one digit */
        if (unlikely(!isdigit((unsigned char) *ptr)))
                goto invalid_syntax;

        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');

                if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
                        unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
                ptr++;

        if (unlikely(*ptr != '\0'))
                goto invalid_syntax;

        if (!neg)
        {
                /* could fail if input is most negative number */
                if (unlikely(tmp == PG_INT32_MIN))
                        goto out_of_range;
                tmp = -tmp;
        }

        return tmp;

out_of_range:
        fprintf(stderr, "value \"%s\" is out of range for type %s",
                                        s, "integer");

invalid_syntax:
        fprintf(stderr, "invalid input syntax for type %s: \"%s\"",
                                        "integer", s);

        return 0;                                       /* keep compiler quiet 
*/
}

int32
pg_strtoint32_v2(const char *s)
{
        const char *ptr = s;
        uint32          tmp = 0;
        bool            neg = false;

        /* skip leading spaces */
        while (likely(*ptr) && isspace((unsigned char) *ptr))
                ptr++;

        /* handle sign */
        if (*ptr == '-')
        {
                ptr++;
                neg = true;
        }
        else if (*ptr == '+')
                ptr++;

        /* require at least one digit */
        if (unlikely(!isdigit((unsigned char) *ptr)))
                goto invalid_syntax;

        while (*ptr && isdigit((unsigned char) *ptr))
        {
//              if (unlikely(tmp > (PG_INT32_MAX / 10)))
                if (unlikely(tmp >= (1 - PG_INT32_MIN / 10)))
                        goto out_of_range;
                tmp = tmp * 10 + (*ptr++ - '0');
        }
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
                ptr++;

        if (unlikely(*ptr != '\0'))
                goto invalid_syntax;

        if (neg)
        {
                if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
                        goto out_of_range;
                return -((int32) tmp);
        }

        if (tmp > PG_INT32_MAX)
                goto out_of_range;

        return (int32) tmp;

out_of_range:
        fprintf(stderr, "value \"%s\" is out of range for type %s",
                                        s, "integer");
        exit(-1);

invalid_syntax:
        fprintf(stderr, "invalid input syntax for type %s: \"%s\"",
                                        "integer", s);
        exit(-1);

        return 0;                                       /* keep compiler quiet 
*/
}

static const char DIGIT_TABLE[200] =
"00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
"10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
"20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
"30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
"40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
"50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
"60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
"70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";

const uint8 pg_leftmost_one_pos[256] = {
        0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};

static inline int
pg_leftmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CLZ
        return 31 - __builtin_clz(word);
#else
        int                     shift = 32 - 8;

        while ((word >> shift) == 0)
                shift -= 8;

        return shift + pg_leftmost_one_pos[(word >> shift) & 255];
#endif                                                  /* HAVE__BUILTIN_CLZ */
}

static inline int
decimalLength32(const uint32 v)
{
        int                     t;
        static const uint32 PowersOfTen[] = {
                1, 10, 100,
                1000, 10000, 100000,
                1000000, 10000000, 100000000,
                1000000000
        };

        /*
         * Compute base-10 logarithm by dividing the base-2 logarithm by a
         * good-enough approximation of the base-2 logarithm of 10
         */
        t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
        return t + (v >= PowersOfTen[t]);
}

/*
 * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
 * not NUL-terminated, and returns the length of that string representation
 *
 * Caller must ensure that 'a' points to enough memory to hold the result (at
 * least 10 bytes)
 */
int
pg_ultoa_n(uint32 value, char *a)
{
        int                     olength,
                                i = 0;

        /* Degenerate case */
        if (value == 0)
        {
                *a = '0';
                return 1;
        }

        olength = decimalLength32(value);

        /* Compute the result string. */
        while (value >= 10000)
        {
                const uint32 c = value - 10000 * (value / 10000);
                const uint32 c0 = (c % 100) << 1;
                const uint32 c1 = (c / 100) << 1;

                char       *pos = a + olength - i;

                value /= 10000;

                memcpy(pos - 2, DIGIT_TABLE + c0, 2);
                memcpy(pos - 4, DIGIT_TABLE + c1, 2);
                i += 4;
        }
        if (value >= 100)
        {
                const uint32 c = (value % 100) << 1;

                char       *pos = a + olength - i;

                value /= 100;

                memcpy(pos - 2, DIGIT_TABLE + c, 2);
                i += 2;
        }
        if (value >= 10)
        {
                const uint32 c = value << 1;

                char       *pos = a + olength - i;

                memcpy(pos - 2, DIGIT_TABLE + c, 2);
        }
        else
        {
                *a = (char) ('0' + value);
        }

        return olength;
}

int
pg_ltoa(int32 value, char *a)
{
        uint32          uvalue = (uint32) value;
        int                     len = 0;

        if (value < 0)
        {
                uvalue = (uint32) 0 - uvalue;
                a[len++] = '-';
        }
        len += pg_ultoa_n(uvalue, a + len);
        a[len] = '\0';
        return len;
}


//#define START -100000000
//#define END 100000000
#define DEFAULT_START PG_INT32_MIN
#define DEFAULT_END PG_INT32_MAX

int main(int argc, char **argv)
{
        clock_t start, end;
        int64 n, e;
        char buff[16];
        double v1_time, v2_time;

        if (argc < 3)
        {
                n = DEFAULT_START;
                e = DEFAULT_END;
        }
        else
        {
                n = pg_strtoint32(argv[1]);
                e = pg_strtoint32(argv[2]);
        }

        printf("n = %lld, e = %lld\n", n, e);
        start = clock();
        while (n <= e)
        {
                int64 y;
                pg_ltoa((int32) n, buff);

                y = pg_strtoint32(buff);
                
                if (n != y)
                        fprintf(stderr, "%lld != %lld\n", n, y);
                n++;
        }
        
        end = clock();
        v1_time = (double) (end - start) / CLOCKS_PER_SEC;
        printf("pg_strtoint32 in %f seconds\n", v1_time);

        if (argc < 3)
                n = DEFAULT_START;
        else
                n = pg_strtoint32_v2(argv[1]);
        
        start = clock();
        while (n <= e)
        {
                int64 y;
                pg_ltoa((int32) n, buff);
                
                y = pg_strtoint32_v2(buff);
                
                if (n != y)
                        fprintf(stderr, "%lld != %lld\n", n, y);
                n++;
        }
        
        end = clock();
        v2_time = (double) (end - start) / CLOCKS_PER_SEC;
        printf("pg_strtoint32_v2 in %f seconds (%f%% faster)\n", v2_time, 
v1_time / v2_time * 100.0 - 100.0);

}

From 47dc46bf18bcfa726400cc0abaa9eae65ff20864 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Thu, 1 Dec 2022 11:15:26 +1300
Subject: [PATCH v1] Improve performance of pg_strtointNN functions

Experiments have shown that modern versions of both gcc and clang are
unable to fully optimize the multiplication by 10 that we're doing in the
pg_strtointNN functions.  Both compilers seem to be making use of "imul",
which is not the most efficient way to multiply by 10.  This seems to be
due to the overflow checking that we're doing.  Without the overflow
checks, both clang and gcc are able to make use of the "add" and "lea"
instructions on x86.

To allow compilers more flexibility, here we adjust the code so that we
accumulate the number as an unsigned version of the type and remove the
use of pg_mul_sNN_overflow() and pg_sub_sNN_overflow().  The overflow
checking can be done simply by checking if the accumulated value has gone
beyond a 10th of the maximum signed value for the given type.  If it has
then the accumulation of the next digit will cause an overflow.  After
this is done, we do a final overflow check before converting the unsigned
version of the number back to its signed counterpart.

Testing has shown about a 10-20% speedup of the pg_strtointNN functions.
Systems without HAVE__BUILTIN_OP_OVERFLOW will be on the higher end of
that increase.

Author: David Rowley, Dean Rasheed
Discussion: 
https://postgr.es/m/caaphdvrl6_+wkgpqrhr7gh_6xy3hxm6a3qcsz5forurjdff...@mail.gmail.com
---
 src/backend/utils/adt/numutils.c | 83 +++++++++++++++-----------------
 1 file changed, 39 insertions(+), 44 deletions(-)

diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 834ec0b588..d769629852 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -91,15 +91,15 @@ decimalLength64(const uint64 v)
  * Allows any number of leading or trailing whitespace characters. Will throw
  * ereport() upon bad input format or overflow.
  *
- * NB: Accumulate input as a negative number, to deal with two's complement
+ * NB: Accumulate input as an unsigned number, to deal with two's complement
  * representation of the most negative number, which can't be represented as a
- * positive number.
+ * signed positive number.
  */
 int16
 pg_strtoint16(const char *s)
 {
        const char *ptr = s;
-       int16           tmp = 0;
+       uint16          tmp = 0;
        bool            neg = false;
 
        /* skip leading spaces */
@@ -122,11 +122,10 @@ pg_strtoint16(const char *s)
        /* process digits */
        while (*ptr && isdigit((unsigned char) *ptr))
        {
-               int8            digit = (*ptr++ - '0');
-
-               if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
-                       unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+               if (unlikely(tmp >= (1 - PG_INT16_MIN / 10)))
                        goto out_of_range;
+
+               tmp = tmp * 10 + (*ptr++ - '0');
        }
 
        /* allow trailing whitespace, but not other trailing chars */
@@ -136,15 +135,17 @@ pg_strtoint16(const char *s)
        if (unlikely(*ptr != '\0'))
                goto invalid_syntax;
 
-       if (!neg)
+       if (neg)
        {
-               /* could fail if input is most negative number */
-               if (unlikely(tmp == PG_INT16_MIN))
+               if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
                        goto out_of_range;
-               tmp = -tmp;
+               return -((int16) tmp);
        }
 
-       return tmp;
+       if (tmp > PG_INT16_MAX)
+               goto out_of_range;
+
+       return (int16) tmp;
 
 out_of_range:
        ereport(ERROR,
@@ -167,15 +168,15 @@ invalid_syntax:
  * Allows any number of leading or trailing whitespace characters. Will throw
  * ereport() upon bad input format or overflow.
  *
- * NB: Accumulate input as a negative number, to deal with two's complement
+ * NB: Accumulate input as an unsigned number, to deal with two's complement
  * representation of the most negative number, which can't be represented as a
- * positive number.
+ * signed positive number.
  */
 int32
 pg_strtoint32(const char *s)
 {
        const char *ptr = s;
-       int32           tmp = 0;
+       uint32          tmp = 0;
        bool            neg = false;
 
        /* skip leading spaces */
@@ -198,11 +199,10 @@ pg_strtoint32(const char *s)
        /* process digits */
        while (*ptr && isdigit((unsigned char) *ptr))
        {
-               int8            digit = (*ptr++ - '0');
-
-               if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
-                       unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+               if (unlikely(tmp >= (1 - PG_INT32_MIN / 10)))
                        goto out_of_range;
+
+               tmp = tmp * 10 + (*ptr++ - '0');
        }
 
        /* allow trailing whitespace, but not other trailing chars */
@@ -212,15 +212,17 @@ pg_strtoint32(const char *s)
        if (unlikely(*ptr != '\0'))
                goto invalid_syntax;
 
-       if (!neg)
+       if (neg)
        {
-               /* could fail if input is most negative number */
-               if (unlikely(tmp == PG_INT32_MIN))
+               if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
                        goto out_of_range;
-               tmp = -tmp;
+               return -((int32) tmp);
        }
 
-       return tmp;
+       if (tmp > PG_INT32_MAX)
+               goto out_of_range;
+
+       return (int32) tmp;
 
 out_of_range:
        ereport(ERROR,
@@ -243,25 +245,17 @@ invalid_syntax:
  * Allows any number of leading or trailing whitespace characters. Will throw
  * ereport() upon bad input format or overflow.
  *
- * NB: Accumulate input as a negative number, to deal with two's complement
+ * NB: Accumulate input as an unsigned number, to deal with two's complement
  * representation of the most negative number, which can't be represented as a
- * positive number.
+ * signed positive number.
  */
 int64
 pg_strtoint64(const char *s)
 {
        const char *ptr = s;
-       int64           tmp = 0;
+       uint64          tmp = 0;
        bool            neg = false;
 
-       /*
-        * Do our own scan, rather than relying on sscanf which might be broken
-        * for long long.
-        *
-        * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
-        * value as a negative number.
-        */
-
        /* skip leading spaces */
        while (*ptr && isspace((unsigned char) *ptr))
                ptr++;
@@ -282,11 +276,10 @@ pg_strtoint64(const char *s)
        /* process digits */
        while (*ptr && isdigit((unsigned char) *ptr))
        {
-               int8            digit = (*ptr++ - '0');
-
-               if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
-                       unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+               if (unlikely(tmp >= (1 - PG_INT64_MIN / 10)))
                        goto out_of_range;
+
+               tmp = tmp * 10 + (*ptr++ - '0');
        }
 
        /* allow trailing whitespace, but not other trailing chars */
@@ -296,15 +289,17 @@ pg_strtoint64(const char *s)
        if (unlikely(*ptr != '\0'))
                goto invalid_syntax;
 
-       if (!neg)
+       if (neg)
        {
-               /* could fail if input is most negative number */
-               if (unlikely(tmp == PG_INT64_MIN))
+               if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
                        goto out_of_range;
-               tmp = -tmp;
+               return -((int64) tmp);
        }
 
-       return tmp;
+       if (tmp > PG_INT64_MAX)
+               goto out_of_range;
+
+       return (int64) tmp;
 
 out_of_range:
        ereport(ERROR,
-- 
2.35.1.windows.2

Improve performance of pg_strtointNN functions

Reply via email to