Re: Efficient output for integer types

David Fetter Tue, 17 Sep 2019 22:52:34 -0700

On Wed, Sep 18, 2019 at 05:42:01AM +0200, David Fetter wrote:
> On Tue, Sep 17, 2019 at 09:01:57AM +0200, David Fetter wrote:
> > On Tue, Sep 17, 2019 at 08:55:05AM +0200, David Fetter wrote:
> > > On Sun, Sep 15, 2019 at 09:18:49AM +0200, David Fetter wrote:
> > > > Folks,
> > > > 
> > > > Please find attached a couple of patches intended to $subject.
> > > > 
> > > > This patch set cut the time to copy ten million rows of randomly sized
> > > > int8s (10 of them) by about a third, so at least for that case, it's
> > > > pretty decent.
> > > 
> > > Added int4 output, removed the sprintf stuff, as it didn't seem to
> > > help in any cases I was testing.
> > 
> > Found a couple of "whiles" that should have been "ifs."
> 
> Factored out some inefficient functions and made the guts use the more
> efficient function.


Fixed a copy-and-paste error that introduced some unneeded 64-bit math.

Best,
David.
-- 
David Fetter <david(at)fetter(dot)org> http://fetter.org/
Phone: +1 415 235 3778

Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate

>From b9b2e2dac6f5c6a15cf4161ff135d201ea52a207 Mon Sep 17 00:00:00 2001
From: David Fetter <da...@fetter.org>
Date: Sun, 15 Sep 2019 00:06:29 -0700
Subject: [PATCH v5] Make int4 and int8 operations more efficient
To: hackers
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.21.0"

This is a multi-part message in MIME format.
--------------2.21.0
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit


- Output routines now emit more digits per iteration
- Code determines the number of decimal digits in int4/int8 efficiently
- Split off pg_ltoa_n from pg_ltoa
- Use pg_ltoa_n to make other functions shorter

diff --git a/src/backend/access/common/printsimple.c b/src/backend/access/common/printsimple.c
index 651ade14dd..17ca533b87 100644
--- a/src/backend/access/common/printsimple.c
+++ b/src/backend/access/common/printsimple.c
@@ -112,7 +112,7 @@ printsimple(TupleTableSlot *slot, DestReceiver *self)
 			case INT8OID:
 				{
 					int64		num = DatumGetInt64(value);
-					char		str[23];	/* sign, 21 digits and '\0' */
+					char		str[MAXINT8LEN];
 
 					pg_lltoa(num, str);
 					pq_sendcountedtext(&buf, str, strlen(str), false);
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 580043233b..3818dbaa85 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -39,6 +39,8 @@ jsonpath_scan.c: FLEX_NO_BACKUP=yes
 # jsonpath_scan is compiled as part of jsonpath_gram
 jsonpath_gram.o: jsonpath_scan.c
 
+numutils.o: CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT)
+
 # jsonpath_gram.c and jsonpath_scan.c are in the distribution tarball,
 # so they are not cleaned here.
 clean distclean maintainer-clean:
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 0ff9394a2f..6230807906 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -27,8 +27,6 @@
 #include "utils/builtins.h"
 
 
-#define MAXINT8LEN		25
-
 typedef struct
 {
 	int64		current;
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 70138feb29..8ef9fac717 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -20,6 +20,58 @@
 
 #include "common/int.h"
 #include "utils/builtins.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * A table of all two-digit numbers. This is used to speed up decimal digit
+ * generation by copying pairs of digits into the final output.
+ */
+static const char DIGIT_TABLE[200] = {
+	'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+	'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+	'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+	'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+	'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+	'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+	'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+	'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+	'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+	'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
+};
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline uint32
+decimalLength32(const uint32 v)
+{
+	uint32			t;
+	static uint64	PowersOfTen[] =
+	{1,                10,                100,
+	 1000,             10000,             100000,
+	 1000000,          10000000,          100000000,
+	 1000000000};
+
+	t = (pg_leftmost_one_pos32(v) + 1)*1233/4096;
+	return t + (v >= PowersOfTen[t]);
+}
+
+static inline uint32
+decimalLength64(const uint64 v)
+{
+	uint32			t;
+	static uint64	PowersOfTen[] =
+	{1,                10,                100,
+	 1000,             10000,             100000,
+	 1000000,          10000000,          100000000,
+	 1000000000,       10000000000,       100000000000,
+	 1000000000000,    10000000000000,    100000000000000,
+	 1000000000000000, 10000000000000000, 100000000000000000,
+	 1000000000000000000};
+
+	t = (pg_leftmost_one_pos64(v) + 1)*1233/4096;
+	return t + (v >= PowersOfTen[t]);
+}
 
 /*
  * pg_atoi: convert string to integer
@@ -276,16 +328,17 @@ pg_itoa(int16 i, char *a)
 }
 
 /*
- * pg_ltoa: converts a signed 32-bit integer to its string representation
+ * pg_ltoa_n: converts a signed 32-bit integer to its string representation, not
+ * NUL-terminated, and returns the length of that string representation
  *
- * Caller must ensure that 'a' points to enough memory to hold the result
- * (at least 12 bytes, counting a leading sign and trailing NUL).
+ * Caller must ensure that 'a' points to enough memory to hold the result (at
+ * least 11 bytes, counting a leading sign).
  */
-void
-pg_ltoa(int32 value, char *a)
+static int32
+pg_ltoa_n(int32 value, char *a)
 {
-	char	   *start = a;
-	bool		neg = false;
+	uint32	olength;
+	uint32	i = 0, adjust = 0;
 
 	/*
 	 * Avoid problems with the most negative integer not being representable
@@ -293,53 +346,111 @@ pg_ltoa(int32 value, char *a)
 	 */
 	if (value == PG_INT32_MIN)
 	{
-		memcpy(a, "-2147483648", 12);
-		return;
+		memcpy(a, "-2147483648", 11);
+		return 11;
 	}
-	else if (value < 0)
+
+	/* Might as well handle this case, too */
+	if (value == 0)
+	{
+		memcpy(a, "0", 1);
+		return 1;
+	}
+
+	if (value < 0)
 	{
 		value = -value;
-		neg = true;
-	}
-
-	/* Compute the result string backwards. */
-	do
-	{
-		int32		remainder;
-		int32		oldval = value;
-
-		value /= 10;
-		remainder = oldval - value * 10;
-		*a++ = '0' + remainder;
-	} while (value != 0);
-
-	if (neg)
 		*a++ = '-';
+		adjust++;
+	}
+
+	olength = decimalLength32(value);
+
+	/* Compute the result string. */
+	while (value >= 100000000)
+	{
+		/* Expensive division (32-bit here: value is an int32). Optimize? */
+		const	uint32 value2 = value % 100000000;
+
+		value /= 100000000;
 
-	/* Add trailing NUL byte, and back up 'a' to the last character. */
-	*a-- = '\0';
+		const uint32 c = value2 % 10000;
+		const uint32 d = value2 / 10000;
+		const uint32 c0 = (c % 100) << 1;
+		const uint32 c1 = (c / 100) << 1;
+		const uint32 d0 = (d % 100) << 1;
+		const uint32 d1 = (d / 100) << 1;
 
-	/* Reverse string. */
-	while (start < a)
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2);
+		memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2);
+		memcpy(a + olength - i - 6, DIGIT_TABLE + d0, 2);
+		memcpy(a + olength - i - 8, DIGIT_TABLE + d1, 2);
+		i += 8;
+	}
+
+	if (value >= 10000)
 	{
-		char		swap = *start;
+		const	uint32 c = value - 10000 * (value / 10000);
+
+		value /= 10000;
 
-		*start++ = *a;
-		*a-- = swap;
+		const	uint32 c0 = (c % 100) << 1;
+		const	uint32 c1 = (c / 100) << 1;
+
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2);
+		memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2);
+		i += 4;
 	}
+	if (value >= 100)
+	{
+		const uint32 c = (value % 100) << 1;
+
+		value /= 100;
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2);
+		i += 2;
+	}
+	if (value >= 10)
+	{
+		const uint32 c = value << 1;
+
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2);
+		i += 2;
+	}
+	else
+	{
+		*a = (char) ('0' + value);
+		i++;
+	}
+
+	i += adjust;
+	return i;
+}
+
+/*
+ * NUL-terminate the output of pg_ltoa_n.
+ *
+ * It is the caller's responsibility to ensure that a is at least 12 bytes long,
+ * which is enough room to hold a minus sign, a maximally long int32, and the
+ * above terminating NUL.
+ */
+void
+pg_ltoa(int32 value, char *a)
+{
+	int32 len = pg_ltoa_n(value, a);
+	a[len] = '\0';
 }
 
 /*
  * pg_lltoa: convert a signed 64-bit integer to its string representation
  *
  * Caller must ensure that 'a' points to enough memory to hold the result
- * (at least MAXINT8LEN+1 bytes, counting a leading sign and trailing NUL).
+ * (at least MAXINT8LEN bytes, counting a leading sign and trailing NUL).
  */
 void
 pg_lltoa(int64 value, char *a)
 {
-	char	   *start = a;
-	bool		neg = false;
+	uint32	olength;
+	uint32	i = 0;
 
 	/*
 	 * Avoid problems with the most negative integer not being representable
@@ -350,37 +461,82 @@ pg_lltoa(int64 value, char *a)
 		memcpy(a, "-9223372036854775808", 21);
 		return;
 	}
-	else if (value < 0)
+
+	/* Might as well handle this case, too */
+	if (value == 0)
+	{
+		memcpy(a, "0", 2);
+		return;
+	}
+
+	if (value < 0)
 	{
 		value = -value;
-		neg = true;
-	}
-
-	/* Compute the result string backwards. */
-	do
-	{
-		int64		remainder;
-		int64		oldval = value;
-
-		value /= 10;
-		remainder = oldval - value * 10;
-		*a++ = '0' + remainder;
-	} while (value != 0);
-
-	if (neg)
 		*a++ = '-';
+	}
+
+	olength = decimalLength64(value);
+
+	/* Compute the result string. */
+	while (value >= 100000000)
+	{
+		/* Expensive 64-bit division. Optimize? */
+		const	uint64 q = value / 100000000;
+		uint32	value2 = (uint32) (value - 100000000 * q);
+
+		value = q;
 
-	/* Add trailing NUL byte, and back up 'a' to the last character. */
-	*a-- = '\0';
+		const uint32 c = value2 % 10000;
+		const uint32 d = value2 / 10000;
+		const uint32 c0 = (c % 100) << 1;
+		const uint32 c1 = (c / 100) << 1;
+		const uint32 d0 = (d % 100) << 1;
+		const uint32 d1 = (d / 100) << 1;
 
-	/* Reverse string. */
-	while (start < a)
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2);
+		memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2);
+		memcpy(a + olength - i - 6, DIGIT_TABLE + d0, 2);
+		memcpy(a + olength - i - 8, DIGIT_TABLE + d1, 2);
+		i += 8;
+	}
+
+	/* Switch to 32-bit for speed */
+	uint32		value2 = (uint32) value;
+
+	if (value2 >= 10000)
 	{
-		char		swap = *start;
+		const	uint32 c = value2 - 10000 * (value2 / 10000);
+
+		value2 /= 10000;
+
+		const	uint32 c0 = (c % 100) << 1;
+		const	uint32 c1 = (c / 100) << 1;
 
-		*start++ = *a;
-		*a-- = swap;
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2);
+		memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2);
+		i += 4;
 	}
+	if (value2 >= 100)
+	{
+		const uint32 c = (value2 % 100) << 1;
+
+		value2 /= 100;
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2);
+		i += 2;
+	}
+	if (value2 >= 10)
+	{
+		const uint32 c = value2 << 1;
+
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2);
+		i += 2;
+	}
+	else
+	{
+		*a = (char) ('0' + value2);
+	}
+
+	a[olength] = '\0';
 }
 
 
@@ -409,60 +565,44 @@ pg_lltoa(int64 value, char *a)
 char *
 pg_ltostr_zeropad(char *str, int32 value, int32 minwidth)
 {
-	char	   *start = str;
-	char	   *end = &str[minwidth];
-	int32		num = value;
+	int32		len;
 
 	Assert(minwidth > 0);
 
-	/*
-	 * Handle negative numbers in a special way.  We can't just write a '-'
-	 * prefix and reverse the sign as that would overflow for INT32_MIN.
-	 */
-	if (num < 0)
+	if (value >= 0)
 	{
-		*start++ = '-';
-		minwidth--;
+		if (value < 100 && minwidth == 2) /* Short cut for common case */
+		{
+			const uint32 c = value << 1;
+			memcpy(str, DIGIT_TABLE + c, 2);
+			return str + 2;
+		}
 
+		len = pg_ltoa_n(value, str);
+		if (minwidth <= len)
+			return str + len;
+
+		memmove(str + minwidth - len, str, len);
+		for(int i = 0; i < minwidth - len; i++)
+		{
+			memcpy(str + i, DIGIT_TABLE, 1);
+		}
+		return str + minwidth;
+	}
+	else
+	{
 		/*
-		 * Build the number starting at the last digit.  Here remainder will
-		 * be a negative number, so we must reverse the sign before adding '0'
-		 * in order to get the correct ASCII digit.
+		 * Changing this number's sign would overflow PG_INT32_MAX,
+		 * so special-case it.
 		 */
-		while (minwidth--)
+		if (value == PG_INT32_MIN)
 		{
-			int32		oldval = num;
-			int32		remainder;
-
-			num /= 10;
-			remainder = oldval - num * 10;
-			start[minwidth] = '0' - remainder;
+			memcpy(str, "-2147483648", 11);
+			return str + 11;
 		}
+		*str++ = '-';
+		return pg_ltostr_zeropad(str, -value, minwidth - 1);
 	}
-	else
-	{
-		/* Build the number starting at the last digit */
-		while (minwidth--)
-		{
-			int32		oldval = num;
-			int32		remainder;
-
-			num /= 10;
-			remainder = oldval - num * 10;
-			start[minwidth] = '0' + remainder;
-		}
-	}
-
-	/*
-	 * If minwidth was not high enough to fit the number then num won't have
-	 * been divided down to zero.  We punt the problem to pg_ltostr(), which
-	 * will generate a correct answer in the minimum valid width.
-	 */
-	if (num != 0)
-		return pg_ltostr(str, value);
-
-	/* Otherwise, return last output character + 1 */
-	return end;
 }
 
 /*
@@ -486,62 +626,8 @@ pg_ltostr_zeropad(char *str, int32 value, int32 minwidth)
 char *
 pg_ltostr(char *str, int32 value)
 {
-	char	   *start;
-	char	   *end;
-
-	/*
-	 * Handle negative numbers in a special way.  We can't just write a '-'
-	 * prefix and reverse the sign as that would overflow for INT32_MIN.
-	 */
-	if (value < 0)
-	{
-		*str++ = '-';
-
-		/* Mark the position we must reverse the string from. */
-		start = str;
-
-		/* Compute the result string backwards. */
-		do
-		{
-			int32		oldval = value;
-			int32		remainder;
-
-			value /= 10;
-			remainder = oldval - value * 10;
-			/* As above, we expect remainder to be negative. */
-			*str++ = '0' - remainder;
-		} while (value != 0);
-	}
-	else
-	{
-		/* Mark the position we must reverse the string from. */
-		start = str;
-
-		/* Compute the result string backwards. */
-		do
-		{
-			int32		oldval = value;
-			int32		remainder;
-
-			value /= 10;
-			remainder = oldval - value * 10;
-			*str++ = '0' + remainder;
-		} while (value != 0);
-	}
-
-	/* Remember the end+1 and back up 'str' to the last character. */
-	end = str--;
-
-	/* Reverse string. */
-	while (start < str)
-	{
-		char		swap = *start;
-
-		*start++ = *str;
-		*str-- = swap;
-	}
-
-	return end;
+	int32	len = pg_ltoa_n(value, str);
+	return str + len;
 }
 
 /*
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 937ddb7ef0..628fe73573 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -18,6 +18,7 @@
 #include "nodes/nodes.h"
 #include "utils/fmgrprotos.h"
 
+#define MAXINT8LEN 21
 
 /* bool.c */
 extern bool parse_bool(const char *value, bool *result);
@@ -46,6 +47,7 @@ extern int32 pg_atoi(const char *s, int size, int c);
 extern int16 pg_strtoint16(const char *s);
 extern int32 pg_strtoint32(const char *s);
 extern void pg_itoa(int16 i, char *a);
+static int32 pg_ltoa_n(int32 l, char *a);
 extern void pg_ltoa(int32 l, char *a);
 extern void pg_lltoa(int64 ll, char *a);
 extern char *pg_ltostr_zeropad(char *str, int32 value, int32 minwidth);

--------------2.21.0--

Re: Efficient output for integer types

Reply via email to