Re: Yet another fast GiST build

Heikki Linnakangas Mon, 07 Sep 2020 02:48:20 -0700

On 24/02/2020 10:50, Andrey M. Borodin wrote:

On 24 февр. 2020 г., at 01:58, Thomas Munro <thomas.mu...@gmail.com> wrote:


On Thu, Feb 20, 2020 at 10:14 AM Thomas Munro <thomas.mu...@gmail.com> wrote:

1.  We expect floats to be in IEEE format, and the sort order of IEEE
floats is mostly correlated to the binary sort order of the bits
reinterpreted as an int.  It isn't in some special cases, but for this
use case we don't really care about that, we're just trying to
encourage locality.


I suppose there is a big jump in integer value (whether signed or
unsigned) as you cross from positive to negative floats, and then the
sort order is reversed.  I have no idea if either of those things is a
problem worth fixing.  That made me wonder if there might also be an
endianness problem.  It seems from some quick googling that all
current architectures have integers and floats of the same endianness.
Apparently this wasn't always the case, and some ARMs have a weird
half-flipped arrangement for 64 bit floats, but not 32 bit floats as
you are using here.


Yes, this leap is a problem for point as generic data type. And I do not know
how to fix it. It can cause inefficient Index Scans when searching near (0,0) 
and query
window touches simultaneously all quadrants (4x slower).

I took a stab at fixing this, see attached patch (applies on top of your patch v14).

To evaluate this, I used the other attached patch to expose the zorder function to SQL, and plotted points around zero with gnuplot. See the attached two images, one with patch v14, and the other one with this patch.

I'll continue looking at these patches in whole tomorrow. I think it's getting close to a committable state.

But everything will be just fine when all data is in 2nd quadrant.


Simon Riggs and friends would agree :-)

- Heikki

>From c7cadbb017aa3ec446136c36bb58b10d35ed095a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Mon, 7 Sep 2020 12:23:20 +0300
Subject: [PATCH v15 3/4] Expose point_zorder() to SQL.

---
 src/backend/access/gist/gistproc.c | 19 +++++++++++++++++++
 src/include/catalog/pg_proc.dat    |  4 ++++
 2 files changed, 23 insertions(+)

diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index 387c66d3ca3..4ed9f46c9bb 100644
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -1626,3 +1626,22 @@ gist_point_sortsupport(PG_FUNCTION_ARGS)
 	ssup->comparator = gist_bbox_fastcmp;
 	PG_RETURN_VOID();
 }
+
+/*
+ * Expose the Z-Order for debugging purposes
+ */
+Datum
+point_zorder(PG_FUNCTION_ARGS)
+{
+	Point	   *p = PG_GETARG_POINT_P(0);
+	uint64		zorder;
+
+	zorder = point_zorder_internal(p);
+
+	/*
+	 * XXX: Shift by one, so that when it's interpreted as a signed integer,
+	 * it's always positive. We lose the least-significant bit, but that's OK
+	 * for the quick plotting I'm using this for.
+	 */
+	return Int64GetDatum(zorder >> 1);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 96d7efd4270..9a98bed79e8 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3004,6 +3004,10 @@
   proname => 'point_div', prorettype => 'point', proargtypes => 'point point',
   prosrc => 'point_div' },
 
+{ oid => '9110',
+  proname => 'point_zorder', prorettype => 'int8', proargtypes => 'point',
+  prosrc => 'point_zorder' },
+
 { oid => '1445',
   proname => 'poly_npoints', prorettype => 'int4', proargtypes => 'polygon',
   prosrc => 'poly_npoints' },
-- 
2.20.1

>From a7e0237d0bfd4909c0c4e0237013efe0cd071dd4 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Mon, 7 Sep 2020 12:33:36 +0300
Subject: [PATCH v15 4/4] Map negative values better.

---
 src/backend/access/gist/gistproc.c | 169 +++++++++++++++++++++--------
 1 file changed, 121 insertions(+), 48 deletions(-)

diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index 4ed9f46c9bb..7ca7eda84b0 100644
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -31,10 +31,11 @@ static bool gist_box_leaf_consistent(BOX *key, BOX *query,
 									 StrategyNumber strategy);
 static bool rtree_internal_consistent(BOX *key, BOX *query,
 									  StrategyNumber strategy);
-static int64 part_bits32_by2(uint32 x);
-static int64 interleave_bits32(uint32 x, uint32 y);
-static inline uint64 point_zorder_internal(Point *p);
-static int gist_bbox_fastcmp(Datum x, Datum y, SortSupport ssup);
+static uint64 part_bits32_by2(uint32 x);
+static uint64 interleave_bits32(uint32 x, uint32 y);
+static uint32 ieee_float32_to_uint32(float f);
+static uint64 point_zorder_internal(Point *p);
+static int	gist_bbox_fastcmp(Datum x, Datum y, SortSupport ssup);
 
 
 /* Minimum accepted ratio of split */
@@ -1549,75 +1550,147 @@ gist_poly_distance(PG_FUNCTION_ARGS)
 
 /* Z-order routines */
 /* Interleave 32 bits with zeroes */
-static int64
+static uint64
 part_bits32_by2(uint32 x)
 {
 	uint64		n = x;
 
 	n = (n | (n << 16)) & UINT64CONST(0x0000FFFF0000FFFF);
-	n = (n | (n <<  8)) & UINT64CONST(0x00FF00FF00FF00FF);
-	n = (n | (n <<  4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
-	n = (n | (n <<  2)) & UINT64CONST(0x3333333333333333);
-	n = (n | (n <<  1)) & UINT64CONST(0x5555555555555555);
+	n = (n | (n << 8)) & UINT64CONST(0x00FF00FF00FF00FF);
+	n = (n | (n << 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+	n = (n | (n << 2)) & UINT64CONST(0x3333333333333333);
+	n = (n | (n << 1)) & UINT64CONST(0x5555555555555555);
 
 	return n;
 }
 
 /*
- * Compute Z-order for integers. Also called Morton code.
+ * Compute Z-order for a point
+ *
+ * Map a two-dimensional point to a single integer, in a way that preserves
+ * locality. Points that are close in the two-dimensional space are mapped to
+ * integer that are not far from each other. We do that by interleaving the
+ * bits in the X and Y components, this is called a Z-order or Morton Code.
+ *
+ * A Morton Code is normally defined only for integers, but the X and Y values
+ * of a point are floating point. We expect floats to be in IEEE format, and
+ * the sort order of IEEE floats is mostly correlated to the binary sort order
+ * of the bits reinterpreted as an int.  It isn't in some special cases, but
+ * for this use case we don't really care about that, we're just trying to
+ * encourage locality.
  */
-static int64
+
+static uint64
+point_zorder_internal(Point *p)
+{
+	return interleave_bits32(ieee_float32_to_uint32(p->x),
+							 ieee_float32_to_uint32(p->y));
+}
+
+static uint64
 interleave_bits32(uint32 x, uint32 y)
 {
 	return part_bits32_by2(x) | (part_bits32_by2(y) << 1);
 }
 
-/* Compute Z-order for Point */
-static inline uint64
-point_zorder_internal(Point *p)
+/*
+ * Convert a 32-bit IEEE float to uint32 in a way that preserves the ordering.
+ */
+static uint32
+ieee_float32_to_uint32(float f)
 {
-	/*
-	 * In this function we need to compute Morton codes for floating point
-	 * components p->x and p->y. But Morton codes are defined only for
-	 * integers.
-	 * We expect floats to be in IEEE format, and the sort order of IEEE
-	 * floats is mostly correlated to the binary sort order of the bits
-	 * reinterpreted as an int.  It isn't in some special cases, but for this
-	 * use case we don't really care about that, we're just trying to
-	 * encourage locality.
-	 * There is a big jump in integer value (whether signed or
-	 * unsigned) as you cross from positive to negative floats, and then the
-	 * sort order is reversed. This can have negative effect on searches when
-	 * query window touches many quadrants simultaneously. In worst case this
-	 * searches can be x4 more costly.
-	 * We generate a Morton code that interleaves the bits of N integers
-	 * to produce a single integer that preserves locality: things that were
-	 * close in the N dimensional space are close in the resulting integer.
+	/*----
+	 *
+	 * IEEE 754 floating point format
+	 * ------------------------------
+	 *
+	 * IEEE 754 floating point numbers have this format:
+	 *
+	 *   exponent (8 bits)
+	 *   |
+	 * s eeeeeeee mmmmmmmmmmmmmmmmmmmmmmm
+	 * |          |
+	 * sign       mantissa (23 bits)
+	 *
+	 * Infinity has all bits in the exponent set and the mantissa is
+	 * all-zeros. Negative infinity is the same but with the sign bit set.
+	 *
+	 * NaNs are represented with all bits in the exponent set, and the least
+	 * significant bit in the mantissa also set. The rest of the mantissa bits
+	 * can be used to distinguish different kinds of NaNs.
+	 *
+	 * The IEEE format has the nice property that when you take the bit
+	 * representation and interpret it as an integer, the order is preserved,
+	 * except for the sign. That holds for the +-Infinity values too.
+	 *
+	 * Mapping to uint32
+	 * -----------------
+	 *
+	 * In order to have a smooth transition from negative to positive numbers,
+	 * we map floats to unsigned integers like this:
+	 *
+	 * x < 0 to range 0-7FFFFFFF
+	 * x = 0 to value 8000000 (both positive and negative zero)
+	 * x > 0 to range 8000001-FFFFFFFF
+	 *
+	 * We don't care to distinguish different kind of NaNs, so they are all
+	 * mapped to the same arbitrary value, FFFFFFFF. Because of the IEEE bit
+	 * representation of NaNs, there aren't actually any non-NaN values that
+	 * would be mapped to FFFFFFFF, there is actually a range of unused values
+	 * on both ends of the uint32 space.
 	 */
-	union {
-		float f;
-		uint32 i;
-	} a,b;
-	a.f = p->x;
-	b.f = p->y;
-	if (isnan(a.f))
-		a.i = PG_INT32_MAX;
-	if (isnan(b.f))
-		b.i = PG_INT32_MAX;
-	return interleave_bits32(a.i, b.i);
+	if (isnan(f))
+		return 0xFFFFFFFF;
+	else
+	{
+		union
+		{
+			float		f;
+			uint32		i;
+		}			u;
+
+		u.f = f;
+
+		/* Check the sign bit */
+		if ((u.i & 0x80000000) != 0)
+		{
+			/*
+			 * Map the negative value to range 0-7FFFFFFF. This flips the sign
+			 * bit to 0 in the same instruction.
+			 */
+			Assert(f < 0);
+
+			u.i ^= 0xFFFFFFFF;
+		}
+		else
+		{
+			/* Map the positive value (or 0) to range 80000000-FFFFFFFF */
+			u.i |= 0x80000000;
+		}
+
+		return u.i;
+	}
 }
 
 static int
 gist_bbox_fastcmp(Datum x, Datum y, SortSupport ssup)
 {
-	Point	*p1 = &(DatumGetBoxP(x)->low);
-	Point	*p2 = &(DatumGetBoxP(y)->low);
-	uint64	 z1 = point_zorder_internal(p1);
-	uint64	 z2 = point_zorder_internal(p2);
-
-	return z1 == z2 ? 0 : z1 > z2 ? 1 : -1;
+	Point	   *p1 = &(DatumGetBoxP(x)->low);
+	Point	   *p2 = &(DatumGetBoxP(y)->low);
+	uint64		z1 = point_zorder_internal(p1);
+	uint64		z2 = point_zorder_internal(p2);
+
+	if (z1 > z2)
+		return 1;
+	else if (z1 < z2)
+		return -1;
+	else
+		return 0;
 }
 
+/*
+ * Sort support routine for fast GiST index build by sorting.
+ */
 Datum
 gist_point_sortsupport(PG_FUNCTION_ARGS)
 {
-- 
2.20.1

gist-point-test.sql
Description: application/sql

Re: Yet another fast GiST build

Reply via email to