Hi,

As discussed in this thread (https://www.postgresql.org/message-id/201010112055.o9bktzf7011...@wwwmaster.postgresql.org) btree_gist is broken for the inet and cidr types. The reason is that it uses convert_network_to_scalar() to make a lossy GiST index, but convert_network_to_scalar() throws away the netmask which is necessary to preserve the correct sort order of the keys.

This has been broken for as long as we have had btree_gist indexes over inet. And personally I am not a fan of PostgreSQL shipping with known broken features. But it is also not obvious to me what the best way to fix it is.

To refresh my memory I quickly hacked together a proof of concept patch which adds a couple of test cases to reproduce the bug plus uses basically the same implementation of the btree_gist as text and numeric, which changes to index keys from 64 bit floats to a variable sized bytea. This means that the indexes are no longer lossy.

Some questions/thoughts:

Is it even worth fixing btree_gist for inet when we already have inet_ops which can do more things and is not broken. We could just deprecate and then rip out gist_inet_ops?

If we are going to fix it I cannot see any reasonably sized fix which does not also corrupt all existing indexes on inet, even those which do not contain any net masks (those which contain netmasks are already broken anyway). Do we have some way to handle this kind of breakage nicely?

I see two ways to fix the inet indexes, either do as in my PoC patch and make the indexes no longer lossy. This will make it more similar to how btree_gist works for other data types but requires us to change the storage type of the index keys, but since all indexes will break anyway that might not be an issue.

The other way is to implement correct lossy indexes, using something similar to what network_abbrev_convert() does.

Does anyone have any thoughts on this?

Andreas
diff --git a/contrib/btree_gist/btree_gist--1.2.sql b/contrib/btree_gist/btree_gist--1.2.sql
index 1efe753043..7283c0bab7 100644
--- a/contrib/btree_gist/btree_gist--1.2.sql
+++ b/contrib/btree_gist/btree_gist--1.2.sql
@@ -1516,11 +1516,11 @@ AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT;
 
 CREATE FUNCTION gbt_inet_union(internal, internal)
-RETURNS gbtreekey16
+RETURNS gbtreekey_var
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT;
 
-CREATE FUNCTION gbt_inet_same(gbtreekey16, gbtreekey16, internal)
+CREATE FUNCTION gbt_inet_same(gbtreekey_var, gbtreekey_var, internal)
 RETURNS internal
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT;
@@ -1537,15 +1537,15 @@ AS
 	FUNCTION	1	gbt_inet_consistent (internal, inet, int2, oid, internal),
 	FUNCTION	2	gbt_inet_union (internal, internal),
 	FUNCTION	3	gbt_inet_compress (internal),
-	FUNCTION	4	gbt_decompress (internal),
+	FUNCTION	4	gbt_var_decompress (internal),
 	FUNCTION	5	gbt_inet_penalty (internal, internal, internal),
 	FUNCTION	6	gbt_inet_picksplit (internal, internal),
-	FUNCTION	7	gbt_inet_same (gbtreekey16, gbtreekey16, internal),
-	STORAGE		gbtreekey16;
+	FUNCTION	7	gbt_inet_same (gbtreekey_var, gbtreekey_var, internal),
+	STORAGE		gbtreekey_var;
 
 ALTER OPERATOR FAMILY gist_inet_ops USING gist ADD
-	OPERATOR	6	<>  (inet, inet) ;
-	-- no fetch support, the compress function is lossy
+	OPERATOR	6	<>  (inet, inet) ,
+	FUNCTION	9 (inet, inet) gbt_var_fetch (internal) ;
 
 -- Create the operator class
 CREATE OPERATOR CLASS gist_cidr_ops
@@ -1559,12 +1559,12 @@ AS
 	FUNCTION	1	gbt_inet_consistent (internal, inet, int2, oid, internal),
 	FUNCTION	2	gbt_inet_union (internal, internal),
 	FUNCTION	3	gbt_inet_compress (internal),
-	FUNCTION	4	gbt_decompress (internal),
+	FUNCTION	4	gbt_var_decompress (internal),
 	FUNCTION	5	gbt_inet_penalty (internal, internal, internal),
 	FUNCTION	6	gbt_inet_picksplit (internal, internal),
-	FUNCTION	7	gbt_inet_same (gbtreekey16, gbtreekey16, internal),
-	STORAGE		gbtreekey16;
+	FUNCTION	7	gbt_inet_same (gbtreekey_var, gbtreekey_var, internal),
+	STORAGE		gbtreekey_var;
 
 ALTER OPERATOR FAMILY gist_cidr_ops USING gist ADD
-	OPERATOR	6	<> (inet, inet) ;
-	-- no fetch support, the compress function is lossy
+	OPERATOR	6	<> (inet, inet) ,
+	FUNCTION	9 (inet, inet) gbt_var_fetch (internal) ;
diff --git a/contrib/btree_gist/btree_gist--1.5--1.6.sql b/contrib/btree_gist/btree_gist--1.5--1.6.sql
index 5e1fcb47bd..8875f33e1c 100644
--- a/contrib/btree_gist/btree_gist--1.5--1.6.sql
+++ b/contrib/btree_gist/btree_gist--1.5--1.6.sql
@@ -167,7 +167,7 @@ ALTER FUNCTION gbt_inet_compress(internal) PARALLEL SAFE;
 ALTER FUNCTION gbt_inet_penalty(internal, internal, internal) PARALLEL SAFE;
 ALTER FUNCTION gbt_inet_picksplit(internal, internal) PARALLEL SAFE;
 ALTER FUNCTION gbt_inet_union(internal, internal) PARALLEL SAFE;
-ALTER FUNCTION gbt_inet_same(gbtreekey16, gbtreekey16, internal) PARALLEL SAFE;
+ALTER FUNCTION gbt_inet_same(gbtreekey_var, gbtreekey_var, internal) PARALLEL SAFE;
 ALTER FUNCTION gbt_uuid_consistent(internal, uuid, smallint, oid, internal) PARALLEL SAFE;
 ALTER FUNCTION gbt_uuid_fetch(internal) PARALLEL SAFE;
 ALTER FUNCTION gbt_uuid_compress(internal) PARALLEL SAFE;
diff --git a/contrib/btree_gist/btree_inet.c b/contrib/btree_gist/btree_inet.c
index e4b3a946b2..bf7c1b7ecf 100644
--- a/contrib/btree_gist/btree_inet.c
+++ b/contrib/btree_gist/btree_inet.c
@@ -4,17 +4,11 @@
 #include "postgres.h"
 
 #include "btree_gist.h"
-#include "btree_utils_num.h"
+#include "btree_utils_var.h"
 #include "catalog/pg_type.h"
 #include "utils/builtins.h"
 #include "utils/inet.h"
 
-typedef struct inetkey
-{
-	double		lower;
-	double		upper;
-} inetKEY;
-
 /*
 ** inet ops
 */
@@ -27,54 +21,72 @@ PG_FUNCTION_INFO_V1(gbt_inet_same);
 
 
 static bool
-gbt_inetgt(const void *a, const void *b, FmgrInfo *flinfo)
+gbt_inetgt(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
-	return (*((const double *) a) > *((const double *) b));
+	//ereport(NOTICE, (errmsg("gt")));
+	return DatumGetBool(DirectFunctionCall2(network_gt,
+											PointerGetDatum(a),
+											PointerGetDatum(b)));
 }
+
 static bool
-gbt_inetge(const void *a, const void *b, FmgrInfo *flinfo)
+gbt_inetge(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
-	return (*((const double *) a) >= *((const double *) b));
+	//ereport(NOTICE, (errmsg("ge")));
+	return DatumGetBool(DirectFunctionCall2(network_ge,
+											PointerGetDatum(a),
+											PointerGetDatum(b)));
 }
+
 static bool
-gbt_ineteq(const void *a, const void *b, FmgrInfo *flinfo)
+gbt_ineteq(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
-	return (*((const double *) a) == *((const double *) b));
+	//ereport(NOTICE, (errmsg("eq")));
+	return DatumGetBool(DirectFunctionCall2(network_eq,
+											PointerGetDatum(a),
+											PointerGetDatum(b)));
 }
+
 static bool
-gbt_inetle(const void *a, const void *b, FmgrInfo *flinfo)
+gbt_inetle(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
-	return (*((const double *) a) <= *((const double *) b));
+	//ereport(NOTICE, (errmsg("le")));
+	return DatumGetBool(DirectFunctionCall2(network_le,
+											PointerGetDatum(a),
+											PointerGetDatum(b)));
 }
+
 static bool
-gbt_inetlt(const void *a, const void *b, FmgrInfo *flinfo)
+gbt_inetlt(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
-	return (*((const double *) a) < *((const double *) b));
+	//ereport(NOTICE, (errmsg("lt")));
+	return DatumGetBool(DirectFunctionCall2(network_lt,
+											PointerGetDatum(a),
+											PointerGetDatum(b)));
 }
 
-static int
-gbt_inetkey_cmp(const void *a, const void *b, FmgrInfo *flinfo)
+static int32
+gbt_inetkey_cmp(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
-	inetKEY    *ia = (inetKEY *) (((const Nsrt *) a)->t);
-	inetKEY    *ib = (inetKEY *) (((const Nsrt *) b)->t);
+	int32 x = DatumGetInt32(DirectFunctionCall2(network_cmp,
+											 PointerGetDatum(a),
+											 PointerGetDatum(b)));
 
-	if (ia->lower == ib->lower)
-	{
-		if (ia->upper == ib->upper)
-			return 0;
+	char *as = DatumGetCString(DirectFunctionCall1(inet_out, PointerGetDatum(a)));
+	char *bs = DatumGetCString(DirectFunctionCall1(inet_out, PointerGetDatum(b)));
 
-		return (ia->upper > ib->upper) ? 1 : -1;
-	}
+	//ereport(NOTICE, (errmsg("cmp %d %s %s", x, as, bs)));
 
-	return (ia->lower > ib->lower) ? 1 : -1;
+	return DatumGetInt32(DirectFunctionCall2(network_cmp,
+											 PointerGetDatum(a),
+											 PointerGetDatum(b)));
 }
 
-
-static const gbtree_ninfo tinfo =
+static const gbtree_vinfo tinfo =
 {
 	gbt_t_inet,
-	sizeof(double),
-	16,							/* sizeof(gbtreekey16) */
+	0,
+	false,
 	gbt_inetgt,
 	gbt_inetge,
 	gbt_ineteq,
@@ -94,25 +106,10 @@ Datum
 gbt_inet_compress(PG_FUNCTION_ARGS)
 {
 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
-	GISTENTRY  *retval;
-
-	if (entry->leafkey)
-	{
-		inetKEY    *r = (inetKEY *) palloc(sizeof(inetKEY));
-		bool		failure = false;
-
-		retval = palloc(sizeof(GISTENTRY));
-		r->lower = convert_network_to_scalar(entry->key, INETOID, &failure);
-		Assert(!failure);
-		r->upper = r->lower;
-		gistentryinit(*retval, PointerGetDatum(r),
-					  entry->rel, entry->page,
-					  entry->offset, false);
-	}
-	else
-		retval = entry;
-
-	PG_RETURN_POINTER(retval);
+
+	GISTENTRY *e2 = gbt_var_compress(entry, &tinfo);
+
+	PG_RETURN_POINTER(gbt_var_compress(entry, &tinfo));
 }
 
 
@@ -120,27 +117,21 @@ Datum
 gbt_inet_consistent(PG_FUNCTION_ARGS)
 {
 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
-	Datum		dquery = PG_GETARG_DATUM(1);
+	void	   *query = (void *) PG_GETARG_INET_PP(1);
 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
 
 	/* Oid		subtype = PG_GETARG_OID(3); */
 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
-	inetKEY    *kkk = (inetKEY *) DatumGetPointer(entry->key);
-	GBT_NUMKEY_R key;
-	double		query;
-	bool		failure = false;
-
-	query = convert_network_to_scalar(dquery, INETOID, &failure);
-	Assert(!failure);
+	bool		retval;
+	GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(entry->key);
+	GBT_VARKEY_R r = gbt_var_key_readable(key);
 
-	/* All cases served by this function are inexact */
-	*recheck = true;
+	/* All cases served by this function are exact */
+	*recheck = false;
 
-	key.lower = (GBT_NUMKEY *) &kkk->lower;
-	key.upper = (GBT_NUMKEY *) &kkk->upper;
-
-	PG_RETURN_BOOL(gbt_num_consistent(&key, (void *) &query,
-									  &strategy, GIST_LEAF(entry), &tinfo, fcinfo->flinfo));
+	retval = gbt_var_consistent(&r, query, strategy, PG_GET_COLLATION(),
+								GIST_LEAF(entry), &tinfo, fcinfo->flinfo);
+	PG_RETURN_BOOL(retval);
 }
 
 
@@ -148,41 +139,42 @@ Datum
 gbt_inet_union(PG_FUNCTION_ARGS)
 {
 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
-	void	   *out = palloc(sizeof(inetKEY));
+	int32	   *size = (int *) PG_GETARG_POINTER(1);
 
-	*(int *) PG_GETARG_POINTER(1) = sizeof(inetKEY);
-	PG_RETURN_POINTER(gbt_num_union((void *) out, entryvec, &tinfo, fcinfo->flinfo));
+	PG_RETURN_POINTER(gbt_var_union(entryvec, size, PG_GET_COLLATION(),
+									&tinfo, fcinfo->flinfo));
 }
 
 
 Datum
 gbt_inet_penalty(PG_FUNCTION_ARGS)
 {
-	inetKEY    *origentry = (inetKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
-	inetKEY    *newentry = (inetKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
+	GISTENTRY  *o = (GISTENTRY *) PG_GETARG_POINTER(0);
+	GISTENTRY  *n = (GISTENTRY *) PG_GETARG_POINTER(1);
 	float	   *result = (float *) PG_GETARG_POINTER(2);
 
-	penalty_num(result, origentry->lower, origentry->upper, newentry->lower, newentry->upper);
-
-	PG_RETURN_POINTER(result);
-
+	PG_RETURN_POINTER(gbt_var_penalty(result, o, n, PG_GET_COLLATION(),
+									  &tinfo, fcinfo->flinfo));
 }
 
 Datum
 gbt_inet_picksplit(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_POINTER(gbt_num_picksplit((GistEntryVector *) PG_GETARG_POINTER(0),
-										(GIST_SPLITVEC *) PG_GETARG_POINTER(1),
-										&tinfo, fcinfo->flinfo));
+	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+
+	gbt_var_picksplit(entryvec, v, PG_GET_COLLATION(),
+					  &tinfo, fcinfo->flinfo);
+	PG_RETURN_POINTER(v);
 }
 
 Datum
 gbt_inet_same(PG_FUNCTION_ARGS)
 {
-	inetKEY    *b1 = (inetKEY *) PG_GETARG_POINTER(0);
-	inetKEY    *b2 = (inetKEY *) PG_GETARG_POINTER(1);
+	Datum		d1 = PG_GETARG_DATUM(0);
+	Datum		d2 = PG_GETARG_DATUM(1);
 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
 
-	*result = gbt_num_same((void *) b1, (void *) b2, &tinfo, fcinfo->flinfo);
+	*result = gbt_var_same(d1, d2, PG_GET_COLLATION(), &tinfo, fcinfo->flinfo);
 	PG_RETURN_POINTER(result);
 }
diff --git a/contrib/btree_gist/data/inet.data b/contrib/btree_gist/data/inet.data
index 687fcbab1c..167d51ab7a 100644
--- a/contrib/btree_gist/data/inet.data
+++ b/contrib/btree_gist/data/inet.data
@@ -673,3 +673,5 @@
 59.164.211.253
 218.33.169.7
 \N
+255.255.1.0/24
+255.255.1.0/26
diff --git a/contrib/btree_gist/expected/cidr.out b/contrib/btree_gist/expected/cidr.out
index 6d0995add6..9a202003c1 100644
--- a/contrib/btree_gist/expected/cidr.out
+++ b/contrib/btree_gist/expected/cidr.out
@@ -23,13 +23,19 @@ SELECT count(*) FROM cidrtmp WHERE a  = '121.111.63.82';
 SELECT count(*) FROM cidrtmp WHERE a >= '121.111.63.82';
  count 
 -------
-   310
+   312
 (1 row)
 
 SELECT count(*) FROM cidrtmp WHERE a >  '121.111.63.82';
  count 
 -------
-   309
+   311
+(1 row)
+
+SELECT count(*) FROM cidrtmp WHERE a >  '255.255.1.0/25';
+ count 
+-------
+     1
 (1 row)
 
 CREATE INDEX cidridx ON cidrtmp USING gist ( a );
@@ -55,12 +61,18 @@ SELECT count(*) FROM cidrtmp WHERE a  = '121.111.63.82'::cidr;
 SELECT count(*) FROM cidrtmp WHERE a >= '121.111.63.82'::cidr;
  count 
 -------
-   310
+   312
 (1 row)
 
 SELECT count(*) FROM cidrtmp WHERE a >  '121.111.63.82'::cidr;
  count 
 -------
-   309
+   311
+(1 row)
+
+SELECT count(*) FROM cidrtmp WHERE a >  '255.255.1.0/25'::cidr;
+ count 
+-------
+     1
 (1 row)
 
diff --git a/contrib/btree_gist/expected/inet.out b/contrib/btree_gist/expected/inet.out
index c323d903da..a6b4a9d33b 100644
--- a/contrib/btree_gist/expected/inet.out
+++ b/contrib/btree_gist/expected/inet.out
@@ -23,13 +23,19 @@ SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191';
 SELECT count(*) FROM inettmp WHERE a >= '89.225.196.191';
  count 
 -------
-   387
+   389
 (1 row)
 
 SELECT count(*) FROM inettmp WHERE a >  '89.225.196.191';
  count 
 -------
-   386
+   388
+(1 row)
+
+SELECT count(*) FROM inettmp WHERE a >  '255.255.1.0/25';
+ count 
+-------
+     1
 (1 row)
 
 CREATE INDEX inetidx ON inettmp USING gist ( a );
@@ -55,23 +61,28 @@ SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
 SELECT count(*) FROM inettmp WHERE a >= '89.225.196.191'::inet;
  count 
 -------
-   387
+   389
 (1 row)
 
 SELECT count(*) FROM inettmp WHERE a >  '89.225.196.191'::inet;
  count 
 -------
-   386
+   388
+(1 row)
+
+SELECT count(*) FROM inettmp WHERE a >  '255.255.1.0/25'::inet;
+ count 
+-------
+     1
 (1 row)
 
 VACUUM ANALYZE inettmp;
--- gist_inet_ops lacks a fetch function, so this should not be index-only scan
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
                     QUERY PLAN                    
 --------------------------------------------------
  Aggregate
-   ->  Index Scan using inetidx on inettmp
+   ->  Index Only Scan using inetidx on inettmp
          Index Cond: (a = '89.225.196.191'::inet)
 (3 rows)
 
@@ -83,13 +94,13 @@ SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
 
 DROP INDEX inetidx;
 CREATE INDEX ON inettmp USING gist (a gist_inet_ops, a inet_ops);
--- likewise here (checks for core planner bug)
+-- checks for core planner bug
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
-                     QUERY PLAN                     
-----------------------------------------------------
+                       QUERY PLAN                        
+---------------------------------------------------------
  Aggregate
-   ->  Index Scan using inettmp_a_a1_idx on inettmp
+   ->  Index Only Scan using inettmp_a_a1_idx on inettmp
          Index Cond: (a = '89.225.196.191'::inet)
 (3 rows)
 
diff --git a/contrib/btree_gist/sql/cidr.sql b/contrib/btree_gist/sql/cidr.sql
index 9bd77185b9..c867038839 100644
--- a/contrib/btree_gist/sql/cidr.sql
+++ b/contrib/btree_gist/sql/cidr.sql
@@ -15,6 +15,8 @@ SELECT count(*) FROM cidrtmp WHERE a >= '121.111.63.82';
 
 SELECT count(*) FROM cidrtmp WHERE a >  '121.111.63.82';
 
+SELECT count(*) FROM cidrtmp WHERE a >  '255.255.1.0/25';
+
 CREATE INDEX cidridx ON cidrtmp USING gist ( a );
 
 SET enable_seqscan=off;
@@ -28,3 +30,5 @@ SELECT count(*) FROM cidrtmp WHERE a  = '121.111.63.82'::cidr;
 SELECT count(*) FROM cidrtmp WHERE a >= '121.111.63.82'::cidr;
 
 SELECT count(*) FROM cidrtmp WHERE a >  '121.111.63.82'::cidr;
+
+SELECT count(*) FROM cidrtmp WHERE a >  '255.255.1.0/25'::cidr;
diff --git a/contrib/btree_gist/sql/inet.sql b/contrib/btree_gist/sql/inet.sql
index 4b8d354b00..ab2f17a048 100644
--- a/contrib/btree_gist/sql/inet.sql
+++ b/contrib/btree_gist/sql/inet.sql
@@ -16,6 +16,8 @@ SELECT count(*) FROM inettmp WHERE a >= '89.225.196.191';
 
 SELECT count(*) FROM inettmp WHERE a >  '89.225.196.191';
 
+SELECT count(*) FROM inettmp WHERE a >  '255.255.1.0/25';
+
 CREATE INDEX inetidx ON inettmp USING gist ( a );
 
 SET enable_seqscan=off;
@@ -30,9 +32,10 @@ SELECT count(*) FROM inettmp WHERE a >= '89.225.196.191'::inet;
 
 SELECT count(*) FROM inettmp WHERE a >  '89.225.196.191'::inet;
 
+SELECT count(*) FROM inettmp WHERE a >  '255.255.1.0/25'::inet;
+
 VACUUM ANALYZE inettmp;
 
--- gist_inet_ops lacks a fetch function, so this should not be index-only scan
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
 
@@ -42,7 +45,7 @@ DROP INDEX inetidx;
 
 CREATE INDEX ON inettmp USING gist (a gist_inet_ops, a inet_ops);
 
--- likewise here (checks for core planner bug)
+-- checks for core planner bug
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
 

Reply via email to