On 30.01.2017 12:04, Kyotaro HORIGUCHI wrote:
Hello,

At Mon, 30 Jan 2017 07:12:03 +0300, Nikita Glukhov <n.glu...@postgrespro.ru> 
wrote
in <9ea5b157-478c-8874-bc9b-050076b7d...@postgrespro.ru>:
Working on the tests for new SP-GiST opclasses for polygons and
circles, I've found a bug in the SP-GiST box_ops (added in 9.6): some operators
(&<, &>, $<|, |&>) have wrong tests in spg_box_quad_inner_consistent().
This obviously leads to incorrect results of a SP-GiST index scan (see tests
in the attached patch, their results were taken from a sequential scan).

Your problem is not necessarily evident for others.  Perhaps you
have to provide an explanation and/or a test case that describes
what is wrong for you so that others can get a clue for this
problem. Simpler test is better.

The problem is that there are mixed low/high values for x/y axes.  For example,
overLeft4D() compares x-RangeBox rect_box->range_box_x with y-Range 
&query->right,
and also lower2D() here must use query->high instead of query->low.

The corresponding test is very simple: insert 10000 nearly arbitrary boxes and
see the difference between results of sequential scan and results of index scan.

regression=# CREATE TEMPORARY TABLE quad_box_tbl (b box);

regression=# INSERT INTO quad_box_tbl
        SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
        FROM generate_series(1, 100) x,
                 generate_series(1, 100) y;

regression=# CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);

regression=# SET enable_seqscan = ON;
regression=# SET enable_indexscan = OFF;
regression=# SET enable_bitmapscan = OFF;

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<  box 
'((100,200),(300,500))';
 count
-------
  2900
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &>  box 
'((100,200),(300,500))';
 count
-------
  9100
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<| box 
'((100,200),(300,500))';
 count
-------
  4900
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b |&> box 
'((100,200),(300,500))';
 count
-------
  8100
(1 row)

regression=# SET enable_seqscan = OFF;
regression=# SET enable_indexscan = ON;
regression=# SET enable_bitmapscan = ON;

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<  box 
'((100,200),(300,500))';
 count
-------
  2430
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &>  box 
'((100,200),(300,500))';
 count
-------
  6208
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<| box 
'((100,200),(300,500))';
 count
-------
  1048
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b |&> box 
'((100,200),(300,500))';
 count
-------
  5084
(1 row)


The test,

| +INSERT INTO quad_box_tbl
| +     SELECT i, box(point(x, y), point(x + w, y + h))
| +     FROM (SELECT i,
| +                             random() * 1000 as x, random() * 1000 as y,
| +                             random() * 20 as w, random() * 20 as h

is inserting quad_boxes generated using random numbers then,

| +SELECT count(*) FROM quad_box_tbl WHERE b <<  box '((100,200),(300,500))';
| + count
| +-------
| +   891

counting them in this way is unstable. Even though this were
stable due to a fixed initial, this would be unacceptable, I
think. This kind of test should use nonrandom numbers.

I have replaced random data in this test with stable uniformly distributed data.
I would also like to use existing box data from rect.data that is loaded to
slow_emp4000 table in copy.sql test, but box.sql test is executed before 
copy.sql.


Even though I don't understand this in depth, the following
change seems somewhat wrong in direction. Changing the second
argument type seems breaking the basis of the design.

| -lower2D(RangeBox *range_box, Range *query)
| +lower2D(RangeBox *range_box, double query)

Maybe.  I also thought that these changes are quite doubtful, so I decided to
replace lowerEqual2D()/higherEqual2D() with overlower2D()/overhigher2D() and
not to touch lower2D()/higher2D().

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
index aacb340..2dc5496 100644
--- a/src/backend/utils/adt/geo_spgist.c
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -286,6 +286,14 @@ lower2D(RangeBox *range_box, Range *query)
 		FPlt(range_box->right.low, query->low);
 }
 
+/* Can any range from range_box to be overlower than this argument? */
+static bool
+overlower2D(RangeBox *range_box, Range *query)
+{
+	return FPle(range_box->left.low, query->high) &&
+		FPle(range_box->right.low, query->high);
+}
+
 /* Can any range from range_box to be higher than this argument? */
 static bool
 higher2D(RangeBox *range_box, Range *query)
@@ -294,6 +302,14 @@ higher2D(RangeBox *range_box, Range *query)
 		FPgt(range_box->right.high, query->high);
 }
 
+/* Can any range from range_box to be overhigher than this argument? */
+static bool
+overhigher2D(RangeBox *range_box, Range *query)
+{
+	return FPge(range_box->left.high, query->low) &&
+		FPge(range_box->right.high, query->low);
+}
+
 /* Can any rectangle from rect_box be left of this argument? */
 static bool
 left4D(RectBox *rect_box, RangeBox *query)
@@ -305,7 +321,7 @@ left4D(RectBox *rect_box, RangeBox *query)
 static bool
 overLeft4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(&rect_box->range_box_x, &query->right);
+	return overlower2D(&rect_box->range_box_x, &query->left);
 }
 
 /* Can any rectangle from rect_box be right of this argument? */
@@ -319,7 +335,7 @@ right4D(RectBox *rect_box, RangeBox *query)
 static bool
 overRight4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(&rect_box->range_box_x, &query->right);
+	return overhigher2D(&rect_box->range_box_x, &query->left);
 }
 
 /* Can any rectangle from rect_box be below of this argument? */
@@ -333,7 +349,7 @@ below4D(RectBox *rect_box, RangeBox *query)
 static bool
 overBelow4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(&rect_box->range_box_y, &query->left);
+	return overlower2D(&rect_box->range_box_y, &query->right);
 }
 
 /* Can any rectangle from rect_box be above of this argument? */
@@ -347,7 +363,7 @@ above4D(RectBox *rect_box, RangeBox *query)
 static bool
 overAbove4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(&rect_box->range_box_y, &query->right);
+	return overhigher2D(&rect_box->range_box_y, &query->right);
 }
 
 /*
diff --git a/src/test/regress/expected/box.out b/src/test/regress/expected/box.out
index 5f8b945..251df93 100644
--- a/src/test/regress/expected/box.out
+++ b/src/test/regress/expected/box.out
@@ -455,3 +455,108 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
 
 RESET enable_seqscan;
 DROP INDEX box_spgist;
+--
+-- Test the SP-GiST index on the larger volume of data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (b box);
+INSERT INTO quad_box_tbl
+	SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
+	FROM generate_series(1, 100) x,
+		 generate_series(1, 100) y;
+-- insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+	SELECT '((200, 300),(210, 310))'
+	FROM generate_series(1, 1000);
+INSERT INTO quad_box_tbl
+	VALUES
+		(NULL),
+		(NULL),
+		('((-infinity,-infinity),(infinity,infinity))'),
+		('((-infinity,100),(-infinity,500))'),
+		('((-infinity,-infinity),(700,infinity))');
+CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);
+SET enable_seqscan = OFF;
+SET enable_indexscan = ON;
+SET enable_bitmapscan = ON;
+SELECT count(*) FROM quad_box_tbl WHERE b <<  box '((100,200),(300,500))';
+ count 
+-------
+   901
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &<  box '((100,200),(300,500))';
+ count 
+-------
+  3901
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &&  box '((100,200),(300,500))';
+ count 
+-------
+  1653
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &>  box '((100,200),(300,500))';
+ count 
+-------
+ 10100
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b >>  box '((100,200),(300,500))';
+ count 
+-------
+  7000
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b >>  box '((100,200),(300,500))';
+ count 
+-------
+  7000
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b <<| box '((100,200),(300,500))';
+ count 
+-------
+  1900
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &<| box '((100,200),(300,500))';
+ count 
+-------
+  5901
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b |&> box '((100,200),(300,500))';
+ count 
+-------
+  9100
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b |>> box '((100,200),(300,500))';
+ count 
+-------
+  5000
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b @>  box '((201,301),(202,303))';
+ count 
+-------
+  1003
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b <@  box '((100,200),(300,500))';
+ count 
+-------
+  1600
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b ~=  box '((200,300),(205,305))';
+ count 
+-------
+     1
+(1 row)
+
+RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
+DROP INDEX quad_box_tbl_idx;
diff --git a/src/test/regress/sql/box.sql b/src/test/regress/sql/box.sql
index 128a016..37f05cb 100644
--- a/src/test/regress/sql/box.sql
+++ b/src/test/regress/sql/box.sql
@@ -179,3 +179,52 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
 RESET enable_seqscan;
 
 DROP INDEX box_spgist;
+
+--
+-- Test the SP-GiST index on the larger volume of data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (b box);
+
+INSERT INTO quad_box_tbl
+	SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
+	FROM generate_series(1, 100) x,
+		 generate_series(1, 100) y;
+
+-- insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+	SELECT '((200, 300),(210, 310))'
+	FROM generate_series(1, 1000);
+
+INSERT INTO quad_box_tbl
+	VALUES
+		(NULL),
+		(NULL),
+		('((-infinity,-infinity),(infinity,infinity))'),
+		('((-infinity,100),(-infinity,500))'),
+		('((-infinity,-infinity),(700,infinity))');
+
+CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);
+
+SET enable_seqscan = OFF;
+SET enable_indexscan = ON;
+SET enable_bitmapscan = ON;
+
+SELECT count(*) FROM quad_box_tbl WHERE b <<  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &<  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &&  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &>  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b >>  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b >>  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b <<| box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &<| box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b |&> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b |>> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b @>  box '((201,301),(202,303))';
+SELECT count(*) FROM quad_box_tbl WHERE b <@  box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b ~=  box '((200,300),(205,305))';
+
+RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
+
+DROP INDEX quad_box_tbl_idx;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to