On 30.01.2017 12:04, Kyotaro HORIGUCHI wrote:
Hello,
At Mon, 30 Jan 2017 07:12:03 +0300, Nikita Glukhov <n.glu...@postgrespro.ru>
wrote
in <9ea5b157-478c-8874-bc9b-050076b7d...@postgrespro.ru>:
Working on the tests for new SP-GiST opclasses for polygons and
circles, I've found a bug in the SP-GiST box_ops (added in 9.6): some operators
(&<, &>, $<|, |&>) have wrong tests in spg_box_quad_inner_consistent().
This obviously leads to incorrect results of a SP-GiST index scan (see tests
in the attached patch, their results were taken from a sequential scan).
Your problem is not necessarily evident for others. Perhaps you
have to provide an explanation and/or a test case that describes
what is wrong for you so that others can get a clue for this
problem. Simpler test is better.
The problem is that there are mixed low/high values for x/y axes. For example,
overLeft4D() compares x-RangeBox rect_box->range_box_x with y-Range
&query->right,
and also lower2D() here must use query->high instead of query->low.
The corresponding test is very simple: insert 10000 nearly arbitrary boxes and
see the difference between results of sequential scan and results of index scan.
regression=# CREATE TEMPORARY TABLE quad_box_tbl (b box);
regression=# INSERT INTO quad_box_tbl
SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
FROM generate_series(1, 100) x,
generate_series(1, 100) y;
regression=# CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);
regression=# SET enable_seqscan = ON;
regression=# SET enable_indexscan = OFF;
regression=# SET enable_bitmapscan = OFF;
regression=# SELECT count(*) FROM quad_box_tbl WHERE b &< box
'((100,200),(300,500))';
count
-------
2900
(1 row)
regression=# SELECT count(*) FROM quad_box_tbl WHERE b &> box
'((100,200),(300,500))';
count
-------
9100
(1 row)
regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<| box
'((100,200),(300,500))';
count
-------
4900
(1 row)
regression=# SELECT count(*) FROM quad_box_tbl WHERE b |&> box
'((100,200),(300,500))';
count
-------
8100
(1 row)
regression=# SET enable_seqscan = OFF;
regression=# SET enable_indexscan = ON;
regression=# SET enable_bitmapscan = ON;
regression=# SELECT count(*) FROM quad_box_tbl WHERE b &< box
'((100,200),(300,500))';
count
-------
2430
(1 row)
regression=# SELECT count(*) FROM quad_box_tbl WHERE b &> box
'((100,200),(300,500))';
count
-------
6208
(1 row)
regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<| box
'((100,200),(300,500))';
count
-------
1048
(1 row)
regression=# SELECT count(*) FROM quad_box_tbl WHERE b |&> box
'((100,200),(300,500))';
count
-------
5084
(1 row)
The test,
| +INSERT INTO quad_box_tbl
| + SELECT i, box(point(x, y), point(x + w, y + h))
| + FROM (SELECT i,
| + random() * 1000 as x, random() * 1000 as y,
| + random() * 20 as w, random() * 20 as h
is inserting quad_boxes generated using random numbers then,
| +SELECT count(*) FROM quad_box_tbl WHERE b << box '((100,200),(300,500))';
| + count
| +-------
| + 891
counting them in this way is unstable. Even though this were
stable due to a fixed initial, this would be unacceptable, I
think. This kind of test should use nonrandom numbers.
I have replaced random data in this test with stable uniformly distributed data.
I would also like to use existing box data from rect.data that is loaded to
slow_emp4000 table in copy.sql test, but box.sql test is executed before
copy.sql.
Even though I don't understand this in depth, the following
change seems somewhat wrong in direction. Changing the second
argument type seems breaking the basis of the design.
| -lower2D(RangeBox *range_box, Range *query)
| +lower2D(RangeBox *range_box, double query)
Maybe. I also thought that these changes are quite doubtful, so I decided to
replace lowerEqual2D()/higherEqual2D() with overlower2D()/overhigher2D() and
not to touch lower2D()/higher2D().
--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
index aacb340..2dc5496 100644
--- a/src/backend/utils/adt/geo_spgist.c
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -286,6 +286,14 @@ lower2D(RangeBox *range_box, Range *query)
FPlt(range_box->right.low, query->low);
}
+/* Can any range from range_box to be overlower than this argument? */
+static bool
+overlower2D(RangeBox *range_box, Range *query)
+{
+ return FPle(range_box->left.low, query->high) &&
+ FPle(range_box->right.low, query->high);
+}
+
/* Can any range from range_box to be higher than this argument? */
static bool
higher2D(RangeBox *range_box, Range *query)
@@ -294,6 +302,14 @@ higher2D(RangeBox *range_box, Range *query)
FPgt(range_box->right.high, query->high);
}
+/* Can any range from range_box to be overhigher than this argument? */
+static bool
+overhigher2D(RangeBox *range_box, Range *query)
+{
+ return FPge(range_box->left.high, query->low) &&
+ FPge(range_box->right.high, query->low);
+}
+
/* Can any rectangle from rect_box be left of this argument? */
static bool
left4D(RectBox *rect_box, RangeBox *query)
@@ -305,7 +321,7 @@ left4D(RectBox *rect_box, RangeBox *query)
static bool
overLeft4D(RectBox *rect_box, RangeBox *query)
{
- return lower2D(&rect_box->range_box_x, &query->right);
+ return overlower2D(&rect_box->range_box_x, &query->left);
}
/* Can any rectangle from rect_box be right of this argument? */
@@ -319,7 +335,7 @@ right4D(RectBox *rect_box, RangeBox *query)
static bool
overRight4D(RectBox *rect_box, RangeBox *query)
{
- return higher2D(&rect_box->range_box_x, &query->right);
+ return overhigher2D(&rect_box->range_box_x, &query->left);
}
/* Can any rectangle from rect_box be below of this argument? */
@@ -333,7 +349,7 @@ below4D(RectBox *rect_box, RangeBox *query)
static bool
overBelow4D(RectBox *rect_box, RangeBox *query)
{
- return lower2D(&rect_box->range_box_y, &query->left);
+ return overlower2D(&rect_box->range_box_y, &query->right);
}
/* Can any rectangle from rect_box be above of this argument? */
@@ -347,7 +363,7 @@ above4D(RectBox *rect_box, RangeBox *query)
static bool
overAbove4D(RectBox *rect_box, RangeBox *query)
{
- return higher2D(&rect_box->range_box_y, &query->right);
+ return overhigher2D(&rect_box->range_box_y, &query->right);
}
/*
diff --git a/src/test/regress/expected/box.out b/src/test/regress/expected/box.out
index 5f8b945..251df93 100644
--- a/src/test/regress/expected/box.out
+++ b/src/test/regress/expected/box.out
@@ -455,3 +455,108 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
RESET enable_seqscan;
DROP INDEX box_spgist;
+--
+-- Test the SP-GiST index on the larger volume of data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (b box);
+INSERT INTO quad_box_tbl
+ SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
+ FROM generate_series(1, 100) x,
+ generate_series(1, 100) y;
+-- insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+ SELECT '((200, 300),(210, 310))'
+ FROM generate_series(1, 1000);
+INSERT INTO quad_box_tbl
+ VALUES
+ (NULL),
+ (NULL),
+ ('((-infinity,-infinity),(infinity,infinity))'),
+ ('((-infinity,100),(-infinity,500))'),
+ ('((-infinity,-infinity),(700,infinity))');
+CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);
+SET enable_seqscan = OFF;
+SET enable_indexscan = ON;
+SET enable_bitmapscan = ON;
+SELECT count(*) FROM quad_box_tbl WHERE b << box '((100,200),(300,500))';
+ count
+-------
+ 901
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &< box '((100,200),(300,500))';
+ count
+-------
+ 3901
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b && box '((100,200),(300,500))';
+ count
+-------
+ 1653
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &> box '((100,200),(300,500))';
+ count
+-------
+ 10100
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b >> box '((100,200),(300,500))';
+ count
+-------
+ 7000
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b >> box '((100,200),(300,500))';
+ count
+-------
+ 7000
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b <<| box '((100,200),(300,500))';
+ count
+-------
+ 1900
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b &<| box '((100,200),(300,500))';
+ count
+-------
+ 5901
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b |&> box '((100,200),(300,500))';
+ count
+-------
+ 9100
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b |>> box '((100,200),(300,500))';
+ count
+-------
+ 5000
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b @> box '((201,301),(202,303))';
+ count
+-------
+ 1003
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b <@ box '((100,200),(300,500))';
+ count
+-------
+ 1600
+(1 row)
+
+SELECT count(*) FROM quad_box_tbl WHERE b ~= box '((200,300),(205,305))';
+ count
+-------
+ 1
+(1 row)
+
+RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
+DROP INDEX quad_box_tbl_idx;
diff --git a/src/test/regress/sql/box.sql b/src/test/regress/sql/box.sql
index 128a016..37f05cb 100644
--- a/src/test/regress/sql/box.sql
+++ b/src/test/regress/sql/box.sql
@@ -179,3 +179,52 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
RESET enable_seqscan;
DROP INDEX box_spgist;
+
+--
+-- Test the SP-GiST index on the larger volume of data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (b box);
+
+INSERT INTO quad_box_tbl
+ SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
+ FROM generate_series(1, 100) x,
+ generate_series(1, 100) y;
+
+-- insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+ SELECT '((200, 300),(210, 310))'
+ FROM generate_series(1, 1000);
+
+INSERT INTO quad_box_tbl
+ VALUES
+ (NULL),
+ (NULL),
+ ('((-infinity,-infinity),(infinity,infinity))'),
+ ('((-infinity,100),(-infinity,500))'),
+ ('((-infinity,-infinity),(700,infinity))');
+
+CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);
+
+SET enable_seqscan = OFF;
+SET enable_indexscan = ON;
+SET enable_bitmapscan = ON;
+
+SELECT count(*) FROM quad_box_tbl WHERE b << box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &< box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b && box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b >> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b >> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b <<| box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b &<| box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b |&> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b |>> box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b @> box '((201,301),(202,303))';
+SELECT count(*) FROM quad_box_tbl WHERE b <@ box '((100,200),(300,500))';
+SELECT count(*) FROM quad_box_tbl WHERE b ~= box '((200,300),(205,305))';
+
+RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
+
+DROP INDEX quad_box_tbl_idx;
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers