Hi!

On 29.06.2019 1:23, Julien Rouhaud wrote:
But that kinda resembles stuff we already have - selectivity/cost. So
why shouldn't this be considered as part of costing?
Yeah, I'm not entirely convinced that we need anything new here.
The cost estimate function can detect such situations, and so can
the index AM at scan start --- for example, btree checks for
contradictory quals at scan start.  There's a certain amount of
duplicative effort involved there perhaps, but you also have to
keep in mind that we don't know the values of run-time-determined
comparison values until scan start.  So if you want certainty rather
than just a cost estimate, you may have to do these sorts of checks
at scan start.
Ah, I didn't know about _bt_preprocess_keys().  I'm not familiar with
this code, so please bear with me.  IIUC the idea would be to add
additional logic in gingetbitmap() / ginNewScanKey() to drop some
quals at runtime.  But that would mean that additional logic would
also be required in BitmapHeapScan, or that all the returned bitmap
should be artificially marked as lossy to enforce a recheck?


We have a similar solution for this problem.  The idea is to avoid full index
scan inside GIN itself when we have some GIN entries, and forcibly recheck
all tuples if triconsistent() returns GIN_MAYBE for the keys that emitted no
GIN entries.

The attached patch in its current shape contain at least two ugly places:

1. We still need to initialize empty scan key to call triconsistent(), but
   then we have to remove it from the list of scan keys.  Simple refactoring
   of ginFillScanKey() can be helpful here.
2. We need to replace GIN_SEARCH_MODE_EVERYTHING with GIN_SEARCH_MODE_ALL
   if there are no GIN entries and some key requested GIN_SEARCH_MODE_ALL
   because we need to skip NULLs in GIN_SEARCH_MODE_ALL.  Simplest example here
   is "array @> '{}'": triconsistent() returns GIN_TRUE, recheck is not forced,
   and GIN_SEARCH_MODE_EVERYTHING returns NULLs that are not rechecked.  Maybe
   it would be better to introduce new GIN_SEARCH_MODE_EVERYTHING_NON_NULL.



Example:

CREATE TABLE test AS SELECT i::text AS t FROM generate_series(0, 999999) i;

CREATE INDEX ON test USING gin (t gin_trgm_ops);

-- master
EXPLAIN ANALYZE SELECT * FROM test WHERE LIKE '%1234%' AND t LIKE '%1%';
                                                          QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test  (cost=11777.99..16421.73 rows=7999 width=32) (actual 
time=65.431..65.857 rows=300 loops=1)
   Recheck Cond: ((t ~~ '%1234%'::text) AND (t ~~ '%1%'::text))
   Rows Removed by Index Recheck: 2
   Heap Blocks: exact=114
   ->  Bitmap Index Scan on test_t_idx  (cost=0.00..11775.99 rows=7999 width=0) 
(actual time=65.380..65.380 rows=302 loops=1)
         Index Cond: ((t ~~ '%1234%'::text) AND (t ~~ '%1%'::text))
 Planning Time: 0.151 ms
 Execution Time: 65.900 ms
(8 rows)


-- patched
EXPLAIN ANALYZE SELECT * FROM test WHERE t LIKE '%1234%' AND t LIKE '%1%';
                                                      QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test  (cost=20.43..176.79 rows=42 width=6) (actual 
time=0.287..0.424 rows=300 loops=1)
   Recheck Cond: ((t ~~ '%1234%'::text) AND (t ~~ '%1%'::text))
   Rows Removed by Index Recheck: 2
   Heap Blocks: exact=114
   ->  Bitmap Index Scan on test_t_idx  (cost=0.00..20.42 rows=42 width=0) 
(actual time=0.271..0.271 rows=302 loops=1)
         Index Cond: ((t ~~ '%1234%'::text) AND (t ~~ '%1%'::text))
 Planning Time: 0.080 ms
 Execution Time: 0.450 ms
(8 rows)

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

>From 6e54ed98501e98ee0608702ce2e4adc56fcf354a Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Fri, 28 Jun 2019 23:18:39 +0300
Subject: [PATCH] Avoid full GIN index scan

---
 src/backend/access/gin/ginget.c  |  2 ++
 src/backend/access/gin/ginscan.c | 24 +++++++++++++++++++++++-
 src/backend/utils/adt/selfuncs.c | 12 +++++++++++-
 src/include/access/gin_private.h |  1 +
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index b18ae2b..317fa1f 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -1891,6 +1891,8 @@ gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 		if (!scanGetItem(scan, iptr, &iptr, &recheck))
 			break;
 
+		recheck |= so->forcedRecheck;
+
 		if (ItemPointerIsLossyPage(&iptr))
 			tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
 		else
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index 74d9821..a32a6ff 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -141,7 +141,8 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum,
 	uint32		i;
 
 	/* Non-default search modes add one "hidden" entry to each key */
-	if (searchMode != GIN_SEARCH_MODE_DEFAULT)
+	if (searchMode != GIN_SEARCH_MODE_DEFAULT &&
+		(searchMode != GIN_SEARCH_MODE_ALL || nQueryValues))
 		nQueryValues++;
 	key->nentries = nQueryValues;
 	key->nuserentries = nUserQueryValues;
@@ -265,6 +266,7 @@ ginNewScanKey(IndexScanDesc scan)
 	GinScanOpaque so = (GinScanOpaque) scan->opaque;
 	int			i;
 	bool		hasNullQuery = false;
+	bool		hasSearchAllMode = false;
 	MemoryContext oldCtx;
 
 	/*
@@ -286,6 +288,7 @@ ginNewScanKey(IndexScanDesc scan)
 		palloc(so->allocentries * sizeof(GinScanEntry));
 
 	so->isVoidRes = false;
+	so->forcedRecheck = false;
 
 	for (i = 0; i < scan->numberOfKeys; i++)
 	{
@@ -371,6 +374,18 @@ ginNewScanKey(IndexScanDesc scan)
 					   skey->sk_argument, nQueryValues,
 					   queryValues, categories,
 					   partial_matches, extra_data);
+
+		if (searchMode == GIN_SEARCH_MODE_ALL && nQueryValues <= 0)
+		{
+			/*
+			 * Don't emit ALL key with no entries, check only whether
+			 * unconditional recheck is needed.
+			 */
+			GinScanKey	key = &so->keys[--so->nkeys];
+
+			hasSearchAllMode = true;
+			so->forcedRecheck = key->triConsistentFn(key) != GIN_TRUE;
+		}
 	}
 
 	/*
@@ -384,6 +399,13 @@ ginNewScanKey(IndexScanDesc scan)
 					   InvalidStrategy, GIN_SEARCH_MODE_EVERYTHING,
 					   (Datum) 0, 0,
 					   NULL, NULL, NULL, NULL);
+
+		/*
+		 * XXX Need to use ALL mode instead of EVERYTHING to skip NULLs if ALL
+		 * mode has been seen.
+		 */
+		if (hasSearchAllMode)
+			so->keys[so->nkeys - 1].scanEntry[0]->searchMode = GIN_SEARCH_MODE_ALL;
 	}
 
 	/*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d7e3f09..e54c0db 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6258,6 +6258,16 @@ gincost_pattern(IndexOptInfo *index, int indexcol,
 		return false;
 	}
 
+	if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_ALL)
+	{
+		/*
+		 * GIN does not emit scan entries for empty GIN_SEARCH_MODE_ALL keys,
+		 * and it can avoid full index scan if there are entries from other
+		 * keys, so we can skip setting of 'haveFullScan' flag.
+		 */
+		return true;
+	}
+
 	for (i = 0; i < nentries; i++)
 	{
 		/*
@@ -6641,7 +6651,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 		return;
 	}
 
-	if (counts.haveFullScan || indexQuals == NIL)
+	if (counts.haveFullScan || indexQuals == NIL || counts.searchEntries <= 0)
 	{
 		/*
 		 * Full index scan will be required.  We treat this as if every key in
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index afb3e15..369b1da 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -359,6 +359,7 @@ typedef struct GinScanOpaqueData
 	MemoryContext keyCtx;		/* used to hold key and entry data */
 
 	bool		isVoidRes;		/* true if query is unsatisfiable */
+	bool		forcedRecheck;	/* recheck all returned tuples */
 } GinScanOpaqueData;
 
 typedef GinScanOpaqueData *GinScanOpaque;
-- 
2.7.4

Reply via email to