On 01/30/2014 01:53 AM, Tomas Vondra wrote:
(3) A file with explain plans for 4 queries suffering ~2x slowdown,
     and explain plans with 9.4 master and Heikki's patches is available
     here:

       http://www.fuzzy.cz/tmp/gin/queries.txt

     All the queries have 6 common words, and the explain plans look
     just fine to me - exactly like the plans for other queries.

     Two things now caught my eye. First some of these queries actually
     have words repeated - either exactly like "database & database" or
     in negated form like "!anything & anything". Second, while
     generating the queries, I use "dumb" frequency, where only exact
     matches count. I.e. "write != written" etc. But the actual number
     of hits may be much higher - for example "write" matches exactly
     just 5% documents, but using @@ it matches more than 20%.

     I don't know if that's the actual cause though.

Ok, here's another variant of these patches. Compared to git master, it does three things:

1. It adds the concept of ternary consistent function internally, but no
catalog changes. It's implemented by calling the regular boolean consistent function "both ways".

2. Use a binary heap to get the "next" item from the entries in a scan. I'm pretty sure this makes sense, because arguably it makes the code more readable, and reduces the number of item pointer comparisons significantly for queries with a lot of entries.

3. Only perform the pre-consistent check to try skipping entries, if we don't already have the next item from the entry loaded in the array. This is a tradeoff, you will lose some of the performance gain you might get from pre-consistent checks, but it also limits the performance loss you might get from doing useless pre-consistent checks.

So taken together, I would expect this patch to make some of the performance gains less impressive, but also limit the loss we saw with some of the other patches.

Tomas, could you run your test suite with this patch, please?

- Heikki

diff --git a/src/backend/access/gin/Makefile b/src/backend/access/gin/Makefile
index aabc62f..db4f496 100644
--- a/src/backend/access/gin/Makefile
+++ b/src/backend/access/gin/Makefile
@@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
 	ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
-	ginbulk.o ginfast.o ginpostinglist.o
+	ginbulk.o ginfast.o ginpostinglist.o ginlogic.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index a45d722..f2ea962 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -32,41 +32,6 @@ typedef struct pendingPosition
 	bool	   *hasMatchKey;
 } pendingPosition;
 
-
-/*
- * Convenience function for invoking a key's consistentFn
- */
-static bool
-callConsistentFn(GinState *ginstate, GinScanKey key)
-{
-	/*
-	 * If we're dealing with a dummy EVERYTHING key, we don't want to call the
-	 * consistentFn; just claim it matches.
-	 */
-	if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
-	{
-		key->recheckCurItem = false;
-		return true;
-	}
-
-	/*
-	 * Initialize recheckCurItem in case the consistentFn doesn't know it
-	 * should set it.  The safe assumption in that case is to force recheck.
-	 */
-	key->recheckCurItem = true;
-
-	return DatumGetBool(FunctionCall8Coll(&ginstate->consistentFn[key->attnum - 1],
-								 ginstate->supportCollation[key->attnum - 1],
-										  PointerGetDatum(key->entryRes),
-										  UInt16GetDatum(key->strategy),
-										  key->query,
-										  UInt32GetDatum(key->nuserentries),
-										  PointerGetDatum(key->extra_data),
-									   PointerGetDatum(&key->recheckCurItem),
-										  PointerGetDatum(key->queryValues),
-									 PointerGetDatum(key->queryCategories)));
-}
-
 /*
  * Goes to the next page if current offset is outside of bounds
  */
@@ -453,13 +418,31 @@ restartScanEntry:
 	freeGinBtreeStack(stackEntry);
 }
 
+static int
+entryCmp(Datum a, Datum b, void *arg)
+{
+	GinScanEntry ea = (GinScanEntry) DatumGetPointer(a);
+	GinScanEntry eb = (GinScanEntry) DatumGetPointer(b);
+	return  -ginCompareItemPointers(&ea->curItem, &eb->curItem);
+}
+
 static void
 startScanKey(GinState *ginstate, GinScanKey key)
 {
+	int			i;
 	ItemPointerSetMin(&key->curItem);
 	key->curItemMatches = false;
 	key->recheckCurItem = false;
 	key->isFinished = false;
+
+	GinInitConsistentMethod(ginstate, key);
+
+	key->entryHeap = binaryheap_allocate(key->nentries, entryCmp, NULL);
+	for (i = 0; i < key->nentries; i++)
+		binaryheap_add(key->entryHeap, PointerGetDatum(key->scanEntry[i]));
+
+	key->nunloaded = 0;
+	key->unloadedEntries = palloc(key->nentries * sizeof(GinScanEntry));
 }
 
 static void
@@ -649,6 +632,11 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
  *
  * Item pointers are returned in ascending order.
  *
+ * If 'ifcheap' is passed as TRUE, the function only advances curItem if it's
+ * relatively cheap to do so.  In the current implementation, cheap means that
+ * the next item loaded in the entry->list array. Returns TRUE if curItem was
+ * advanced, FALSE otherwise.
+ *
  * Note: this can return a "lossy page" item pointer, indicating that the
  * entry potentially matches all items on that heap page.  However, it is
  * not allowed to return both a lossy page pointer and exact (regular)
@@ -656,9 +644,9 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
  * current implementation this is guaranteed by the behavior of tidbitmaps.
  */
-static void
+static bool
 entryGetItem(GinState *ginstate, GinScanEntry entry,
-			 ItemPointerData advancePast)
+			 ItemPointerData advancePast, bool ifcheap)
 {
 	Assert(!entry->isFinished);
 
@@ -768,12 +756,15 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 			/* If we've processed the current batch, load more items */
 			while (entry->offset >= entry->nlist)
 			{
+				if (ifcheap)
+					return false;
+
 				entryLoadMoreItems(ginstate, entry, advancePast);
 
 				if (entry->isFinished)
 				{
 					ItemPointerSetInvalid(&entry->curItem);
-					return;
+					return true;
 				}
 			}
 
@@ -782,6 +773,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 		} while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 ||
 				 (entry->reduceResult == TRUE && dropItem(entry)));
 	}
+	return true;
 }
 
 /*
@@ -812,180 +804,291 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
 	ItemPointerData minItem;
 	ItemPointerData curPageLossy;
 	uint32		i;
-	uint32		lossyEntry;
 	bool		haveLossyEntry;
 	GinScanEntry entry;
-	bool		res;
 	MemoryContext oldCtx;
-	bool		allFinished;
+	bool		gotItem;
+	GinLogicValue res;
 
 	Assert(!key->isFinished);
 
 	/*
 	 * We might have already tested this item; if so, no need to repeat work.
-	 * (Note: the ">" case can happen, if minItem is exact but we previously
+	 * (Note: the ">" case can happen, if advancePast is exact but we previously
 	 * had to set curItem to a lossy-page pointer.)
 	 */
 	if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
 		return;
 
-	/*
-	 * Find the minimum item > advancePast among the active entry streams.
-	 *
-	 * Note: a lossy-page entry is encoded by a ItemPointer with max value for
-	 * offset (0xffff), so that it will sort after any exact entries for the
-	 * same page.  So we'll prefer to return exact pointers not lossy
-	 * pointers, which is good.
-	 */
-	ItemPointerSetMax(&minItem);
-	allFinished = true;
-	for (i = 0; i < key->nentries; i++)
-	{
-		entry = key->scanEntry[i];
+	oldCtx = CurrentMemoryContext;
 
+	for (;;)
+	{
 		/*
-		 * Advance this stream if necessary.
+		 * Find the minimum item > advancePast among the active entry streams.
 		 *
-		 * In particular, since entry->curItem was initialized with
-		 * ItemPointerSetMin, this ensures we fetch the first item for each
-		 * entry on the first call.
+		 * Note: a lossy-page entry is encoded by a ItemPointer with max value
+		 * for offset (0xffff), so that it will sort after any exact entries
+		 * for the same page.  So we'll prefer to return exact pointers not
+		 * lossy pointers, which is good.
 		 */
-		while (entry->isFinished == FALSE &&
-			   ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
+		ItemPointerSetMax(&minItem);
+		gotItem = false;
+		while (!binaryheap_empty(key->entryHeap))
 		{
-			entryGetItem(ginstate, entry, advancePast);
+			entry = (GinScanEntry) DatumGetPointer(binaryheap_first(key->entryHeap));
+			if (entry->isFinished)
+				binaryheap_remove_first(key->entryHeap);
+			else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
+			{
+				/* Advance this entry, if it's cheap to do so */
+				if (entryGetItem(ginstate, entry, advancePast, true))
+					binaryheap_replace_first(key->entryHeap, PointerGetDatum(entry));
+				else
+				{
+					binaryheap_remove_first(key->entryHeap);
+					key->unloadedEntries[key->nunloaded++] = entry;
+				}
+			}
+			else
+			{
+				gotItem = true;
+				minItem = entry->curItem;
+				break;
+			}
 		}
 
-		if (!entry->isFinished)
+		/*
+		 * We must have an item from at least one source to have a match.
+		 * Fetch the next item > advancePast from one of the streams.
+		 */
+		if (!gotItem)
 		{
-			allFinished = FALSE;
-			if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
-				minItem = entry->curItem;
+			if (key->nunloaded == 0)
+			{
+				/* all entries are finished */
+				key->isFinished = TRUE;
+				return;
+			}
+
+			entry = key->unloadedEntries[--key->nunloaded];
+			entryGetItem(ginstate, entry, advancePast, false);
+			binaryheap_add(key->entryHeap, PointerGetDatum(entry));
+			continue;
 		}
-	}
 
-	if (allFinished)
-	{
-		/* all entries are finished */
-		key->isFinished = TRUE;
-		return;
-	}
+		/*
+		 * We now have minItem set to the minimum among input streams that
+		 * we know. Some streams might be in unknown state, meaning we don't
+		 * know the next value from that input.
+		 *
+		 * Determine if any items between advancePast and minItem might match.
+		 * Such items might come from one of the unknown sources, but it's
+		 * possible that the consistent function can refute them all, ie. 
+		 * the consistent logic says that they cannot match without any of the
+		 * sources that we have loaded.
+		 */
+		if (key->nunloaded > 0)
+		{
+			for (i = 0; i < key->nentries; i++)
+			{
+				entry = key->scanEntry[i];
+				if (entry->isFinished)
+					key->entryRes[i] = GIN_FALSE;
+				else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
+				{
+					/* this source is 'unloaded' */
+					key->entryRes[i] = GIN_MAYBE;
+				}
+				else
+				{
+					/*
+					 * we know the next item from this source to be >= minItem,
+					 * hence it's false for any items before < minItem
+					 */
+					key->entryRes[i] = GIN_FALSE;
+				}
+			}
 
-	/*
-	 * OK, set key->curItem and perform consistentFn test.
-	 */
-	key->curItem = minItem;
+			MemoryContextSwitchTo(tempCtx);
+			res = key->triConsistentFn(key);
+			MemoryContextSwitchTo(oldCtx);
 
-	/*
-	 * Lossy-page entries pose a problem, since we don't know the correct
-	 * entryRes state to pass to the consistentFn, and we also don't know what
-	 * its combining logic will be (could be AND, OR, or even NOT). If the
-	 * logic is OR then the consistentFn might succeed for all items in the
-	 * lossy page even when none of the other entries match.
-	 *
-	 * If we have a single lossy-page entry then we check to see if the
-	 * consistentFn will succeed with only that entry TRUE.  If so, we return
-	 * a lossy-page pointer to indicate that the whole heap page must be
-	 * checked.  (On subsequent calls, we'll do nothing until minItem is past
-	 * the page altogether, thus ensuring that we never return both regular
-	 * and lossy pointers for the same page.)
-	 *
-	 * This idea could be generalized to more than one lossy-page entry, but
-	 * ideally lossy-page entries should be infrequent so it would seldom be
-	 * the case that we have more than one at once.  So it doesn't seem worth
-	 * the extra complexity to optimize that case. If we do find more than
-	 * one, we just punt and return a lossy-page pointer always.
-	 *
-	 * Note that only lossy-page entries pointing to the current item's page
-	 * should trigger this processing; we might have future lossy pages in the
-	 * entry array, but they aren't relevant yet.
-	 */
-	ItemPointerSetLossyPage(&curPageLossy,
-							GinItemPointerGetBlockNumber(&key->curItem));
+			if (res == GIN_FALSE)
+			{
+				/*
+				 * All items between advancePast and minItem have been refuted.
+				 * Proceed with minItem.
+				 */
+				advancePast = minItem;
+				advancePast.ip_posid--;
+			}
+			else
+			{
+				/*
+				 * There might be matches smaller than minItem coming from one
+				 * of the unknown sources. Load more items, and retry.
+				 */
+				entry = key->unloadedEntries[--key->nunloaded];
+				entryGetItem(ginstate, entry, advancePast, false);
+				binaryheap_add(key->entryHeap, PointerGetDatum(entry));
+				continue;
+			}
+		}
 
-	lossyEntry = 0;
-	haveLossyEntry = false;
-	for (i = 0; i < key->nentries; i++)
-	{
-		entry = key->scanEntry[i];
-		if (entry->isFinished == FALSE &&
-			ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
+		/*
+		 * Ok, we now know that there are no matches < minItem. Proceed to
+		 * check if it's a match.
+		 */
+		key->curItem = minItem;
+		advancePast = minItem;
+		advancePast.ip_posid--;
+		ItemPointerSetLossyPage(&curPageLossy,
+								GinItemPointerGetBlockNumber(&minItem));
+
+		/*
+		 * We might not have loaded all the entry streams for this TID. We
+		 * could call the consistent function, passing MAYBE for those entries,
+		 * to see if it can decide if this TID matches based on the information
+		 * we have. But if the consistent-function is expensive, and cannot
+		 * in fact decide with partial information, that could be a big loss.
+		 * So, loop back to load the missing entries, before calling the
+		 * consistent function.
+		 */
+		while (key->nunloaded > 0)
+		{
+			entry = key->unloadedEntries[--key->nunloaded];
+			entryGetItem(ginstate, entry, advancePast, false);
+			binaryheap_add(key->entryHeap, PointerGetDatum(entry));
+		}
+
+
+		/*
+		 * Lossy-page entries pose a problem, since we don't know the correct
+		 * entryRes state to pass to the consistentFn, and we also don't know
+		 * what its combining logic will be (could be AND, OR, or even NOT).
+		 * If the logic is OR then the consistentFn might succeed for all items
+		 * in the lossy page even when none of the other entries match.
+		 *
+		 * Our strategy is to call the tri-state consistent function, with the
+		 * lossy-page entries set to MAYBE, and all the other entries FALSE.
+		 * If it returns FALSE, none of the lossy items alone are enough for a
+		 * match, so we don't need to return a lossy-page pointer. Otherwise,
+		 * return a lossy-page pointer to indicate that the whole heap page must
+		 * be checked.  (On subsequent calls, we'll do nothing until minItem is
+		 * past the page altogether, thus ensuring that we never return both
+		 * regular and lossy pointers for the same page.)
+		 *
+		 * An exception is that we don't need to try it both ways (ie. pass
+		 * MAYBE) if the lossy pointer is in a "hidden" entry, because the
+		 * consistentFn's result can't depend on that (but mark the result as
+		 * 'recheck').
+		 *
+		 * Note that only lossy-page entries pointing to the current item's
+		 * page should trigger this processing; we might have future lossy
+		 * pages in the entry array, but they aren't relevant yet.
+		 */
+		haveLossyEntry = false;
+		for (i = 0; i < key->nentries; i++)
+		{
+			entry = key->scanEntry[i];
+			if (entry->isFinished == FALSE &&
+				ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
+			{
+				key->entryRes[i] = GIN_MAYBE;
+				haveLossyEntry = true;
+			}
+			else
+				key->entryRes[i] = GIN_FALSE;
+		}
+
+		if (haveLossyEntry)
 		{
-			if (haveLossyEntry)
+			MemoryContextSwitchTo(tempCtx);
+			res = key->triConsistentFn(key);
+			MemoryContextSwitchTo(oldCtx);
+
+			if (res == GIN_TRUE || res == GIN_MAYBE)
 			{
-				/* Multiple lossy entries, punt */
+				/* Some of the lossy items on the heap page might match, punt */
 				key->curItem = curPageLossy;
 				key->curItemMatches = true;
 				key->recheckCurItem = true;
 				return;
 			}
-			lossyEntry = i;
-			haveLossyEntry = true;
 		}
-	}
 
-	/* prepare for calling consistentFn in temp context */
-	oldCtx = MemoryContextSwitchTo(tempCtx);
+		/*
+		 * Let's call the consistent function to check if this is a match.
+		 *
+		 * At this point we know that we don't need to return a lossy
+		 * whole-page pointer, but we might have matches for individual exact
+		 * item pointers, possibly in combination with a lossy pointer. Pass
+		 * lossy pointers as MAYBE to the ternary consistent function, to
+		 * let it decide if this tuple satisfies the overall key, even though
+		 * we don't know whether the lossy entries match.
+		 *
+		 * We might also not have advanced all the entry streams up to this
+		 * point yet. It's possible that the consistent function can
+		 * nevertheless decide that this is definitely a match or not a match,
+		 * even though we don't know if those unknown entries match, so we
+		 * pass them as MAYBE.
+		 */
+		for (i = 0; i < key->nentries; i++)
+		{
+			entry = key->scanEntry[i];
+			if (entry->isFinished)
+				key->entryRes[i] = GIN_FALSE;
+			else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
+				key->entryRes[i] = GIN_MAYBE; /* not loaded yet */
+			else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
+				key->entryRes[i] = GIN_MAYBE;
+			else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
+				key->entryRes[i] = GIN_TRUE;
+			else
+				key->entryRes[i] = GIN_FALSE;
+		}
 
-	if (haveLossyEntry)
-	{
-		/* Single lossy-page entry, so see if whole page matches */
-		memset(key->entryRes, FALSE, key->nentries);
-		key->entryRes[lossyEntry] = TRUE;
+		MemoryContextSwitchTo(tempCtx);
+		res = key->triConsistentFn(key);
+		MemoryContextSwitchTo(oldCtx);
 
-		if (callConsistentFn(ginstate, key))
+		switch (res)
 		{
-			/* Yes, so clean up ... */
-			MemoryContextSwitchTo(oldCtx);
-			MemoryContextReset(tempCtx);
-
-			/* and return lossy pointer for whole page */
-			key->curItem = curPageLossy;
-			key->curItemMatches = true;
-			key->recheckCurItem = true;
-			return;
-		}
-	}
+			case GIN_TRUE:
+				key->curItemMatches = true;
+				/* triConsistentFn set recheckCurItem */
+				break;
 
-	/*
-	 * At this point we know that we don't need to return a lossy whole-page
-	 * pointer, but we might have matches for individual exact item pointers,
-	 * possibly in combination with a lossy pointer.  Our strategy if there's
-	 * a lossy pointer is to try the consistentFn both ways and return a hit
-	 * if it accepts either one (forcing the hit to be marked lossy so it will
-	 * be rechecked).  An exception is that we don't need to try it both ways
-	 * if the lossy pointer is in a "hidden" entry, because the consistentFn's
-	 * result can't depend on that.
-	 *
-	 * Prepare entryRes array to be passed to consistentFn.
-	 */
-	for (i = 0; i < key->nentries; i++)
-	{
-		entry = key->scanEntry[i];
-		if (entry->isFinished == FALSE &&
-			ginCompareItemPointers(&entry->curItem, &key->curItem) == 0)
-			key->entryRes[i] = TRUE;
-		else
-			key->entryRes[i] = FALSE;
-	}
-	if (haveLossyEntry)
-		key->entryRes[lossyEntry] = TRUE;
+			case GIN_FALSE:
+				key->curItemMatches = false;
+				break;
 
-	res = callConsistentFn(ginstate, key);
+			case GIN_MAYBE:
+				key->curItemMatches = true;
+				key->recheckCurItem = true;
+				break;
 
-	if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
-	{
-		/* try the other way for the lossy item */
-		key->entryRes[lossyEntry] = FALSE;
+			default:
+				/*
+				 * the 'default' case shouldn't happen, but if the consistent
+				 * function returns something bogus, this is the safe result
+				 */
+				key->curItemMatches = true;
+				key->recheckCurItem = true;
+				break;
+		}
 
-		res = callConsistentFn(ginstate, key);
+		/*
+		 * We have a tuple, and we know if it mathes or not. If it's a
+		 * non-match, we could continue to find the next matching tuple, but
+		 * let's break out and give scanGetItem a chance to advance the other
+		 * keys. They might be able to skip past to a much higher TID, allowing
+		 * us to save work.
+		 */
+		break;
 	}
 
-	key->curItemMatches = res;
-	/* If we matched a lossy entry, force recheckCurItem = true */
-	if (haveLossyEntry)
-		key->recheckCurItem = true;
-
 	/* clean up after consistentFn calls */
 	MemoryContextSwitchTo(oldCtx);
 	MemoryContextReset(tempCtx);
@@ -1080,7 +1183,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
 
 			/*
 			 * If this is the first key, remember this location as a
-			 * potential match.
+			 * potential match, and proceed to check the rest of the keys.
 			 *
 			 * Otherwise, check if this is the same item that we checked the
 			 * previous keys for (or a lossy pointer for the same page). If
@@ -1091,21 +1194,20 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
 			if (i == 0)
 			{
 				*item = key->curItem;
+				continue;
+			}
+
+			if (ItemPointerIsLossyPage(&key->curItem) ||
+				ItemPointerIsLossyPage(item))
+			{
+				Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
+				match = (GinItemPointerGetBlockNumber(&key->curItem) ==
+						 GinItemPointerGetBlockNumber(item));
 			}
 			else
 			{
-				if (ItemPointerIsLossyPage(&key->curItem) ||
-					ItemPointerIsLossyPage(item))
-				{
-					Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
-					match = (GinItemPointerGetBlockNumber(&key->curItem) ==
-							 GinItemPointerGetBlockNumber(item));
-				}
-				else
-				{
-					Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
-					match = (ginCompareItemPointers(&key->curItem, item) == 0);
-				}
+				Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
+				match = (ginCompareItemPointers(&key->curItem, item) == 0);
 			}
 		}
 	} while (!match);
@@ -1322,7 +1424,7 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
 	{
 		GinScanKey	key = so->keys + i;
 
-		memset(key->entryRes, FALSE, key->nentries);
+		memset(key->entryRes, GIN_FALSE, key->nentries);
 	}
 	memset(pos->hasMatchKey, FALSE, so->nkeys);
 
@@ -1579,7 +1681,7 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
 		{
 			GinScanKey	key = so->keys + i;
 
-			if (!callConsistentFn(&so->ginstate, key))
+			if (!key->boolConsistentFn(key))
 			{
 				match = false;
 				break;
diff --git a/src/backend/access/gin/ginlogic.c b/src/backend/access/gin/ginlogic.c
new file mode 100644
index 0000000..e499c6e
--- /dev/null
+++ b/src/backend/access/gin/ginlogic.c
@@ -0,0 +1,136 @@
+/*-------------------------------------------------------------------------
+ *
+ * ginlogic.c
+ *	  routines for performing binary- and ternary-logic consistent checks.
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *			src/backend/access/gin/ginlogic.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/gin_private.h"
+#include "access/reloptions.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "miscadmin.h"
+#include "storage/indexfsm.h"
+#include "storage/lmgr.h"
+
+/*
+ * A dummy consistent function for an EVERYTHING key. Just claim it matches.
+ */
+static bool
+trueConsistentFn(GinScanKey key)
+{
+	key->recheckCurItem = false;
+	return true;
+}
+static GinLogicValue
+trueTriConsistentFn(GinScanKey key)
+{
+	return GIN_MAYBE;
+}
+
+/*
+ * A function for calling a regular, binary logic, consistent function.
+ */
+static bool
+normalBoolConsistentFn(GinScanKey key)
+{
+	/*
+	 * Initialize recheckCurItem in case the consistentFn doesn't know it
+	 * should set it.  The safe assumption in that case is to force recheck.
+	 */
+	key->recheckCurItem = true;
+
+	return DatumGetBool(FunctionCall8Coll(key->consistentFmgrInfo,
+										  key->collation,
+										  PointerGetDatum(key->entryRes),
+										  UInt16GetDatum(key->strategy),
+										  key->query,
+										  UInt32GetDatum(key->nuserentries),
+										  PointerGetDatum(key->extra_data),
+									   PointerGetDatum(&key->recheckCurItem),
+										  PointerGetDatum(key->queryValues),
+									 PointerGetDatum(key->queryCategories)));
+}
+
+/*
+ * This function implements a tri-state consistency check, using a boolean
+ * consistent function provided by the opclass.
+ *
+ * If there is only one MAYBE input, our strategy is to try the consistentFn
+ * both ways. If it returns TRUE for both, the tuple matches regardless of
+ * the MAYBE input, so we return TRUE. Likewise, if it returns FALSE for both,
+ * we return FALSE. Otherwise return MAYBE.
+ */
+static GinLogicValue
+shimTriConsistentFn(GinScanKey key)
+{
+	bool		foundMaybe = false;
+	int			maybeEntry = -1;
+	int			i;
+	bool		boolResult1;
+	bool		boolResult2;
+	bool		recheck1;
+	bool		recheck2;
+
+	for (i = 0; i < key->nentries; i++)
+	{
+		if (key->entryRes[i] == GIN_MAYBE)
+		{
+			if (foundMaybe)
+				return GIN_MAYBE;		/* more than one MAYBE input */
+			maybeEntry = i;
+			foundMaybe = true;
+		}
+	}
+
+	/*
+	 * If none of the inputs were MAYBE, so we can just call consistent
+	 * function as is.
+	 */
+	if (!foundMaybe)
+		return normalBoolConsistentFn(key);
+
+	/* Try the consistent function with the maybe-input set both ways */
+	key->entryRes[maybeEntry] = GIN_TRUE;
+	boolResult1 = normalBoolConsistentFn(key);
+	recheck1 = key->recheckCurItem;
+
+	key->entryRes[maybeEntry] = GIN_FALSE;
+	boolResult2 = normalBoolConsistentFn(key);
+	recheck2 = key->recheckCurItem;
+
+	if (!boolResult1 && !boolResult2)
+		return GIN_FALSE;
+
+	key->recheckCurItem = recheck1 || recheck2;
+	if (boolResult1 && boolResult2)
+		return GIN_TRUE;
+	else
+		return GIN_MAYBE;
+}
+
+void
+GinInitConsistentMethod(GinState *ginstate, GinScanKey key)
+{
+	if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
+	{
+		key->boolConsistentFn = trueConsistentFn;
+		key->triConsistentFn = trueTriConsistentFn;
+	}
+	else
+	{
+		key->consistentFmgrInfo = &ginstate->consistentFn[key->attnum - 1];
+		key->collation = ginstate->supportCollation[key->attnum - 1];
+		key->boolConsistentFn = normalBoolConsistentFn;
+		key->triConsistentFn = shimTriConsistentFn;
+	}
+}
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index bb0ab31..2b733ab 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -13,10 +13,13 @@
 #include "access/genam.h"
 #include "access/gin.h"
 #include "access/itup.h"
+#include "lib/binaryheap.h"
 #include "fmgr.h"
 #include "storage/bufmgr.h"
 #include "utils/rbtree.h"
 
+typedef struct GinScanKeyData *GinScanKey;
+typedef struct GinScanEntryData *GinScanEntry;
 
 /*
  * Page opaque data in an inverted index page.
@@ -588,6 +591,19 @@ extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
 extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
 				 GinNullCategory *category);
 
+/* ginlogic.c */
+
+enum
+{
+	GIN_FALSE = 0,
+	GIN_TRUE = 1,
+	GIN_MAYBE = 2
+} GinLogicValueEnum;
+
+typedef char GinLogicValue;
+
+extern void GinInitConsistentMethod(GinState *ginstate, GinScanKey key);
+
 /* gininsert.c */
 extern Datum ginbuild(PG_FUNCTION_ARGS);
 extern Datum ginbuildempty(PG_FUNCTION_ARGS);
@@ -732,10 +748,6 @@ extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *g
  * nuserentries is the number that extractQueryFn returned (which is what
  * we report to consistentFn).	The "user" entries must come first.
  */
-typedef struct GinScanKeyData *GinScanKey;
-
-typedef struct GinScanEntryData *GinScanEntry;
-
 typedef struct GinScanKeyData
 {
 	/* Real number of entries in scanEntry[] (always > 0) */
@@ -746,8 +758,19 @@ typedef struct GinScanKeyData
 	/* array of GinScanEntry pointers, one per extracted search condition */
 	GinScanEntry *scanEntry;
 
+	/* A heap containing unfinished entries, with curItem > advancePast */
+	binaryheap *entryHeap;
+
+	/* An array containing unfinished entries with curItem <= advancePast */
+	GinScanEntry *unloadedEntries;
+	int			nunloaded;
+
 	/* array of check flags, reported to consistentFn */
 	bool	   *entryRes;
+	bool		(*boolConsistentFn) (GinScanKey key);
+	bool		(*triConsistentFn) (GinScanKey key);
+	FmgrInfo   *consistentFmgrInfo;
+	Oid			collation;
 
 	/* other data needed for calling consistentFn */
 	Datum		query;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to