From 7e86fe5aaff4ef18abc79bdc3bd0d0f1a23f2fc8 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Wed, 18 Jun 2025 19:32:57 -0400
Subject: [PATCH v2] Simplify and improve row compare NULL handling.

Improve _bt_first's handling of NULL row comparison members in such a
way as to make it consistent with _bt_check_rowcompare's approach.  In
other words, make sure that code that starts primitive index scans that
involve row compares fully agrees with code that ends primitive index
scans -- even in the presence of a NULL row compare member.  This makes
row comparisons more similar to scalar inequality keys, obviating the
need for _bt_set_startikey to consider row comparisons directly.

_bt_first will now avoid uselessly scanning earlier index tuples, that
cannot possibly contain matching tuples due to the use of a NULL row
compare member.  The goal isn't to improve performance; the goal is to
make _bt_first agree with _bt_check_rowcompare about where primitive
index scans should start and end.

Also go back to marking row compare members after the first as required
when safe.  This is more or less a straight revert of 2016 bugfix commit
a298a1e0.  We can address the complaint that led to that commit by being
more careful about the conditions where NULLs can end the scan in the
row compare path (for row members beyond the first one, which can once
again be marked required when safe).

Follow-up to bugfix commit 5f4d98d4, which taught _bt_set_startikey to
avoid the use of forcenonrequired mode iff it reached a row compare key.
Now _bt_set_startikey never needs to avoid the use of nonrequired mode,
which is simpler, more consistent, and more robust.
---
 src/backend/access/nbtree/nbtpreprocesskeys.c |  18 ++-
 src/backend/access/nbtree/nbtsearch.c         |  43 +++++-
 src/backend/access/nbtree/nbtutils.c          | 122 +++++++++++-------
 3 files changed, 126 insertions(+), 57 deletions(-)

diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c
index a136e4bbf..44edb2e48 100644
--- a/src/backend/access/nbtree/nbtpreprocesskeys.c
+++ b/src/backend/access/nbtree/nbtpreprocesskeys.c
@@ -786,12 +786,26 @@ _bt_mark_scankey_required(ScanKey skey)
 	if (skey->sk_flags & SK_ROW_HEADER)
 	{
 		ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
+		AttrNumber	attno = skey->sk_attno;
 
 		/* First subkey should be same column/operator as the header */
 		Assert(subkey->sk_flags & SK_ROW_MEMBER);
-		Assert(subkey->sk_attno == skey->sk_attno);
+		Assert(subkey->sk_attno == attno);
 		Assert(subkey->sk_strategy == skey->sk_strategy);
-		subkey->sk_flags |= addflags;
+
+		for (;;)
+		{
+			Assert(subkey->sk_flags & SK_ROW_MEMBER);
+			if (subkey->sk_attno != attno)
+				break;			/* non-adjacent key, so not required */
+			if (subkey->sk_strategy != skey->sk_strategy)
+				break;			/* wrong direction, so not required */
+			subkey->sk_flags |= addflags;
+			if (subkey->sk_flags & SK_ROW_END)
+				break;
+			subkey++;
+			attno++;
+		}
 	}
 }
 
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 36544ecfd..ff453ec4e 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1288,14 +1288,23 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 			 * even if the row comparison is of ">" or "<" type, because the
 			 * condition applied to all but the last row member is effectively
 			 * ">=" or "<=", and so the extra keys don't break the positioning
-			 * scheme.  But, by the same token, if we aren't able to use all
-			 * the row members, then the part of the row comparison that we
+			 * scheme.  But, by the same token, if a "column gap" appears
+			 * between members, then the part of the row comparison that we
 			 * did use has to be treated as just a ">=" or "<=" condition, and
 			 * so we'd better adjust strat_total accordingly.
+			 *
+			 * We're able to use a _more_ restrictive strategy when we
+			 * encounter a NULL row compare member (which is unsatisfiable).
+			 * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an
+			 * insertion scan key "a > 1".  All rows where "a = 1" have to
+			 * perform a NULL row member comparison (or would, if we didn't
+			 * use the more restrictive ">" strategy), which is guranteed to
+			 * return false/return NULL.
 			 */
 			if (i == keysz - 1)
 			{
-				bool		used_all_subkeys = false;
+				bool		keep_strat_total = false;
+				bool		tighten_strat_total = false;
 
 				Assert(!(subkey->sk_flags & SK_ROW_END));
 				for (;;)
@@ -1307,19 +1316,26 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 					if (subkey->sk_strategy != cur->sk_strategy)
 						break;	/* wrong direction, can't use it */
 					if (subkey->sk_flags & SK_ISNULL)
-						break;	/* can't use null keys */
+					{
+						/* Unsatisfiable NULL row member */
+						keep_strat_total = true;
+						tighten_strat_total = true;
+						break;
+					}
 					Assert(keysz < INDEX_MAX_KEYS);
 					memcpy(inskey.scankeys + keysz, subkey,
 						   sizeof(ScanKeyData));
 					keysz++;
 					if (subkey->sk_flags & SK_ROW_END)
 					{
-						used_all_subkeys = true;
+						keep_strat_total = true;
 						break;
 					}
 				}
-				if (!used_all_subkeys)
+				if (!keep_strat_total)
 				{
+					/* Use less restrictive strategy (and fewer keys) */
+					Assert(!tighten_strat_total);
 					switch (strat_total)
 					{
 						case BTLessStrategyNumber:
@@ -1330,7 +1346,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 							break;
 					}
 				}
-				break;			/* done with outer loop */
+				if (tighten_strat_total)
+				{
+					/* Use more restrictive strategy (and fewer keys) */
+					switch (strat_total)
+					{
+						case BTLessEqualStrategyNumber:
+							strat_total = BTLessStrategyNumber;
+							break;
+						case BTGreaterEqualStrategyNumber:
+							strat_total = BTGreaterStrategyNumber;
+							break;
+					}
+				}
+				break;			/* done with outermost loop */
 			}
 		}
 		else
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index c71d1b6f2..ea6fab02f 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -2565,23 +2565,7 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
 			 * whether or not every tuple on the page satisfies a RowCompare
 			 * key based only on firsttup and lasttup -- so we just give up.
 			 */
-			if (!start_past_saop_eq && !so->skipScan)
-				break;			/* unsafe to go further */
-
-			/*
-			 * We have to be even more careful with RowCompares that come
-			 * after an array: we assume it's unsafe to even bypass the array.
-			 * Calling _bt_start_array_keys to recover the scan's arrays
-			 * following use of forcenonrequired mode isn't compatible with
-			 * _bt_check_rowcompare's continuescan=false behavior with NULL
-			 * row compare members.  _bt_advance_array_keys must not make a
-			 * decision on the basis of a key not being satisfied in the
-			 * opposite-to-scan direction until the scan reaches a leaf page
-			 * where the same key begins to be satisfied in scan direction.
-			 * The _bt_first !used_all_subkeys behavior makes this limitation
-			 * hard to work around some other way.
-			 */
-			return;				/* completely unsafe to set pstate.startikey */
+			break;				/* unsafe */
 		}
 		if (key->sk_strategy != BTEqualStrategyNumber)
 		{
@@ -3078,6 +3062,34 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 
 		Assert(subkey->sk_flags & SK_ROW_MEMBER);
 
+		/*
+		 * Unlike the simple-scankey case, NULL row members aren't disallowed
+		 * (except when it's the first row element that has the NULL arg,
+		 * where preprocessing recognizes the scan's qual as unsatisfiable).
+		 * But it can never match any rows.
+		 *
+		 * If this row comparison member is marked required in the current
+		 * scan direction, we can stop the scan; there can't be another tuple
+		 * that will succeed.
+		 */
+		if (subkey->sk_flags & SK_ISNULL)
+		{
+			/* can't be the first row member (preprocessing catches this) */
+			Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
+
+			if (forcenonrequired)
+			{
+				/* treating scan's keys as non-required */
+			}
+			else if ((subkey->sk_flags & SK_BT_REQFWD) &&
+					 ScanDirectionIsForward(dir))
+				*continuescan = false;
+			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
+					 ScanDirectionIsBackward(dir))
+				*continuescan = false;
+			return false;
+		}
+
 		if (subkey->sk_attno > tupnatts)
 		{
 			/*
@@ -3087,11 +3099,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			 * attribute passes the qual.
 			 */
 			Assert(BTreeTupleIsPivot(tuple));
-			cmpresult = 0;
-			if (subkey->sk_flags & SK_ROW_END)
-				break;
-			subkey++;
-			continue;
+			return true;
 		}
 
 		datum = index_getattr(tuple,
@@ -3107,6 +3115,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			}
 			else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
 			{
+				int			reqflags = SK_BT_REQBKWD;
+
 				/*
 				 * Since NULLs are sorted before non-NULLs, we know we have
 				 * reached the lower limit of the range of values for this
@@ -3118,13 +3128,34 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 				 * have initially positioned to the start of the index.
 				 * (_bt_advance_array_keys also relies on this behavior during
 				 * forward scans.)
+				 *
+				 * For example, quals like "WHERE (a, b, c) < (2, 42, 333)"
+				 * can terminate a backwards scan upon reaching the rightmost
+				 * tuple whose "a" column has a NULL, since the entire index
+				 * cannot contain further matches to the left of that tuple.
+				 * NULL is "<" 2, but it nevertheless indicates the end of all
+				 * matching tuples to our < row compare qual.
+				 *
+				 * Note, however, that it is _not_ safe to end the scan on a
+				 * row member that's marked required in the opposite-to-scan
+				 * direction for row compare members beyond the first one.
+				 * We'll only terminate the scan on a second or subsequent row
+				 * member when they're marked required in the scan direction.
+				 * For example, quals like "WHERE (a, b, c) > (2, 42, 333)"
+				 * can terminate a backwards scan upon reaching the index's
+				 * rightmost "a = 2" tuple whose "b" column contains a NULL.
+				 * NULL is treated like just another value that's "<" 42.
 				 */
-				if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+				if (subkey->sk_attno == skey->sk_attno)
+					reqflags |= SK_BT_REQFWD;	/* for first member only */
+				if ((subkey->sk_flags & reqflags) &&
 					ScanDirectionIsBackward(dir))
 					*continuescan = false;
 			}
 			else
 			{
+				int			reqflags = SK_BT_REQFWD;
+
 				/*
 				 * Since NULLs are sorted after non-NULLs, we know we have
 				 * reached the upper limit of the range of values for this
@@ -3136,8 +3167,27 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 				 * may have initially positioned to the end of the index.
 				 * (_bt_advance_array_keys also relies on this behavior during
 				 * backward scans.)
+				 *
+				 * For example, quals like "WHERE (a, b, c) > (2, 42, 333)"
+				 * can terminate a forwards scan upon reaching the leftmost
+				 * tuple whose "a" column has a NULL, since the entire index
+				 * cannot contain further matches to the right of that tuple.
+				 * NULL is ">" 2, but it nevertheless indicates the end of all
+				 * matching tuples to our > row compare qual.
+				 *
+				 * Note, however, that it is _not_ safe to end the scan on a
+				 * row member that's marked required in the opposite-to-scan
+				 * direction for row compare members beyond the first one.
+				 * We'll only terminate the scan on a second or subsequent row
+				 * member when they're marked required in the scan direction.
+				 * For example, quals like "WHERE (a, b, c) < (2, 42, 333)"
+				 * can terminate a forwards scan upon reaching the index's
+				 * leftmost "a = 2" tuple whose "b" column contains a NULL.
+				 * NULL is treated like just another value that's ">" 42.
 				 */
-				if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+				if (subkey->sk_attno == skey->sk_attno)
+					reqflags |= SK_BT_REQBKWD;	/* for first member only */
+				if ((subkey->sk_flags & reqflags) &&
 					ScanDirectionIsForward(dir))
 					*continuescan = false;
 			}
@@ -3148,30 +3198,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			return false;
 		}
 
-		if (subkey->sk_flags & SK_ISNULL)
-		{
-			/*
-			 * Unlike the simple-scankey case, this isn't a disallowed case
-			 * (except when it's the first row element that has the NULL arg).
-			 * But it can never match.  If all the earlier row comparison
-			 * columns are required for the scan direction, we can stop the
-			 * scan, because there can't be another tuple that will succeed.
-			 */
-			Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
-			subkey--;
-			if (forcenonrequired)
-			{
-				/* treating scan's keys as non-required */
-			}
-			else if ((subkey->sk_flags & SK_BT_REQFWD) &&
-					 ScanDirectionIsForward(dir))
-				*continuescan = false;
-			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
-					 ScanDirectionIsBackward(dir))
-				*continuescan = false;
-			return false;
-		}
-
 		/* Perform the test --- three-way comparison not bool operator */
 		cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
 													subkey->sk_collation,
-- 
2.49.0

