From a56cb787bb302d8bc1a2f53bf1e82bc1cb37fb01 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sun, 28 Jul 2024 16:36:34 +0300
Subject: [PATCH v29 2/2] Teach bitmap scan about transforming OR clauses to
 ANY expression

Now, (expr op C1) OR (expr op C2) ... are transformed to
expr op ANY(ARRAY[C1, C2, ...]) while matching a clause to an index.
This commit teaches bitmap scan planning to take advantage of this
transformation.

Similar clauses are grouped together before bitmap-OR paths are considered.
Each group is matched to indexes both as a whole and clause-by-clause, and
the cheaper of the two options is kept.
---
 src/backend/optimizer/path/indxpath.c      | 266 ++++++++++++++++++++-
 src/test/regress/expected/create_index.out |  28 +--
 src/test/regress/expected/join.out         |  56 +++--
 src/tools/pgindent/typedefs.list           |   1 +
 4 files changed, 304 insertions(+), 47 deletions(-)

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 4fd0bbce2ce..f697b95db39 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1177,6 +1177,248 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
 	return result;
 }
 
+typedef struct
+{
+	int			indexnum;		/* ordinal of matched bitmap-capable index, or -1 */
+	int			colnum;			/* matched key column of that index, or -1 */
+	Oid			opno;			/* operator (commuted if needed), or InvalidOid */
+	int			argindex;		/* position of the argument in the OR list */
+} OrArgIndexMatch;
+
+/*
+ * qsort comparator for OrArgIndexMatch: orders by indexnum, then colnum,
+ * then opno, then argindex, all ascending.
+ */
+static int
+or_arg_index_match_cmp(const void *a, const void *b)
+{
+	const OrArgIndexMatch *lhs = (const OrArgIndexMatch *) a;
+	const OrArgIndexMatch *rhs = (const OrArgIndexMatch *) b;
+
+	/* Most significant key: which index the argument matched */
+	if (lhs->indexnum != rhs->indexnum)
+		return (lhs->indexnum < rhs->indexnum) ? -1 : 1;
+
+	/* Next: the matched key column within that index */
+	if (lhs->colnum != rhs->colnum)
+		return (lhs->colnum < rhs->colnum) ? -1 : 1;
+
+	/* Next: the operator, so entries with equal operators are adjacent */
+	if (lhs->opno != rhs->opno)
+		return (lhs->opno < rhs->opno) ? -1 : 1;
+
+	/* Finally: position in the OR list breaks any remaining tie */
+	if (lhs->argindex != rhs->argindex)
+		return (lhs->argindex < rhs->argindex) ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * group_or_args
+ *		Regroup the arguments of an OR clause: those of the form "indexkey op
+ *		const" (a Const or Param on either side) that match the same index
+ *		key column with the same operator are wrapped into one sub-OR.
+ *
+ * Returns a new list containing every member of 'orargs' exactly once,
+ * either as-is or inside a sub-OR RestrictInfo; the order may change.
+ */
+static List *
+group_or_args(PlannerInfo *root, RelOptInfo *rel,
+			  RestrictInfo *rinfo, List *orargs)
+{
+	int			n = list_length(orargs);
+	int			i;
+	int			group_start;
+	OrArgIndexMatch *matches;
+	ListCell   *lc;
+	ListCell   *lc2;
+	List	   *result = NIL;
+
+	/*
+	 * Classify each argument; unclassifiable ones keep indexnum == -1.
+	 */
+	i = -1;
+	matches = (OrArgIndexMatch *) palloc(sizeof(OrArgIndexMatch) * n);
+	foreach(lc, orargs)
+	{
+		Node	   *arg = lfirst(lc);
+		RestrictInfo *argrinfo;
+		OpExpr	   *clause;
+		Oid			opno;
+		Node	   *leftop,
+				   *rightop;
+		Node	   *nconst_expr;
+		int			indexnum;
+		int			colnum;
+
+		i++;
+		matches[i].argindex = i;
+		matches[i].indexnum = -1;
+		matches[i].colnum = -1;
+		matches[i].opno = InvalidOid;
+
+		if (!IsA(arg, RestrictInfo))
+			continue;
+
+		argrinfo = castNode(RestrictInfo, arg);
+
+		if (!IsA(argrinfo->clause, OpExpr))
+			continue;
+
+		clause = (OpExpr *) argrinfo->clause;
+		opno = clause->opno;
+
+		/* Only binary operators can match; get_rightop() is NULL otherwise */
+		if (list_length(clause->args) != 2)
+			continue;
+
+		leftop = get_leftop(clause);
+		if (IsA(leftop, RelabelType))
+			leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+		rightop = get_rightop(clause);
+		if (IsA(rightop, RelabelType))
+			rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+		if (IsA(leftop, Const) || IsA(leftop, Param))
+		{
+			opno = get_commutator(opno);
+			if (!OidIsValid(opno))
+				continue;		/* no commutator, can't reverse the order */
+			nconst_expr = rightop;
+		}
+		else if (IsA(rightop, Const) || IsA(rightop, Param))
+			nconst_expr = leftop;
+		else
+			continue;			/* neither side is a Const or Param */
+
+		/* Match against bitmap-capable indexes; the last match found wins */
+		indexnum = 0;
+		foreach(lc2, rel->indexlist)
+		{
+			IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2);
+
+			/* Ignore index if it doesn't support bitmap scans */
+			if (!index->amhasgetbitmap)
+				continue;
+
+			for (colnum = 0; colnum < index->nkeycolumns; colnum++)
+			{
+				if (match_index_to_operand(nconst_expr, colnum, index))
+				{
+					matches[i].indexnum = indexnum;
+					matches[i].colnum = colnum;
+					matches[i].opno = opno;
+				}
+			}
+			indexnum++;
+		}
+	}
+
+	/* Sort clauses to make similar clauses go together */
+	pg_qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
+
+	/* Group similar clauses; i == n flushes the final group */
+	group_start = 0;
+	for (i = 1; i <= n; i++)
+	{
+		if (i == n ||
+			matches[i].indexnum != matches[group_start].indexnum ||
+			matches[i].colnum != matches[group_start].colnum ||
+			matches[i].opno != matches[group_start].opno ||
+			matches[i].indexnum == -1)
+		{
+			if (i - group_start == 1)
+				result = lappend(result, list_nth(orargs, matches[group_start].argindex));
+			else
+			{
+				List	   *args = NIL;
+				RestrictInfo *subrinfo = makeNode(RestrictInfo);
+				int			j;
+
+				for (j = group_start; j < i; j++)
+					args = lappend(args, list_nth(orargs, matches[j].argindex));
+
+				*subrinfo = *rinfo;
+				subrinfo->clause = make_orclause(args);
+				subrinfo->orclause = subrinfo->clause;
+				result = lappend(result, subrinfo);
+			}
+
+			group_start = i;
+		}
+	}
+
+	pfree(matches);
+	return result;
+}
+
+/*
+ * make_bitmap_paths_for_or_group
+ *		Build bitmap paths for one sub-OR group produced by group_or_args().
+ *
+ * 'ri' is the sub-OR RestrictInfo; 'other_clauses' is handed unchanged to
+ * build_paths_for_OR().
+ *
+ * Two alternatives are costed: matching the whole group as a single clause,
+ * and matching each member separately (usable only if every member yields
+ * a path).  Returns the path list of the cheaper alternative by summed
+ * total_cost, preferring the split form on a tie; NIL if neither works.
+ */
+static List *
+make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel,
+							   RestrictInfo *ri, List *other_clauses)
+{
+	List	   *members = ((BoolExpr *) ri->clause)->args;
+	List	   *whole_paths = NIL;
+	List	   *member_paths = NIL;
+	Cost		whole_cost = 0.0;
+	Cost		member_cost = 0.0;
+	List	   *candidates;
+	ListCell   *cell;
+
+	/* Alternative 1: hand the group to the index matcher as one clause */
+	candidates = build_paths_for_OR(root, rel, list_make1(ri), other_clauses);
+	if (candidates != NIL)
+	{
+		Path	   *best = choose_bitmap_and(root, rel, candidates);
+
+		whole_cost = best->total_cost;
+		whole_paths = list_make1(best);
+	}
+
+	/* Alternative 2: one bitmap path per member of the group */
+	foreach(cell, members)
+	{
+		Path	   *best;
+
+		candidates = build_paths_for_OR(root, rel,
+										list_make1(lfirst(cell)),
+										other_clauses);
+		if (candidates == NIL)
+		{
+			/* one unmatched member makes this alternative unusable */
+			member_paths = NIL;
+			break;
+		}
+
+		best = choose_bitmap_and(root, rel, candidates);
+		member_cost += best->total_cost;
+		member_paths = lappend(member_paths, best);
+	}
+
+	/* Pick whichever alternative exists; if both do, the cheaper one */
+	if (member_paths == NIL)
+		return whole_paths;
+	if (whole_paths == NIL)
+		return member_paths;
+	if (whole_cost < member_cost)
+		return whole_paths;
+	return member_paths;
+}
+
+
 /*
  * generate_bitmap_or_paths
  *		Look through the list of clauses to find OR clauses, and generate
@@ -1207,6 +1449,7 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 		List	   *pathlist;
 		Path	   *bitmapqual;
 		ListCell   *j;
+		List	   *grouped;
 
 		/* Ignore RestrictInfos that aren't ORs */
 		if (!restriction_is_or_clause(rinfo))
@@ -1217,7 +1460,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 		 * the OR, else we can't use it.
 		 */
 		pathlist = NIL;
-		foreach(j, ((BoolExpr *) rinfo->orclause)->args)
+		grouped = group_or_args(root, rel, rinfo, ((BoolExpr *) rinfo->orclause)->args);
+		foreach(j, grouped)
 		{
 			Node	   *orarg = (Node *) lfirst(j);
 			List	   *indlist;
@@ -1237,12 +1481,28 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 															   andargs,
 															   all_clauses));
 			}
+			else if (restriction_is_or_clause(castNode(RestrictInfo, orarg)))
+			{
+				RestrictInfo *ri = castNode(RestrictInfo, orarg);
+
+				indlist = make_bitmap_paths_for_or_group(root, rel, ri, list_delete(all_clauses, rinfo));
+
+				if (indlist == NIL)
+				{
+					pathlist = NIL;
+					break;
+				}
+				else
+				{
+					pathlist = list_concat(pathlist, indlist);
+					continue;
+				}
+			}
 			else
 			{
 				RestrictInfo *ri = castNode(RestrictInfo, orarg);
 				List	   *orargs;
 
-				Assert(!restriction_is_or_clause(ri));
 				orargs = list_make1(ri);
 
 				indlist = build_paths_for_OR(root, rel,
@@ -2878,7 +3138,7 @@ match_orclause_to_indexcol(PlannerInfo *root,
 			if (!OidIsValid(opno))
 			{
 				/* commutator doesn't exist, we can't reverse the order */
-				return false;
+				return NULL;
 			}
 
 			nconst_expr = rightop;
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index c2b25936c8c..c6feef03810 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1982,25 +1982,24 @@ SELECT count(*) FROM tenk1
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41;
-                                                       QUERY PLAN                                                       
-------------------------------------------------------------------------------------------------------------------------
+                                                         QUERY PLAN                                                          
+-----------------------------------------------------------------------------------------------------------------------------
  Aggregate
    ->  Bitmap Heap Scan on tenk1
-         Recheck Cond: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41))
+         Recheck Cond: (((hundred = 42) AND ((thousand = ANY ('{42,99}'::integer[])) OR (tenthous < 2))) OR (thousand = 41))
+         Filter: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41))
          ->  BitmapOr
                ->  BitmapAnd
                      ->  Bitmap Index Scan on tenk1_hundred
                            Index Cond: (hundred = 42)
                      ->  BitmapOr
                            ->  Bitmap Index Scan on tenk1_thous_tenthous
-                                 Index Cond: (thousand = 42)
-                           ->  Bitmap Index Scan on tenk1_thous_tenthous
-                                 Index Cond: (thousand = 99)
+                                 Index Cond: (thousand = ANY ('{42,99}'::integer[]))
                            ->  Bitmap Index Scan on tenk1_thous_tenthous
                                  Index Cond: (tenthous < 2)
                ->  Bitmap Index Scan on tenk1_thous_tenthous
                      Index Cond: (thousand = 41)
-(16 rows)
+(15 rows)
 
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41;
@@ -2012,22 +2011,21 @@ SELECT count(*) FROM tenk1
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2);
-                                                       QUERY PLAN                                                        
--------------------------------------------------------------------------------------------------------------------------
+                                                          QUERY PLAN                                                          
+------------------------------------------------------------------------------------------------------------------------------
  Aggregate
    ->  Bitmap Heap Scan on tenk1
-         Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2))))
+         Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR (thousand = ANY ('{42,41}'::integer[]))))
+         Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))
          ->  BitmapAnd
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 42)
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: (thousand = 42)
-                     ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: (thousand = 41)
                      ->  Bitmap Index Scan on tenk1_thous_tenthous
                            Index Cond: ((thousand = 99) AND (tenthous = 2))
-(13 rows)
+                     ->  Bitmap Index Scan on tenk1_thous_tenthous
+                           Index Cond: (thousand = ANY ('{42,41}'::integer[]))
+(12 rows)
 
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index abe98ff3c53..d26a93831d5 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4226,20 +4226,20 @@ select * from tenk1 a join tenk1 b on
  Nested Loop
    Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
    ->  Bitmap Heap Scan on tenk1 b
-         Recheck Cond: ((unique1 = 2) OR (hundred = 4))
+         Recheck Cond: ((hundred = 4) OR (unique1 = 2))
          ->  BitmapOr
-               ->  Bitmap Index Scan on tenk1_unique1
-                     Index Cond: (unique1 = 2)
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 4)
+               ->  Bitmap Index Scan on tenk1_unique1
+                     Index Cond: (unique1 = 2)
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
+               Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
                      ->  Bitmap Index Scan on tenk1_unique2
                            Index Cond: (unique2 = 3)
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = 1)
 (17 rows)
 
 explain (costs off)
@@ -4253,12 +4253,12 @@ select * from tenk1 a join tenk1 b on
          Filter: ((unique1 = 2) OR (ten = 4))
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
+               Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
                      ->  Bitmap Index Scan on tenk1_unique2
                            Index Cond: (unique2 = 3)
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = 1)
 (12 rows)
 
 explain (costs off)
@@ -4270,21 +4270,21 @@ select * from tenk1 a join tenk1 b on
  Nested Loop
    Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
    ->  Bitmap Heap Scan on tenk1 b
-         Recheck Cond: ((unique1 = 2) OR (hundred = 4))
+         Recheck Cond: ((hundred = 4) OR (unique1 = 2))
          ->  BitmapOr
-               ->  Bitmap Index Scan on tenk1_unique1
-                     Index Cond: (unique1 = 2)
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 4)
+               ->  Bitmap Index Scan on tenk1_unique1
+                     Index Cond: (unique1 = 2)
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[])))
+               Recheck Cond: ((unique2 = ANY ('{3,7}'::integer[])) OR (unique1 = 1))
                Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
                      ->  Bitmap Index Scan on tenk1_unique2
                            Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = 1)
 (18 rows)
 
 explain (costs off)
@@ -4296,21 +4296,21 @@ select * from tenk1 a join tenk1 b on
  Nested Loop
    Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
    ->  Bitmap Heap Scan on tenk1 b
-         Recheck Cond: ((unique1 = 2) OR (hundred = 4))
+         Recheck Cond: ((hundred = 4) OR (unique1 = 2))
          ->  BitmapOr
-               ->  Bitmap Index Scan on tenk1_unique1
-                     Index Cond: (unique1 = 2)
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 4)
+               ->  Bitmap Index Scan on tenk1_unique1
+                     Index Cond: (unique1 = 2)
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[])))
+               Recheck Cond: ((unique2 = ANY ('{3,7}'::integer[])) OR (unique1 = 1))
                Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
                      ->  Bitmap Index Scan on tenk1_unique2
                            Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = 1)
 (18 rows)
 
 explain (costs off)
@@ -4324,18 +4324,16 @@ select * from tenk1 a join tenk1 b on
    ->  Seq Scan on tenk1 b
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[])))
+               Recheck Cond: ((unique2 = ANY ('{3,7}'::integer[])) OR (unique1 = ANY ('{3,1}'::integer[])) OR (unique1 < 20))
                Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 < 20)
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 3)
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = 1)
                      ->  Bitmap Index Scan on tenk1_unique2
                            Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-(16 rows)
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 < 20)
+(14 rows)
 
 --
 -- test placement of movable quals in a parameterized join tree
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 992c80f9350..ba66983cf45 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1764,6 +1764,7 @@ OprCacheKey
 OprInfo
 OprProofCacheEntry
 OprProofCacheKey
+OrArgIndexMatch
 OuterJoinClauseInfo
 OutputPluginCallbacks
 OutputPluginOptions
-- 
2.39.3 (Apple Git-145)

