From 1829564b42450cac8d030d9942ef0a5f26ff86fe Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Fri, 28 Mar 2025 00:15:29 +0200
Subject: [PATCH v1] Make group_similar_or_args() reorder clause list as little
 as possible

Currently, group_similar_or_args() permutes original positions of clauses
independently on whether it manages to find any groups of similar clauses.
While we are not providing any strict warranties on saving the original order
of OR-clauses, it is preferred that the original order be modified as little
as possible.

This commit changes the reordering algorithm of group_similar_or_args() in
the following way.  We reorder each group of similar clauses so that the
first item of the group stays in place, but all the other items are moved
after it.  So, if there are no similar clauses, the order of clauses stays
the same.  When there are some groups, only required reordering happens while
the rest of the clauses remain in their places.

Reported-by: Andrei Lepikhov <lepihov@gmail.com>
Discussion: https://postgr.es/m/3ac7c436-81e1-4191-9caf-b0dd70b51511%40gmail.com
---
 src/backend/optimizer/path/indxpath.c        | 60 +++++++++++++++++++-
 src/test/regress/expected/create_index.out   | 18 +++---
 src/test/regress/expected/join.out           | 52 ++++++++---------
 src/test/regress/expected/partition_join.out | 12 ++--
 4 files changed, 100 insertions(+), 42 deletions(-)

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index a43ca16d683..2a18bf7c7c3 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1189,6 +1189,8 @@ typedef struct
 	Oid			inputcollid;	/* OID of the OpClause input collation */
 	int			argindex;		/* index of the clause in the list of
 								 * arguments */
+	int			groupindex;		/* value of argindex for the fist clause in
+								 * the group of similar clauses */
 } OrArgIndexMatch;
 
 /*
@@ -1229,6 +1231,29 @@ or_arg_index_match_cmp(const void *a, const void *b)
 	return 0;
 }
 
+/*
+ * Another comparison function for OrArgIndexMatch.  It sorts groups together
+ * using groupindex.  The group items are then sorted by argindex.
+ */
+static int
+or_arg_index_match_cmp_group(const void *a, const void *b)
+{
+	const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a;
+	const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b;
+
+	if (match_a->groupindex < match_b->groupindex)
+		return -1;
+	else if (match_a->groupindex > match_b->groupindex)
+		return 1;
+
+	if (match_a->argindex < match_b->argindex)
+		return -1;
+	else if (match_a->argindex > match_b->argindex)
+		return 1;
+
+	return 0;
+}
+
 /*
  * group_similar_or_args
  *		Transform incoming OR-restrictinfo into a list of sub-restrictinfos,
@@ -1282,6 +1307,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
 
 		i++;
 		matches[i].argindex = i;
+		matches[i].groupindex = i;
 		matches[i].indexnum = -1;
 		matches[i].colnum = -1;
 		matches[i].opno = InvalidOid;
@@ -1400,9 +1426,41 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
 		return orargs;
 	}
 
-	/* Sort clauses to make similar clauses go together */
+	/*
+	 * Sort clauses to make similar clauses go together.  But at the same
+	 * time, we would like to change the order of clauses as little as
+	 * possible.  To have this property, we reorder each group of similar
+	 * clauses so that the first item of the group stays in place, but all the
+	 * other items are moved after it.  So, if there are no similar clauses,
+	 * the order of clauses stays the same.  When there are some groups, only
+	 * required reordering happens while the rest of the clauses remain in
+	 * their places.  That is achieved by assigning a 'groupindex' to each
+	 * clause: the number of the first item in the group in the original
+	 * clause list.
+	 */
 	qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
 
+	/* Assign groupindex to the sorted clauses */
+	for (i = 1; i < n; i++)
+	{
+		/*
+		 * When two clauses are similar and should belong to the same group,
+		 * copy the 'groupindex' from the previous clause.  Given we are
+		 * considering clauses in direct order, all the clauses would have a
+		 * 'groupindex' equal to the 'groupindex' of the first clause in the
+		 * group.
+		 */
+		if ((matches[i].indexnum == matches[i - 1].indexnum ||
+			 matches[i].colnum == matches[i - 1].colnum ||
+			 matches[i].opno == matches[i - 1].opno ||
+			 matches[i].inputcollid == matches[i - 1].inputcollid) &&
+			matches[i].indexnum != -1)
+			matches[i].groupindex = matches[i - 1].groupindex;
+	}
+
+	/* Resort clauses first by groupindex then by argindex */
+	qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp_group);
+
 	/*
 	 * Group similar clauses into single sub-restrictinfo. Side effect: the
 	 * resulting list of restrictions will be sorted by indexnum and colnum.
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index bd5f002cf20..3bd18bbbac9 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1899,13 +1899,13 @@ SELECT * FROM tenk1
                                                                 QUERY PLAN                                                                 
 -------------------------------------------------------------------------------------------------------------------------------------------
  Bitmap Heap Scan on tenk1
-   Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))))
+   Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))) OR ((thousand = 42) AND (tenthous IS NULL)))
    Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL))
    ->  BitmapOr
-         ->  Bitmap Index Scan on tenk1_thous_tenthous
-               Index Cond: ((thousand = 42) AND (tenthous IS NULL))
          ->  Bitmap Index Scan on tenk1_thous_tenthous
                Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
+         ->  Bitmap Index Scan on tenk1_thous_tenthous
+               Index Cond: ((thousand = 42) AND (tenthous IS NULL))
 (8 rows)
 
 EXPLAIN (COSTS OFF)
@@ -1938,13 +1938,13 @@ SELECT * FROM tenk1
                                                                      QUERY PLAN                                                                      
 -----------------------------------------------------------------------------------------------------------------------------------------------------
  Bitmap Heap Scan on tenk1
-   Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint)))
+   Recheck Cond: (((thousand = 42) AND (tenthous = '1'::smallint)) OR ((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))))
    Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint))
    ->  BitmapOr
-         ->  Bitmap Index Scan on tenk1_thous_tenthous
-               Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
          ->  Bitmap Index Scan on tenk1_thous_tenthous
                Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint))
+         ->  Bitmap Index Scan on tenk1_thous_tenthous
+               Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
 (8 rows)
 
 EXPLAIN (COSTS OFF)
@@ -2129,16 +2129,16 @@ SELECT count(*) FROM tenk1
 ---------------------------------------------------------------------------------------------------------------------------
  Aggregate
    ->  Bitmap Heap Scan on tenk1
-         Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41))))
+         Recheck Cond: ((hundred = 42) AND (((thousand = 42) OR (thousand = 41)) OR ((thousand = 99) AND (tenthous = 2))))
          Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))
          ->  BitmapAnd
                ->  Bitmap Index Scan on tenk1_hundred
                      Index Cond: (hundred = 42)
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: ((thousand = 99) AND (tenthous = 2))
                      ->  Bitmap Index Scan on tenk1_thous_tenthous
                            Index Cond: (thousand = ANY ('{42,41}'::integer[]))
+                     ->  Bitmap Index Scan on tenk1_thous_tenthous
+                           Index Cond: ((thousand = 99) AND (tenthous = 2))
 (12 rows)
 
 SELECT count(*) FROM tenk1
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a57bb18c24f..14da5708451 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4333,20 +4333,20 @@ select * from tenk1 a join tenk1 b on
  Nested Loop
    Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
    ->  Bitmap Heap Scan on tenk1 b
-         Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+         Recheck Cond: ((unique1 = 2) OR (hundred = 4))
          ->  BitmapOr
-               ->  Bitmap Index Scan on tenk1_hundred
-                     Index Cond: (hundred = 4)
                ->  Bitmap Index Scan on tenk1_unique1
                      Index Cond: (unique1 = 2)
+               ->  Bitmap Index Scan on tenk1_hundred
+                     Index Cond: (hundred = 4)
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
+               Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = 3)
                      ->  Bitmap Index Scan on tenk1_unique1
                            Index Cond: (unique1 = 1)
+                     ->  Bitmap Index Scan on tenk1_unique2
+                           Index Cond: (unique2 = 3)
 (17 rows)
 
 explain (costs off)
@@ -4360,12 +4360,12 @@ select * from tenk1 a join tenk1 b on
          Filter: ((unique1 = 2) OR (ten = 4))
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
+               Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = 3)
                      ->  Bitmap Index Scan on tenk1_unique1
                            Index Cond: (unique1 = 1)
+                     ->  Bitmap Index Scan on tenk1_unique2
+                           Index Cond: (unique2 = 3)
 (12 rows)
 
 explain (costs off)
@@ -4377,21 +4377,21 @@ select * from tenk1 a join tenk1 b on
  Nested Loop
    Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
    ->  Bitmap Heap Scan on tenk1 b
-         Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+         Recheck Cond: ((unique1 = 2) OR (hundred = 4))
          ->  BitmapOr
-               ->  Bitmap Index Scan on tenk1_hundred
-                     Index Cond: (hundred = 4)
                ->  Bitmap Index Scan on tenk1_unique1
                      Index Cond: (unique1 = 2)
+               ->  Bitmap Index Scan on tenk1_hundred
+                     Index Cond: (hundred = 4)
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
+               Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
                Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
                      ->  Bitmap Index Scan on tenk1_unique1
                            Index Cond: (unique1 = 1)
+                     ->  Bitmap Index Scan on tenk1_unique2
+                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
 (18 rows)
 
 explain (costs off)
@@ -4403,21 +4403,21 @@ select * from tenk1 a join tenk1 b on
  Nested Loop
    Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
    ->  Bitmap Heap Scan on tenk1 b
-         Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+         Recheck Cond: ((unique1 = 2) OR (hundred = 4))
          ->  BitmapOr
-               ->  Bitmap Index Scan on tenk1_hundred
-                     Index Cond: (hundred = 4)
                ->  Bitmap Index Scan on tenk1_unique1
                      Index Cond: (unique1 = 2)
+               ->  Bitmap Index Scan on tenk1_hundred
+                     Index Cond: (hundred = 4)
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
+               Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
                Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
                      ->  Bitmap Index Scan on tenk1_unique1
                            Index Cond: (unique1 = 1)
+                     ->  Bitmap Index Scan on tenk1_unique2
+                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
 (18 rows)
 
 explain (costs off)
@@ -4431,15 +4431,15 @@ select * from tenk1 a join tenk1 b on
    ->  Seq Scan on tenk1 b
    ->  Materialize
          ->  Bitmap Heap Scan on tenk1 a
-               Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20))
+               Recheck Cond: ((unique1 < 20) OR ((unique1 = 3) OR (unique1 = 1)) OR ((unique2 = 3) OR (unique2 = 7)))
                Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
                ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_unique2
-                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-                     ->  Bitmap Index Scan on tenk1_unique1
-                           Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
                      ->  Bitmap Index Scan on tenk1_unique1
                            Index Cond: (unique1 < 20)
+                     ->  Bitmap Index Scan on tenk1_unique1
+                           Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
+                     ->  Bitmap Index Scan on tenk1_unique2
+                           Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
 (14 rows)
 
 --
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 938cedd79ad..6101c8c7cf1 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2568,24 +2568,24 @@ where not exists (select 1 from prtx2
          ->  Seq Scan on prtx1_1
                Filter: ((a < 20) AND (c = 91))
          ->  Bitmap Heap Scan on prtx2_1
-               Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
+               Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
                Filter: (a = prtx1_1.a)
                ->  BitmapOr
-                     ->  Bitmap Index Scan on prtx2_1_c_idx
-                           Index Cond: (c = 99)
                      ->  Bitmap Index Scan on prtx2_1_b_idx
                            Index Cond: (b = (prtx1_1.b + 1))
+                     ->  Bitmap Index Scan on prtx2_1_c_idx
+                           Index Cond: (c = 99)
    ->  Nested Loop Anti Join
          ->  Seq Scan on prtx1_2
                Filter: ((a < 20) AND (c = 91))
          ->  Bitmap Heap Scan on prtx2_2
-               Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
+               Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
                Filter: (a = prtx1_2.a)
                ->  BitmapOr
-                     ->  Bitmap Index Scan on prtx2_2_c_idx
-                           Index Cond: (c = 99)
                      ->  Bitmap Index Scan on prtx2_2_b_idx
                            Index Cond: (b = (prtx1_2.b + 1))
+                     ->  Bitmap Index Scan on prtx2_2_c_idx
+                           Index Cond: (c = 99)
 (23 rows)
 
 select * from prtx1
-- 
2.39.5 (Apple Git-154)

