Useless LEFT JOIN breaks MIN/MAX optimization

Alena Rybakina Thu, 27 Feb 2025 00:28:43 -0800

Hi hackers!

My colleague gave me an interesting case related to min maxoptimization. Adding a useless left join to the select min from t querybreaks the min/max read optimization from the index.

What is meant is shown in the example below:


drop table if exists t1;
drop table if exists t2;

create table t1 (id int not null, mod text);
insert into t1 select id, (id % 10)::text from generate_series(1,100000) id;
create unique index on t1(id);
create index on t1(mod);

This is the best plan for this query, since we only need one minimumvalue for this index. And it works perfectly:

explain select min(mod) from t1;
explain select min(mod) from t1;
QUERY PLAN
------------------------------------------------------------------------------------------------
 Result (cost=0.33..0.34 rows=1 width=32)
 InitPlan 1 (returns $0)
 -> Limit (cost=0.29..0.33 rows=1 width=32)

-> Index Only Scan using t1_mod_idx on t1 (cost=0.29..3861.54rows=99500 width=32)

 Index Cond: (mod IS NOT NULL)
(5 rows)

create table t2 (id int not null);
insert into t2 select id from generate_series(1,100000) id;
create unique index on t2(id);

But if we add a join, we fall into a sec scan without options:
explain select min(t1.mod) from t1 left join t2 on t1.id = t2.id;
postgres=# explain select min(t1.mod) from t1 left join t2 on t1.id = t2.id;
QUERY PLAN
-----------------------------------------------------------------
Aggregate (cost=1693.00..1693.01 rows=1 width=32)
-> Seq Scan on t1 (cost=0.00..1443.00 rows=100000 width=32)

I have implemented a patch that solves this problem - allowing toconsider and join expressions for trial optimization. I am glad forfeedback and review!


--
Regards,
Alena Rybakina
Postgres Professional

From eb8fe49f68e198217a8f3e92ee424ff897f9e21e Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybak...@postgrespro.ru>
Date: Thu, 27 Feb 2025 11:26:44 +0300
Subject: [PATCH] Apply min_max_transformation for eliminated join relations
 after successfuly applying join removal optimization.

---
 src/backend/optimizer/plan/planagg.c     | 14 +++++++--
 src/test/regress/expected/aggregates.out | 40 ++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 17 ++++++++++
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 64605be3178..b1d44987fb1 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -125,9 +125,19 @@ preprocess_minmax_aggregates(PlannerInfo *root)
 			return;
 		jtnode = linitial(jtnode->fromlist);
 	}
-	if (!IsA(jtnode, RangeTblRef))
+
+	/*
+	 * Let's consider applying optimization to the remaining
+	 * single relations after join removal optimization.
+	 * To do this, we need to make sure that fromlist is empty and
+	 * quals contains only one RTE relation.aggs_list.
+	 */
+	if (IsA(jtnode, JoinExpr) && jtnode->fromlist == NIL && IsA(jtnode->quals, RangeTblRef))
+		rtr = (RangeTblRef *) (jtnode->quals);
+	else if (IsA(jtnode, RangeTblRef))
+		rtr = (RangeTblRef *) jtnode;
+	else
 		return;
-	rtr = (RangeTblRef *) jtnode;
 	rte = planner_rt_fetch(rtr->rtindex, root);
 	if (rte->rtekind == RTE_RELATION)
 		 /* ordinary relation, ok */ ;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8c4f8ce27ed..79fe2fcce4d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1329,6 +1329,46 @@ from int4_tbl t0;
 (5 rows)
 
 rollback;
+--Check min/max optimization for join expressions
+create unique index tenk1_unique1_idx on tenk1(unique1);
+create index tenk1_unique2_idx on tenk1(unique2);
+create unique index INT4_TBL_f1_idx on INT4_TBL(f1);
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left join INT4_TBL t2 on t1.unique1 = t2.f1;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Result
+   InitPlan 1
+     ->  Limit
+           ->  Index Scan using tenk1_unique2_idx on tenk1 t1
+                 Index Cond: (unique2 IS NOT NULL)
+(5 rows)
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 inner join INT4_TBL t2 on t1.unique1 = t2.f1;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         ->  Seq Scan on int4_tbl t2
+         ->  Index Scan using tenk1_unique1_idx on tenk1 t1
+               Index Cond: (unique1 = t2.f1)
+(5 rows)
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left outer join INT4_TBL t2 on t1.unique1 = t2.f1;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Result
+   InitPlan 1
+     ->  Limit
+           ->  Index Scan using tenk1_unique2_idx on tenk1 t1
+                 Index Cond: (unique2 IS NOT NULL)
+(5 rows)
+
+DROP INDEX tenk1_unique1_idx;
+DROP INDEX tenk1_unique2_idx;
+DROP INDEX INT4_TBL_f1_idx;
 -- check for correct detection of nested-aggregate errors
 select max(min(unique1)) from tenk1;
 ERROR:  aggregate function calls cannot be nested
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index a1dc94bff57..c489f71bfc3 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -452,6 +452,23 @@ select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1)
 from int4_tbl t0;
 rollback;
 
+--Check min/max optimization for join expressions
+create unique index tenk1_unique1_idx on tenk1(unique1);
+create index tenk1_unique2_idx on tenk1(unique2);
+create unique index INT4_TBL_f1_idx on INT4_TBL(f1);
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left join INT4_TBL t2 on t1.unique1 = t2.f1;
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 inner join INT4_TBL t2 on t1.unique1 = t2.f1;
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left outer join INT4_TBL t2 on t1.unique1 = t2.f1;
+
+DROP INDEX tenk1_unique1_idx;
+DROP INDEX tenk1_unique2_idx;
+DROP INDEX INT4_TBL_f1_idx;
+
 -- check for correct detection of nested-aggregate errors
 select max(min(unique1)) from tenk1;
 select (select max(min(unique1)) from int8_tbl) from tenk1;
-- 
2.34.1

Useless LEFT JOIN breaks MIN/MAX optimization

Reply via email to