Hi hackers!
My colleague gave me an interesting case related to min max
optimization. Adding a useless left join to the select min from t query
breaks the min/max read optimization from the index.
What is meant is shown in the example below:
drop table if exists t1;
drop table if exists t2;
create table t1 (id int not null, mod text);
insert into t1 select id, (id % 10)::text from generate_series(1,100000) id;
create unique index on t1(id);
create index on t1(mod);
This is the best plan for this query, since we only need one minimum
value for this index. And it works perfectly:
explain select min(mod) from t1;
explain select min(mod) from t1;
QUERY PLAN
------------------------------------------------------------------------------------------------
Result (cost=0.33..0.34 rows=1 width=32)
InitPlan 1 (returns $0)
-> Limit (cost=0.29..0.33 rows=1 width=32)
-> Index Only Scan using t1_mod_idx on t1 (cost=0.29..3861.54
rows=99500 width=32)
Index Cond: (mod IS NOT NULL)
(5 rows)
create table t2 (id int not null);
insert into t2 select id from generate_series(1,100000) id;
create unique index on t2(id);
But if we add a join, we fall into a sec scan without options:
explain select min(t1.mod) from t1 left join t2 on t1.id = t2.id;
postgres=# explain select min(t1.mod) from t1 left join t2 on t1.id = t2.id;
QUERY PLAN
-----------------------------------------------------------------
Aggregate (cost=1693.00..1693.01 rows=1 width=32)
-> Seq Scan on t1 (cost=0.00..1443.00 rows=100000 width=32)
I have implemented a patch that solves this problem - allowing to
consider and join expressions for trial optimization. I am glad for
feedback and review!
--
Regards,
Alena Rybakina
Postgres Professional
From eb8fe49f68e198217a8f3e92ee424ff897f9e21e Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybak...@postgrespro.ru>
Date: Thu, 27 Feb 2025 11:26:44 +0300
Subject: [PATCH] Apply min_max_transformation for eliminated join relations
after successfuly applying join removal optimization.
---
src/backend/optimizer/plan/planagg.c | 14 +++++++--
src/test/regress/expected/aggregates.out | 40 ++++++++++++++++++++++++
src/test/regress/sql/aggregates.sql | 17 ++++++++++
3 files changed, 69 insertions(+), 2 deletions(-)
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 64605be3178..b1d44987fb1 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -125,9 +125,19 @@ preprocess_minmax_aggregates(PlannerInfo *root)
return;
jtnode = linitial(jtnode->fromlist);
}
- if (!IsA(jtnode, RangeTblRef))
+
+ /*
+ * Let's consider applying optimization to the remaining
+ * single relations after join removal optimization.
+ * To do this, we need to make sure that fromlist is empty and
+ * quals contains only one RTE relation.aggs_list.
+ */
+ if (IsA(jtnode, JoinExpr) && jtnode->fromlist == NIL && IsA(jtnode->quals, RangeTblRef))
+ rtr = (RangeTblRef *) (jtnode->quals);
+ else if (IsA(jtnode, RangeTblRef))
+ rtr = (RangeTblRef *) jtnode;
+ else
return;
- rtr = (RangeTblRef *) jtnode;
rte = planner_rt_fetch(rtr->rtindex, root);
if (rte->rtekind == RTE_RELATION)
/* ordinary relation, ok */ ;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8c4f8ce27ed..79fe2fcce4d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1329,6 +1329,46 @@ from int4_tbl t0;
(5 rows)
rollback;
+--Check min/max optimization for join expressions
+create unique index tenk1_unique1_idx on tenk1(unique1);
+create index tenk1_unique2_idx on tenk1(unique2);
+create unique index INT4_TBL_f1_idx on INT4_TBL(f1);
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left join INT4_TBL t2 on t1.unique1 = t2.f1;
+ QUERY PLAN
+--------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Limit
+ -> Index Scan using tenk1_unique2_idx on tenk1 t1
+ Index Cond: (unique2 IS NOT NULL)
+(5 rows)
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 inner join INT4_TBL t2 on t1.unique1 = t2.f1;
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Nested Loop
+ -> Seq Scan on int4_tbl t2
+ -> Index Scan using tenk1_unique1_idx on tenk1 t1
+ Index Cond: (unique1 = t2.f1)
+(5 rows)
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left outer join INT4_TBL t2 on t1.unique1 = t2.f1;
+ QUERY PLAN
+--------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Limit
+ -> Index Scan using tenk1_unique2_idx on tenk1 t1
+ Index Cond: (unique2 IS NOT NULL)
+(5 rows)
+
+DROP INDEX tenk1_unique1_idx;
+DROP INDEX tenk1_unique2_idx;
+DROP INDEX INT4_TBL_f1_idx;
-- check for correct detection of nested-aggregate errors
select max(min(unique1)) from tenk1;
ERROR: aggregate function calls cannot be nested
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index a1dc94bff57..c489f71bfc3 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -452,6 +452,23 @@ select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1)
from int4_tbl t0;
rollback;
+--Check min/max optimization for join expressions
+create unique index tenk1_unique1_idx on tenk1(unique1);
+create index tenk1_unique2_idx on tenk1(unique2);
+create unique index INT4_TBL_f1_idx on INT4_TBL(f1);
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left join INT4_TBL t2 on t1.unique1 = t2.f1;
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 inner join INT4_TBL t2 on t1.unique1 = t2.f1;
+
+explain (COSTS OFF)
+select min(t1.unique2) from tenk1 t1 left outer join INT4_TBL t2 on t1.unique1 = t2.f1;
+
+DROP INDEX tenk1_unique1_idx;
+DROP INDEX tenk1_unique2_idx;
+DROP INDEX INT4_TBL_f1_idx;
+
-- check for correct detection of nested-aggregate errors
select max(min(unique1)) from tenk1;
select (select max(min(unique1)) from int8_tbl) from tenk1;
--
2.34.1