This is an automated email from the ASF dual-hosted git repository. zhangwenchao pushed a commit to branch fix_orca_empty_table in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 32ba9c0207d62f24420fd4000fa62921976a3333 Author: zhangwenchao <[email protected]> AuthorDate: Tue Oct 28 14:37:24 2025 +0800 Fix orca judgement of whether relation is empty is not accurate. When retrieve RelStats, orca use reltuples is whether -1 to decide relation is empty. However, reltuples 0 also means relation is empty. Authored-by: Zhang Wenchao <[email protected]> --- .../regress/expected/DML_over_joins_optimizer.out | 29 ++++++++------- .../test/regress/expected/bfv_joins_optimizer.out | 41 +++++++++++----------- .../gpopt/translate/CTranslatorRelcacheToDXL.cpp | 2 +- .../regress/expected/DML_over_joins_optimizer.out | 29 ++++++++------- src/test/regress/expected/bfv_joins_optimizer.out | 40 ++++++++++----------- 5 files changed, 69 insertions(+), 72 deletions(-) diff --git a/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out b/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out index aa87cea2a3e..abdad77c7f7 100644 --- a/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out @@ -1613,22 +1613,21 @@ HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For QUERY PLAN ------------------------------------------------------------------------------ Delete on tab1 - -> Result - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: tab1.b - -> Hash Join - Hash Cond: (tab2.a = tab1.a) - -> Seq Scan on tab2 - -> Hash - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: tab1.a - -> Hash Join - Hash Cond: (tab3.b = tab1.b) - -> Seq Scan on tab3 - -> Hash - -> Seq Scan on tab1 + -> Hash Join + Hash Cond: (tab3.b = tab1.b) + -> Seq Scan on tab3 + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: tab1.b + -> Hash Join + Hash Cond: (tab2.a = tab1.a) + -> Seq Scan on tab2 + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: tab1.a + -> Seq Scan on tab1 Optimizer: GPORCA -(16 rows) +(15 rows) -- ---------------------------------------------------------------------- -- Test: teardown.sql diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out index cc84f9983ff..e91d5ab5f88 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out @@ -2982,28 +2982,27 @@ ON (member_group.group_id IN (12,13,14,15) AND member_subgroup.subgroup_name = r QUERY PLAN --------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Hash Join - Hash Cond: (member."group_id" = member_group."group_id") - -> Seq Scan on member - -> Hash - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Hash Left Join - Hash Cond: (member_subgroup.subgroup_name = (region.county_name)::text) - Join Filter: (member_group."group_id" = ANY ('{12,13,14,15}'::integer[])) - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: member_subgroup.subgroup_name - -> Hash Join - Hash Cond: (member_subgroup."group_id" = member_group."group_id") - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: member_subgroup."group_id" + -> Hash Left Join + Hash Cond: (member_subgroup.subgroup_name = (region.county_name)::text) + Join Filter: (member_group."group_id" = ANY ('{12,13,14,15}'::integer[])) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: member_subgroup.subgroup_name + -> Hash Join + Hash Cond: (member."group_id" = member_group."group_id") + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: member."group_id" + -> Hash Join + Hash Cond: (member."group_id" = member_subgroup."group_id") + -> Seq Scan on member + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) -> Seq Scan on member_subgroup - -> Hash - -> Seq Scan on member_group - -> Hash - -> Redistribute Motion 3:3 (slice5; segments: 3) - Hash Key: region.county_name - -> Seq Scan on region - Optimizer: Pivotal Optimizer (GPORCA) + -> Hash + -> Seq Scan on member_group + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: region.county_name + -> Seq Scan on region (23 rows) -- Test colocated equijoins on coerced distribution keys diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index d8e1da28f0d..6a5d679a11f 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -1836,7 +1836,7 @@ CTranslatorRelcacheToDXL::RetrieveRelStats(CMemoryPool *mp, IMDId *mdid) * count of the partition table is -1. */ BOOL relation_empty = false; - if (num_rows == -1.0) + if (num_rows == -1.0 || num_rows == 0.0) { relation_empty = true; } diff --git a/src/test/regress/expected/DML_over_joins_optimizer.out b/src/test/regress/expected/DML_over_joins_optimizer.out index c0d7286affd..77a90f888c2 100644 --- a/src/test/regress/expected/DML_over_joins_optimizer.out +++ b/src/test/regress/expected/DML_over_joins_optimizer.out @@ -1774,22 +1774,21 @@ HINT: For non-partitioned tables, run analyze <table_name>(<column_list>). For QUERY PLAN ------------------------------------------------------------------------------ Delete on tab1 - -> Result - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: tab1.b - -> Hash Join - Hash Cond: (tab2.a = tab1.a) - -> Seq Scan on tab2 - -> Hash - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: tab1.a - -> Hash Join - Hash Cond: (tab3.b = tab1.b) - -> Seq Scan on tab3 - -> Hash - -> Seq Scan on tab1 + -> Hash Join + Hash Cond: (tab3.b = tab1.b) + -> Seq Scan on tab3 + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: tab1.b + -> Hash Join + Hash Cond: (tab2.a = tab1.a) + -> Seq Scan on tab2 + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: tab1.a + -> Seq Scan on tab1 Optimizer: GPORCA -(16 rows) +(15 rows) -- ---------------------------------------------------------------------- -- Test delete on partition table from join on another partition table diff --git a/src/test/regress/expected/bfv_joins_optimizer.out b/src/test/regress/expected/bfv_joins_optimizer.out index cccfe3db059..6ae5ea6d30c 100644 --- a/src/test/regress/expected/bfv_joins_optimizer.out +++ b/src/test/regress/expected/bfv_joins_optimizer.out @@ -3294,27 +3294,27 @@ ON (member_group.group_id IN (12,13,14,15) AND member_subgroup.subgroup_name = r QUERY PLAN --------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Hash Join - Hash Cond: (member."group_id" = member_group."group_id") - -> Seq Scan on member - -> Hash - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Hash Left Join - Hash Cond: (member_subgroup.subgroup_name = (region.county_name)::text) - Join Filter: (member_group."group_id" = ANY ('{12,13,14,15}'::integer[])) - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: member_subgroup.subgroup_name - -> Hash Join - Hash Cond: (member_subgroup."group_id" = member_group."group_id") - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: member_subgroup."group_id" + -> Hash Left Join + Hash Cond: (member_subgroup.subgroup_name = (region.county_name)::text) + Join Filter: (member_group."group_id" = ANY ('{12,13,14,15}'::integer[])) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: member_subgroup.subgroup_name + -> Hash Join + Hash Cond: (member."group_id" = member_group."group_id") + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: member."group_id" + -> Hash Join + Hash Cond: (member."group_id" = member_subgroup."group_id") + -> Seq Scan on member + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) -> Seq Scan on member_subgroup - -> Hash - -> Seq Scan on member_group - -> Hash - -> Redistribute Motion 3:3 (slice5; segments: 3) - Hash Key: region.county_name - -> Seq Scan on region + -> Hash + -> Seq Scan on member_group + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: region.county_name + -> Seq Scan on region Optimizer: Pivotal Optimizer (GPORCA) (23 rows) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
