This is an automated email from the ASF dual-hosted git repository.
wangwn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new cf9a6ea57a2 Fix Segment NDV number underflow issue (#1387)
cf9a6ea57a2 is described below
commit cf9a6ea57a2a7a69cbde99d0b881475b6945bd19
Author: Weinan WANG <[email protected]>
AuthorDate: Mon Oct 20 18:49:20 2025 +0800
Fix Segment NDV number underflow issue (#1387)
For the ratio of NDV, the function `abs` accepts an int value as a
parameter, so the double value `-0.x` is cast to `int(0)`, which leads
to an inefficient plan being picked.
Fix: `abs` -> `fabs`
---
.../regress/expected/bfv_aggregate_optimizer.out | 2 +-
.../src/test/regress/expected/gp_dqa_optimizer.out | 60 +++++++++-----------
.../regress/expected/groupingsets_optimizer.out | 14 ++---
.../src/test/regress/expected/limit_optimizer.out | 31 +++++-----
src/backend/commands/analyze.c | 5 +-
.../regress/expected/bfv_aggregate_optimizer.out | 2 +-
src/test/regress/expected/gp_dqa_optimizer.out | 66 ++++++++++------------
.../regress/expected/groupingsets_optimizer.out | 14 ++---
src/test/regress/expected/limit_optimizer.out | 33 ++++++-----
9 files changed, 105 insertions(+), 122 deletions(-)
diff --git
a/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out
b/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out
index f8508db62b0..72f0a78988c 100644
--- a/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out
@@ -188,7 +188,7 @@ set optimizer_force_multistage_agg = on;
select count_operator('select count(*) from multi_stage_test group by
b;','GroupAggregate');
count_operator
----------------
- 1
+ 2
(1 row)
set optimizer_force_multistage_agg = off;
diff --git a/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out
b/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out
index 250ad329a5e..db4d467570b 100644
--- a/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out
@@ -65,20 +65,18 @@ select count(distinct d) from dqa_t1 group by i;
(12 rows)
explain (costs off) select count(distinct d) from dqa_t1 group by i;
- QUERY PLAN
-------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
- -> Finalize HashAggregate
+ -> GroupAggregate
Group Key: i
- -> Redistribute Motion 3:3 (slice2; segments: 3)
- Hash Key: i
- -> Partial GroupAggregate
- Group Key: i
- -> Sort
- Sort Key: i, d
- -> Seq Scan on dqa_t1
+ -> Sort
+ Sort Key: i
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: i
+ -> Seq Scan on dqa_t1
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(11 rows)
+(9 rows)
select count(distinct d), sum(distinct d) from dqa_t1 group by i;
count | sum
@@ -98,20 +96,18 @@ select count(distinct d), sum(distinct d) from dqa_t1 group
by i;
(12 rows)
explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1
group by i;
- QUERY PLAN
-------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
- -> Finalize HashAggregate
+ -> GroupAggregate
Group Key: i
- -> Redistribute Motion 3:3 (slice2; segments: 3)
- Hash Key: i
- -> Partial GroupAggregate
- Group Key: i
- -> Sort
- Sort Key: i, d
- -> Seq Scan on dqa_t1
+ -> Sort
+ Sort Key: i
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: i
+ -> Seq Scan on dqa_t1
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(11 rows)
+(9 rows)
select count(distinct d), count(distinct dt) from dqa_t1;
count | count
@@ -1909,20 +1905,18 @@ select count(distinct d) from dqa_t1 group by i;
(12 rows)
explain (costs off) select count(distinct d) from dqa_t1 group by i;
- QUERY PLAN
-------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
- -> Finalize HashAggregate
+ -> GroupAggregate
Group Key: i
- -> Redistribute Motion 3:3 (slice2; segments: 3)
- Hash Key: i
- -> Partial GroupAggregate
- Group Key: i
- -> Sort
- Sort Key: i, d
- -> Seq Scan on dqa_t1
+ -> Sort
+ Sort Key: i
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: i
+ -> Seq Scan on dqa_t1
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(11 rows)
+(9 rows)
select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
count | count | count
diff --git
a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
index b2ac9ce9ddc..b3da68b1f9d 100644
--- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
@@ -1891,12 +1891,11 @@ explain (costs off)
-> Streaming Partial HashAggregate
Group Key: share0_ref5.hundred
-> Shared Scan (share slice:id 5:0)
- -> Finalize HashAggregate
+ -> HashAggregate
Group Key: share0_ref6.thousand
-> Redistribute Motion 3:3 (slice6; segments: 3)
Hash Key: share0_ref6.thousand
- -> Streaming Partial HashAggregate
- Group Key: share0_ref6.thousand
+ -> Result
-> Shared Scan (share slice:id 6:0)
-> HashAggregate
Group Key: share0_ref7.twothousand
@@ -1908,7 +1907,7 @@ explain (costs off)
Group Key: share0_ref8.unique1
-> Shared Scan (share slice:id 1:0)
Optimizer: Pivotal Optimizer (GPORCA)
-(50 rows)
+(49 rows)
explain (costs off)
select unique1,
@@ -1999,12 +1998,11 @@ explain (costs off)
-> Streaming Partial HashAggregate
Group Key: share0_ref5.hundred
-> Shared Scan (share slice:id 5:0)
- -> Finalize HashAggregate
+ -> HashAggregate
Group Key: share0_ref6.thousand
-> Redistribute Motion 3:3 (slice6; segments: 3)
Hash Key: share0_ref6.thousand
- -> Streaming Partial HashAggregate
- Group Key: share0_ref6.thousand
+ -> Result
-> Shared Scan (share slice:id 6:0)
-> HashAggregate
Group Key: share0_ref7.twothousand
@@ -2016,7 +2014,7 @@ explain (costs off)
Group Key: share0_ref8.unique1
-> Shared Scan (share slice:id 1:0)
Optimizer: Pivotal Optimizer (GPORCA)
-(50 rows)
+(49 rows)
reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
diff --git a/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out
b/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out
index b8562703a6c..dea74df2b5d 100644
--- a/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out
@@ -358,29 +358,28 @@ order by s2 desc;
explain (verbose, costs off)
select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
from tenk1 group by thousand order by thousand limit 3;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------
Result
- Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() *
'0'::double precision)))
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double precision + (random()
* '0'::double precision)))
-> Limit
- Output: (sum(tenthous)), (((sum(tenthous))::double precision +
(random() * '0'::double precision))), thousand
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double precision +
(random() * '0'::double precision))), thousand
-> Gather Motion 3:1 (slice1; segments: 3)
- Output: (sum(tenthous)), (((sum(tenthous))::double precision +
(random() * '0'::double precision))), thousand
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double precision
+ (random() * '0'::double precision))), thousand
Merge Key: thousand
-> Limit
- Output: (sum(tenthous)), (((sum(tenthous))::double
precision + (random() * '0'::double precision))), thousand
- -> Finalize GroupAggregate
- Output: sum(tenthous), ((sum(tenthous))::double
precision + (random() * '0'::double precision)), thousand
- Group Key: tenk1.thousand
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double
precision + (random() * '0'::double precision))), thousand
+ -> Result
+ Output: (sum(tenthous)), (((sum(tenthous)))::double
precision + (random() * '0'::double precision)), thousand
-> Sort
- Output: thousand, (PARTIAL sum(tenthous)),
(PARTIAL sum(tenthous))
+ Output: (sum(tenthous)), (sum(tenthous)),
thousand
Sort Key: tenk1.thousand
- -> Redistribute Motion 3:3 (slice2;
segments: 3)
- Output: thousand, (PARTIAL
sum(tenthous)), (PARTIAL sum(tenthous))
- Hash Key: thousand
- -> Streaming Partial HashAggregate
- Output: thousand, PARTIAL
sum(tenthous), PARTIAL sum(tenthous)
- Group Key: tenk1.thousand
+ -> HashAggregate
+ Output: sum(tenthous), sum(tenthous),
thousand
+ Group Key: tenk1.thousand
+ -> Redistribute Motion 3:3 (slice2;
segments: 3)
+ Output: thousand, tenthous
+ Hash Key: thousand
-> Seq Scan on public.tenk1
Output: thousand, tenthous
Optimizer: Pivotal Optimizer (GPORCA)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index ce642dc84e7..6f5c4c5c42c 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -2871,12 +2871,13 @@ process_sample_rows(Portal portal,
arrayVal = DatumGetArrayTypeP(funcRetValues[3]);
deconstruct_array(arrayVal, FLOAT8OID, 8, true,
'd',
&colndv,
&nulls, &numelems);
- for (i = 0; i < relDesc->natts; i++)
+ Assert(numelems == relDesc->natts);
+ for (i = 0; i < numelems; i++)
{
double this_colndv =
DatumGetFloat8(colndv[i]);
if (this_colndv < 0) {
Assert(this_colndv >= -1);
- colNDVBySeg[i] +=
abs(this_colndv) * this_totalrows;
+ colNDVBySeg[i] +=
fabs(this_colndv) * this_totalrows;
} else {
/* if current segment have any
data, then ndv won't be 0.
* if current segment have no
rows, ndv is 0.
diff --git a/src/test/regress/expected/bfv_aggregate_optimizer.out
b/src/test/regress/expected/bfv_aggregate_optimizer.out
index 51fd134f587..47ae7048795 100644
--- a/src/test/regress/expected/bfv_aggregate_optimizer.out
+++ b/src/test/regress/expected/bfv_aggregate_optimizer.out
@@ -188,7 +188,7 @@ set optimizer_force_multistage_agg = on;
select count_operator('select count(*) from multi_stage_test group by
b;','GroupAggregate');
count_operator
----------------
- 1
+ 2
(1 row)
set optimizer_force_multistage_agg = off;
diff --git a/src/test/regress/expected/gp_dqa_optimizer.out
b/src/test/regress/expected/gp_dqa_optimizer.out
index acbd713866d..278b719e933 100644
--- a/src/test/regress/expected/gp_dqa_optimizer.out
+++ b/src/test/regress/expected/gp_dqa_optimizer.out
@@ -66,20 +66,18 @@ select count(distinct d) from dqa_t1 group by i;
(12 rows)
explain (costs off) select count(distinct d) from dqa_t1 group by i;
- QUERY PLAN
-------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
- -> Finalize HashAggregate
+ -> GroupAggregate
Group Key: i
- -> Redistribute Motion 3:3 (slice2; segments: 3)
- Hash Key: i
- -> Partial GroupAggregate
- Group Key: i
- -> Sort
- Sort Key: i, d
- -> Seq Scan on dqa_t1
- Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(11 rows)
+ -> Sort
+ Sort Key: i
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: i
+ -> Seq Scan on dqa_t1
+ Optimizer: GPORCA
+(9 rows)
select count(distinct d), sum(distinct d) from dqa_t1 group by i;
count | sum
@@ -99,20 +97,18 @@ select count(distinct d), sum(distinct d) from dqa_t1 group
by i;
(12 rows)
explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1
group by i;
- QUERY PLAN
-------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
- -> Finalize HashAggregate
+ -> GroupAggregate
Group Key: i
- -> Redistribute Motion 3:3 (slice2; segments: 3)
- Hash Key: i
- -> Partial GroupAggregate
- Group Key: i
- -> Sort
- Sort Key: i, d
- -> Seq Scan on dqa_t1
- Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(11 rows)
+ -> Sort
+ Sort Key: i
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: i
+ -> Seq Scan on dqa_t1
+ Optimizer: GPORCA
+(9 rows)
select count(distinct d), count(distinct dt) from dqa_t1;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
@@ -2022,20 +2018,18 @@ select count(distinct d) from dqa_t1 group by i;
(12 rows)
explain (costs off) select count(distinct d) from dqa_t1 group by i;
- QUERY PLAN
-------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
- -> Finalize HashAggregate
+ -> GroupAggregate
Group Key: i
- -> Redistribute Motion 3:3 (slice2; segments: 3)
- Hash Key: i
- -> Partial GroupAggregate
- Group Key: i
- -> Sort
- Sort Key: i, d
- -> Seq Scan on dqa_t1
- Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(11 rows)
+ -> Sort
+ Sort Key: i
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: i
+ -> Seq Scan on dqa_t1
+ Optimizer: GPORCA
+(9 rows)
select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
diff --git a/src/test/regress/expected/groupingsets_optimizer.out
b/src/test/regress/expected/groupingsets_optimizer.out
index 39fad5a1c8a..08ef4c1a68c 100644
--- a/src/test/regress/expected/groupingsets_optimizer.out
+++ b/src/test/regress/expected/groupingsets_optimizer.out
@@ -1968,12 +1968,11 @@ explain (costs off)
-> Streaming Partial HashAggregate
Group Key: share0_ref5.hundred
-> Shared Scan (share slice:id 5:0)
- -> Finalize HashAggregate
+ -> HashAggregate
Group Key: share0_ref6.thousand
-> Redistribute Motion 3:3 (slice6; segments: 3)
Hash Key: share0_ref6.thousand
- -> Streaming Partial HashAggregate
- Group Key: share0_ref6.thousand
+ -> Result
-> Shared Scan (share slice:id 6:0)
-> HashAggregate
Group Key: share0_ref7.twothousand
@@ -1985,7 +1984,7 @@ explain (costs off)
Group Key: share0_ref8.unique1
-> Shared Scan (share slice:id 1:0)
Optimizer: Pivotal Optimizer (GPORCA)
-(50 rows)
+(49 rows)
explain (costs off)
select unique1,
@@ -2076,12 +2075,11 @@ explain (costs off)
-> Streaming Partial HashAggregate
Group Key: share0_ref5.hundred
-> Shared Scan (share slice:id 5:0)
- -> Finalize HashAggregate
+ -> HashAggregate
Group Key: share0_ref6.thousand
-> Redistribute Motion 3:3 (slice6; segments: 3)
Hash Key: share0_ref6.thousand
- -> Streaming Partial HashAggregate
- Group Key: share0_ref6.thousand
+ -> Result
-> Shared Scan (share slice:id 6:0)
-> HashAggregate
Group Key: share0_ref7.twothousand
@@ -2093,7 +2091,7 @@ explain (costs off)
Group Key: share0_ref8.unique1
-> Shared Scan (share slice:id 1:0)
Optimizer: Pivotal Optimizer (GPORCA)
-(50 rows)
+(49 rows)
reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
diff --git a/src/test/regress/expected/limit_optimizer.out
b/src/test/regress/expected/limit_optimizer.out
index 6508365fc81..e8a00fe3a40 100644
--- a/src/test/regress/expected/limit_optimizer.out
+++ b/src/test/regress/expected/limit_optimizer.out
@@ -358,32 +358,31 @@ order by s2 desc;
explain (verbose, costs off)
select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
from tenk1 group by thousand order by thousand limit 3;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------
Result
- Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() *
'0'::double precision)))
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double precision + (random()
* '0'::double precision)))
-> Limit
- Output: (sum(tenthous)), (((sum(tenthous))::double precision +
(random() * '0'::double precision))), thousand
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double precision +
(random() * '0'::double precision))), thousand
-> Gather Motion 3:1 (slice1; segments: 3)
- Output: (sum(tenthous)), (((sum(tenthous))::double precision +
(random() * '0'::double precision))), thousand
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double precision
+ (random() * '0'::double precision))), thousand
Merge Key: thousand
-> Limit
- Output: (sum(tenthous)), (((sum(tenthous))::double
precision + (random() * '0'::double precision))), thousand
- -> Finalize GroupAggregate
- Output: sum(tenthous), ((sum(tenthous))::double
precision + (random() * '0'::double precision)), thousand
- Group Key: tenk1.thousand
+ Output: (sum(tenthous)), ((((sum(tenthous)))::double
precision + (random() * '0'::double precision))), thousand
+ -> Result
+ Output: (sum(tenthous)), (((sum(tenthous)))::double
precision + (random() * '0'::double precision)), thousand
-> Sort
- Output: thousand, (PARTIAL sum(tenthous)),
(PARTIAL sum(tenthous))
+ Output: (sum(tenthous)), (sum(tenthous)),
thousand
Sort Key: tenk1.thousand
- -> Redistribute Motion 3:3 (slice2;
segments: 3)
- Output: thousand, (PARTIAL
sum(tenthous)), (PARTIAL sum(tenthous))
- Hash Key: thousand
- -> Streaming Partial HashAggregate
- Output: thousand, PARTIAL
sum(tenthous), PARTIAL sum(tenthous)
- Group Key: tenk1.thousand
+ -> HashAggregate
+ Output: sum(tenthous), sum(tenthous),
thousand
+ Group Key: tenk1.thousand
+ -> Redistribute Motion 3:3 (slice2;
segments: 3)
+ Output: thousand, tenthous
+ Hash Key: thousand
-> Seq Scan on public.tenk1
Output: thousand, tenthous
- Optimizer: Pivotal Optimizer (GPORCA)
+ Optimizer: GPORCA
(24 rows)
select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]