Copilot commented on code in PR #1619:
URL: https://github.com/apache/cloudberry/pull/1619#discussion_r2933469941
##########
src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp:
##########
@@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression
*pexprSubquery,
}
+//---------------------------------------------------------------------------
+// @function:
+// FContainsEmptyGbAgg
+//
+// @doc:
+// Return true if pexpr contains a GbAgg with empty grouping
columns
+// (i.e., GROUP BY ())
+//
+//---------------------------------------------------------------------------
+static BOOL
+FContainsEmptyGbAgg(CExpression *pexpr)
+{
+ if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
+ {
+ return 0 ==
CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size();
+ }
+ const ULONG arity = pexpr->Arity();
+ for (ULONG ul = 0; ul < arity; ul++)
+ {
+ CExpression *pexprChild = (*pexpr)[ul];
+ if (pexprChild->Pop()->FLogical() &&
FContainsEmptyGbAgg(pexprChild))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//---------------------------------------------------------------------------
+// @function:
+// FHasCorrelatedSelectAboveGbAgg
+//
+// @doc:
+// Return true if pexpr has a CLogicalSelect with outer references
in its
+// filter predicate that sits above a GROUP BY () aggregate. This
pattern
+// arises when a correlated scalar subquery has a correlated
HAVING clause,
+// e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col".
+//
+// When such a pattern exists the scalar subquery must NOT be
decorrelated
+// with COALESCE(count,0) semantics: if the HAVING condition is
false the
+// subquery should return NULL (no rows), not 0.
+//
+//---------------------------------------------------------------------------
+static BOOL
+FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr)
+{
+ // Stop recursion at a GbAgg boundary: we are looking for a Select
+ // that sits *above* a GbAgg, so once we reach the GbAgg there is
+ // nothing more to check in this branch.
+ if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
+ {
+ return false;
+ }
+
+ if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() &&
+ pexpr->HasOuterRefs())
+ {
+ // The Select has outer references somewhere in its subtree.
+ // Check whether they originate from the filter (child 1) rather
+ // than from the logical child (child 0). If the logical child
has
+ // no outer refs but the Select as a whole does, the outer refs
must
+ // come from the filter predicate — exactly the
correlated-HAVING
+ // pattern we want to detect.
+ CExpression *pexprLogicalChild = (*pexpr)[0];
+ if (!pexprLogicalChild->HasOuterRefs() &&
Review Comment:
FHasCorrelatedSelectAboveGbAgg() infers that outer refs come from the Select
predicate by checking `pexpr->HasOuterRefs()` and
`!(*pexpr)[0]->HasOuterRefs()`. This misses cases where there *is* a correlated
HAVING predicate but the logical child also contains outer refs (e.g.,
correlated WHERE below the GbAgg plus correlated HAVING above it), so the
pattern won’t be detected and ORCA can still apply the incorrect count(*)
decorrelation semantics. Consider directly checking for outer refs on the
predicate child `(*pexpr)[1]` (and then verifying the logical child contains an
empty GROUP BY GbAgg) instead of relying on
`!pexprLogicalChild->HasOuterRefs()`.
##########
src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp:
##########
@@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery(
*ppexprNewOuter = pexprPrj;
BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified();
+
+ // When GROUP BY () has a correlated HAVING clause (now represented as a
+ // CLogicalSelect with outer refs sitting above the GbAgg), the subquery
+ // must return NULL — not 0 — when the HAVING condition is false.
+ // Applying COALESCE(count,0) would incorrectly convert that NULL to 0,
+ // so we skip the special count(*) semantics in that case.
+ BOOL fCorrelatedHavingAboveEmptyGby =
+ (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
+ FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0]));
+
if (fGeneratedByQuantified ||
- (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size()))
+ (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
+ !fCorrelatedHavingAboveEmptyGby))
Review Comment:
The new `fCorrelatedHavingAboveEmptyGby` guard only triggers when the HAVING
Select has outer refs. But `GROUP BY () HAVING <predicate>` can also eliminate
the single aggregate row in uncorrelated cases (e.g., `HAVING false` or `HAVING
count(*) > 0` when the input is empty), in which case a scalar subquery should
yield NULL (no rows) and COALESCE(count,0) would be wrong. Consider skipping
the special count(*) COALESCE whenever there is a Select above an
empty-grouping GbAgg (i.e., any HAVING filter), not only when it is correlated.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]