Copilot commented on code in PR #1619:
URL: https://github.com/apache/cloudberry/pull/1619#discussion_r2933469941


##########
src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp:
##########
@@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression 
*pexprSubquery,
 }
 
 
+//---------------------------------------------------------------------------
+//     @function:
+//             FContainsEmptyGbAgg
+//
+//     @doc:
+//             Return true if pexpr contains a GbAgg with empty grouping 
columns
+//             (i.e., GROUP BY ())
+//
+//---------------------------------------------------------------------------
+static BOOL
+FContainsEmptyGbAgg(CExpression *pexpr)
+{
+       if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
+       {
+               return 0 == 
CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size();
+       }
+       const ULONG arity = pexpr->Arity();
+       for (ULONG ul = 0; ul < arity; ul++)
+       {
+               CExpression *pexprChild = (*pexpr)[ul];
+               if (pexprChild->Pop()->FLogical() && 
FContainsEmptyGbAgg(pexprChild))
+               {
+                       return true;
+               }
+       }
+       return false;
+}
+
+
+//---------------------------------------------------------------------------
+//     @function:
+//             FHasCorrelatedSelectAboveGbAgg
+//
+//     @doc:
+//             Return true if pexpr has a CLogicalSelect with outer references 
in its
+//             filter predicate that sits above a GROUP BY () aggregate.  This 
pattern
+//             arises when a correlated scalar subquery has a correlated 
HAVING clause,
+//             e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col".
+//
+//             When such a pattern exists the scalar subquery must NOT be 
decorrelated
+//             with COALESCE(count,0) semantics: if the HAVING condition is 
false the
+//             subquery should return NULL (no rows), not 0.
+//
+//---------------------------------------------------------------------------
+static BOOL
+FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr)
+{
+       // Stop recursion at a GbAgg boundary: we are looking for a Select
+       // that sits *above* a GbAgg, so once we reach the GbAgg there is
+       // nothing more to check in this branch.
+       if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
+       {
+               return false;
+       }
+
+       if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() &&
+               pexpr->HasOuterRefs())
+       {
+               // The Select has outer references somewhere in its subtree.
+               // Check whether they originate from the filter (child 1) rather
+               // than from the logical child (child 0).  If the logical child 
has
+               // no outer refs but the Select as a whole does, the outer refs 
must
+               // come from the filter predicate — exactly the 
correlated-HAVING
+               // pattern we want to detect.
+               CExpression *pexprLogicalChild = (*pexpr)[0];
+               if (!pexprLogicalChild->HasOuterRefs() &&

Review Comment:
   FHasCorrelatedSelectAboveGbAgg() infers that outer refs come from the Select 
predicate by checking `pexpr->HasOuterRefs()` and 
`!(*pexpr)[0]->HasOuterRefs()`. This misses cases where there *is* a correlated 
HAVING predicate but the logical child also contains outer refs (e.g., 
correlated WHERE below the GbAgg plus correlated HAVING above it), so the 
pattern won’t be detected and ORCA can still apply the incorrect count(*) 
decorrelation semantics. Consider directly checking for outer refs on the 
predicate child `(*pexpr)[1]` (and then verifying the logical child contains an 
empty GROUP BY GbAgg) instead of relying on 
`!pexprLogicalChild->HasOuterRefs()`.
   



##########
src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp:
##########
@@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery(
        *ppexprNewOuter = pexprPrj;
 
        BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified();
+
+       // When GROUP BY () has a correlated HAVING clause (now represented as a
+       // CLogicalSelect with outer refs sitting above the GbAgg), the subquery
+       // must return NULL — not 0 — when the HAVING condition is false.
+       // Applying COALESCE(count,0) would incorrectly convert that NULL to 0,
+       // so we skip the special count(*) semantics in that case.
+       BOOL fCorrelatedHavingAboveEmptyGby =
+               (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
+                FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0]));
+
        if (fGeneratedByQuantified ||
-               (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size()))
+               (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
+                !fCorrelatedHavingAboveEmptyGby))

Review Comment:
   The new `fCorrelatedHavingAboveEmptyGby` guard only triggers when the HAVING 
Select has outer refs. But `GROUP BY () HAVING <predicate>` can also eliminate 
the single aggregate row in uncorrelated cases (e.g., `HAVING false` or `HAVING 
count(*) > 0` when the input is empty), in which case a scalar subquery should 
yield NULL (no rows) and COALESCE(count,0) would be wrong. Consider skipping 
the special count(*) COALESCE whenever there is a Select above an 
empty-grouping GbAgg (i.e., any HAVING filter), not only when it is correlated.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to