avamingli commented on code in PR #685:
URL: https://github.com/apache/cloudberry/pull/685#discussion_r1859758193
##########
src/backend/optimizer/plan/transform.c:
##########
@@ -520,3 +521,134 @@ replace_sirvf_rte(Query *query, RangeTblEntry *rte)
return rte;
}
+
+/*
+ * Does the query have SRFs, or WITH ORDINALITY?
+ */
+bool query_has_srf(Query *query)
+{
+ if (query->hasTargetSRFs)
+ {
+ return true;
+ }
+
+ /* Double check for subquery. */
+ if (expression_returns_set( (Node *) query->targetList))
+ {
+ return true;
+ }
+
+ ListCell *lcrte = NULL;
+ foreach (lcrte, query->rtable)
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(lcrte);
+
+ switch(rte->rtekind)
+ {
+ case RTE_FUNCTION:
+ {
+ ListCell *lcrtfunc;
+
+ /* WITH ORDINALITY */
+ if (rte->funcordinality)
+ return true;
+
+ foreach(lcrtfunc, rte->functions)
+ {
+ RangeTblFunction *rtfunc =
(RangeTblFunction *) lfirst(lcrtfunc);
+
+ if (!IsA(rtfunc->funcexpr, FuncExpr))
+ return true;
+
+ if (((FuncExpr *)
rtfunc->funcexpr)->funcretset)
+ {
+ /* SRF in FROM clause */
+ return true;
+ }
+ }
+ break;
+ }
+ case RTE_SUBQUERY:
+ {
+ Query *sq = (Query *) rte->subquery;
+
+ if (query_has_srf(sq))
+ {
+ return true;
+ }
+ break;
+ }
+ default:
+ {
+ break;
+ }
+ }
+ }
+
+ return false;
+}
+
+/*
+ * DISTINCT/DISTINCT ON/ORDER BY optimization.
+ * Remove DISTINCT clause if possible, ex:
+ * select DISTINCT count(a) from t; to
+ * select count(a) from t;
+ * At most one row is returned, so DISTINCT and/or DISTINCT ON is pointless.
+ * The same applies to the ORDER BY clause.
+ */
+Query *remove_distinct_sort_clause(Query *parse)
+{
+ if (parse->hasAggs &&
+ parse->groupClause == NIL &&
+ !contain_mutable_functions((Node *) parse) &&
+ !query_has_srf(parse))
+ {
+ List *useless_tlist = NIL;
+ List *tles;
+ List *sortops;
+ List *eqops;
+ ListCell *lc;
+
+ if (parse->distinctClause != NIL)
Review Comment:
> explain (verbose) select distinct(count(t.a)) from t, (select
> distinct(count(*)), generate_series(0,1))as xx;
> the distinct(count(t.a)) is not optimized.
Good catch. This was blocked by the SRF check, because the subquery has an SRF.
`distinct(count(t.a))` seems OK to optimize if we only care about the
TargetList and ignore the other parts of the Query.
For `distinct(count(*))`, it's not easy to remove because the pg planner builds
everything from the bottom up; however, ORCA could do that using the Cascades model.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]