Copilot commented on code in PR #2359:
URL: https://github.com/apache/age/pull/2359#discussion_r2991712193


##########
src/backend/parser/cypher_clause.c:
##########
@@ -1602,6 +1611,332 @@ static Query 
*transform_cypher_list_comprehension(cypher_parsestate *cpstate,
     return query;
 }
 
+/*
+ * Helper: build a BooleanTest node (pred IS TRUE, pred IS FALSE, etc.)
+ */
+static Node *make_boolean_test(Node *arg, BoolTestType testtype)
+{
+    BooleanTest *bt = makeNode(BooleanTest);
+
+    bt->arg = (Expr *) arg;
+    bt->booltesttype = testtype;
+    bt->location = -1;
+
+    return (Node *) bt;
+}
+
+/*
+ * Helper: build a fully-transformed bool_or(expr) Aggref node.
+ *
+ * The argument must already be a transformed boolean expression.
+ * We construct the Aggref manually to avoid going through FuncCall
+ * + transformExpr, which expects raw parse tree nodes.
+ */
+static Node *make_bool_or_agg(ParseState *pstate, Node *arg)
+{
+    Aggref *agg;
+    TargetEntry *te;
+    Oid bool_or_oid;
+    Oid argtypes[1] = { BOOLOID };
+
+    /* Look up bool_or(boolean) */
+    bool_or_oid = LookupFuncName(list_make1(makeString("bool_or")),
+                                 1, argtypes, false);
+
+    /* Build the TargetEntry for the aggregate argument */
+    te = makeTargetEntry((Expr *) arg, 1, NULL, false);
+
+    /* Construct the Aggref */
+    agg = makeNode(Aggref);
+    agg->aggfnoid = bool_or_oid;
+    agg->aggtype = BOOLOID;
+    agg->aggcollid = InvalidOid;
+    agg->inputcollid = InvalidOid;
+    agg->aggtranstype = InvalidOid;  /* filled by planner */
+    agg->aggargtypes = list_make1_oid(BOOLOID);
+    agg->aggdirectargs = NIL;
+    agg->args = list_make1(te);
+    agg->aggorder = NIL;
+    agg->aggdistinct = NIL;
+    agg->aggfilter = NULL;
+    agg->aggstar = false;
+    agg->aggvariadic = false;
+    agg->aggkind = AGGKIND_NORMAL;
+    agg->aggpresorted = false;
+    agg->agglevelsup = 0;
+    agg->aggsplit = AGGSPLIT_SIMPLE;
+    agg->aggno = -1;
+    agg->aggtransno = -1;
+    agg->location = -1;
+
+    /* Register the aggregate with the parse state */
+    pstate->p_hasAggs = true;
+
+    return (Node *) agg;
+}
+
+/*
+ * Helper: build a transformed CASE expression implementing three-valued
+ * predicate logic for all(), any(), and none().
+ *
+ * any():   CASE WHEN bool_or(pred IS TRUE) THEN true
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE false END
+ *
+ * all():   CASE WHEN bool_or(pred IS FALSE) THEN false
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE true END
+ *
+ * none():  CASE WHEN bool_or(pred IS TRUE) THEN false
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE true END
+ *
+ * Empty list: both bool_or calls return NULL (no rows), so the CASE
+ * falls through to the default: false for any(), true for all()/none().
+ * This matches Cypher's vacuous truth semantics.
+ */
+static Node *make_predicate_case_expr(ParseState *pstate, Node *pred,
+                                      cypher_predicate_function_kind kind)
+{
+    CaseExpr *cexpr;
+    CaseWhen *when1, *when2;
+    Node *bool_or_first, *bool_or_null;
+    Node *true_const, *false_const, *null_const;
+
+    /* boolean constants */
+    true_const = makeBoolConst(true, false);
+    false_const = makeBoolConst(false, false);
+    null_const = makeBoolConst(false, true);  /* isnull = true */
+
+    /* Second branch is common to all: bool_or(pred IS NULL) -> NULL */
+    bool_or_null = make_bool_or_agg(pstate,
+                                    make_boolean_test(pred, IS_UNKNOWN));
+
+    when2 = makeNode(CaseWhen);
+    when2->expr = (Expr *) bool_or_null;
+    when2->result = (Expr *) null_const;
+    when2->location = -1;
+
+    if (kind == CPFK_ALL)
+    {
+        /* bool_or(pred IS FALSE) -> false */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_FALSE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) false_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) true_const;
+        cexpr->location = -1;
+    }
+    else if (kind == CPFK_ANY)
+    {
+        /* bool_or(pred IS TRUE) -> true */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_TRUE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) true_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) false_const;
+        cexpr->location = -1;
+    }
+    else /* CPFK_NONE */
+    {
+        /* bool_or(pred IS TRUE) -> false */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_TRUE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) false_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) true_const;
+        cexpr->location = -1;
+    }
+
+    return (Node *) cexpr;
+}
+
+/*
+ * Transform a cypher_predicate_function node into a query tree.
+ *
+ * Generates aggregate-based queries that preserve Cypher's three-valued
+ * NULL semantics.  The grammar layer adds a CASE WHEN list IS NULL
+ * THEN NULL ELSE (subquery) END guard for the null-list case.
+ *
+ * For all()/any()/none():
+ *   SELECT CASE WHEN bool_or(pred IS TRUE/FALSE) THEN ...
+ *               WHEN bool_or(pred IS NULL)       THEN NULL
+ *               ELSE ... END
+ *   FROM unnest(list) AS x
+ *
+ * For single():
+ *   SELECT count(*) FROM (
+ *     SELECT 1 FROM unnest(list) AS x WHERE pred IS TRUE LIMIT 2
+ *   ) sub
+ *
+ * All four use EXPR_SUBLINK so the subquery returns a scalar value.
+ * The LIMIT 2 in single() enables early termination: once two matches
+ * are found, evaluation stops and count(*) = 2 != 1.

Review Comment:
   The block comment above transform_cypher_predicate_function() says single() 
uses a nested subquery with LIMIT 2 for early termination, but the 
implementation below explicitly does not apply that optimization (it uses a 
plain count(*) and notes LIMIT 2 is deferred). Update the comment to match the 
current implementation (or implement the described LIMIT 2 approach) to avoid 
misleading future maintainers.
   ```suggestion
    *   SELECT count(*)
    *   FROM unnest(list) AS x
    *   WHERE pred IS TRUE
    *
    * All four use EXPR_SUBLINK so the subquery returns a scalar value.
   ```



##########
src/backend/parser/cypher_clause.c:
##########
@@ -25,6 +25,7 @@
 #include "postgres.h"
 
 #include "access/heapam.h"
+#include "catalog/pg_aggregate.h"

Review Comment:
   The new include "catalog/pg_aggregate.h" appears unused in this file 
(LookupFuncName comes from parse_func.h). Please remove the unused header to 
keep dependencies minimal and avoid masking missing includes in the future.
   ```suggestion
   
   ```



##########
src/backend/parser/cypher_gram.y:
##########
@@ -3317,6 +3382,79 @@ static Node *build_list_comprehension_node(Node *var, 
Node *expr,
     return (Node *) node_to_agtype((Node *)sub, "agtype[]", location);
 }
 
+/*
+ * Helper function to build a predicate function grammar node.
+ *
+ * Predicate functions follow the openCypher syntax:
+ *   all(x IN list WHERE predicate)
+ *   any(x IN list WHERE predicate)
+ *   none(x IN list WHERE predicate)
+ *   single(x IN list WHERE predicate)
+ *
+ * All four use EXPR_SUBLINK (scalar subquery).  The transform layer
+ * generates aggregate-based queries (using bool_or + CASE) for
+ * all/any/none to preserve three-valued NULL semantics, and
+ * count(*) with IS TRUE filtering for single().
+ *
+ * For single(), the subquery result is compared = 1.
+ */
+static Node *build_predicate_function_node(cypher_predicate_function_kind kind,
+                                           Node *var, Node *expr,
+                                           Node *where, int location)
+{
+    SubLink *sub;
+    cypher_predicate_function *pred_func = NULL;
+
+    /* build the predicate function node */
+    pred_func = make_ag_node(cypher_predicate_function);
+    pred_func->kind = kind;
+    pred_func->varname = extract_iter_variable_name(var);
+    pred_func->expr = expr;
+    pred_func->where = where;
+
+    /*
+     * Wrap the predicate function in a SubLink. PostgreSQL's SubLink is
+     * reused here as the carrier for our custom subquery node -- the
+     * predicate function node is stored as the subselect and will be
+     * transformed into a real Query by transform_cypher_predicate_function()
+     * in cypher_clause.c.
+     *
+     * All predicate functions now use EXPR_SUBLINK: the transform layer
+     * generates aggregate-based queries that return a scalar boolean
+     * (for all/any/none) or integer (for single).
+     */
+    sub = makeNode(SubLink);
+    sub->subLinkId = 0;
+    sub->testexpr = NULL;
+    sub->operName = NIL;
+    sub->subselect = (Node *) pred_func;
+    sub->location = location;
+    sub->subLinkType = EXPR_SUBLINK;
+
+    if (kind == CPFK_SINGLE)
+    {
+        /*
+         * single() -> (subquery) = 1
+         * The subquery returns count(*) with IS TRUE filtering.
+         */
+        Node *eq_expr;
+
+        eq_expr = (Node *) makeSimpleA_Expr(AEXPR_OP, "=",
+                                            (Node *) sub,
+                                            make_int_const(1, location),
+                                            location);
+        return (Node *) node_to_agtype(eq_expr, "boolean", location);
+    }
+    else
+    {
+        /*
+         * all()/any()/none(): the subquery returns a boolean directly
+         * from the CASE+bool_or() aggregate expression.
+         */
+        return (Node *) node_to_agtype((Node *) sub, "boolean", location);
+    }

Review Comment:
   Predicate functions need to return NULL when the input list expression is 
NULL (as documented in cypher_clause.c and expected by the regression tests for 
all/any/none). Right now build_predicate_function_node() returns the SubLink 
result directly, so NULL lists will behave like empty lists (vacuous truth) 
instead of propagating NULL. Wrap the generated expression with a NULL-list 
guard (e.g., CASE WHEN <list-expr> IS NULL THEN NULL ELSE 
<predicate-subquery-result> END), and ensure this applies consistently for 
single() too.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to