gregfelice commented on code in PR #2359:
URL: https://github.com/apache/age/pull/2359#discussion_r2996801820


##########
src/backend/parser/cypher_gram.y:
##########
@@ -3317,6 +3382,79 @@ static Node *build_list_comprehension_node(Node *var, 
Node *expr,
     return (Node *) node_to_agtype((Node *)sub, "agtype[]", location);
 }
 
+/*
+ * Helper function to build a predicate function grammar node.
+ *
+ * Predicate functions follow the openCypher syntax:
+ *   all(x IN list WHERE predicate)
+ *   any(x IN list WHERE predicate)
+ *   none(x IN list WHERE predicate)
+ *   single(x IN list WHERE predicate)
+ *
+ * All four use EXPR_SUBLINK (scalar subquery).  The transform layer
+ * generates aggregate-based queries (using bool_or + CASE) for
+ * all/any/none to preserve three-valued NULL semantics, and
+ * count(*) with IS TRUE filtering for single().
+ *
+ * For single(), the subquery result is compared = 1.
+ */
+static Node *build_predicate_function_node(cypher_predicate_function_kind kind,
+                                           Node *var, Node *expr,
+                                           Node *where, int location)
+{
+    SubLink *sub;
+    cypher_predicate_function *pred_func = NULL;
+
+    /* build the predicate function node */
+    pred_func = make_ag_node(cypher_predicate_function);
+    pred_func->kind = kind;
+    pred_func->varname = extract_iter_variable_name(var);
+    pred_func->expr = expr;
+    pred_func->where = where;
+
+    /*
+     * Wrap the predicate function in a SubLink. PostgreSQL's SubLink is
+     * reused here as the carrier for our custom subquery node -- the
+     * predicate function node is stored as the subselect and will be
+     * transformed into a real Query by transform_cypher_predicate_function()
+     * in cypher_clause.c.
+     *
+     * All predicate functions now use EXPR_SUBLINK: the transform layer
+     * generates aggregate-based queries that return a scalar boolean
+     * (for all/any/none) or integer (for single).
+     */
+    sub = makeNode(SubLink);
+    sub->subLinkId = 0;
+    sub->testexpr = NULL;
+    sub->operName = NIL;
+    sub->subselect = (Node *) pred_func;
+    sub->location = location;
+    sub->subLinkType = EXPR_SUBLINK;
+
+    if (kind == CPFK_SINGLE)
+    {
+        /*
+         * single() -> (subquery) = 1
+         * The subquery returns count(*) with IS TRUE filtering.
+         */
+        Node *eq_expr;
+
+        eq_expr = (Node *) makeSimpleA_Expr(AEXPR_OP, "=",
+                                            (Node *) sub,
+                                            make_int_const(1, location),
+                                            location);
+        return (Node *) node_to_agtype(eq_expr, "boolean", location);
+    }
+    else
+    {
+        /*
+         * all()/any()/none(): the subquery returns a boolean directly
+         * from the CASE+bool_or() aggregate expression.
+         */
+        return (Node *) node_to_agtype((Node *) sub, "boolean", location);
+    }

Review Comment:
   Addressed in cc19476b. Added a NULL-list guard in the grammar layer:
   
   ```c
   CASE WHEN expr IS NULL THEN NULL ELSE <predicate-result> END
   ```
   
   The guard wraps the entire predicate function result using a shared pointer 
to the list expression (safe because AGE's `transform_cypher_expr_recurse` 
creates new nodes without modifying the parse tree in-place). This fixes 
`single(x IN null WHERE ...)` returning false instead of NULL, and makes the 
NULL behavior explicit rather than relying on aggregate-over-empty-set 
semantics.
   
   Updated regression tests to expect NULL for `single(x IN null WHERE ...)`.



##########
src/backend/parser/cypher_clause.c:
##########
@@ -1602,6 +1611,332 @@ static Query 
*transform_cypher_list_comprehension(cypher_parsestate *cpstate,
     return query;
 }
 
+/*
+ * Helper: build a BooleanTest node (pred IS TRUE, pred IS FALSE, etc.)
+ */
+static Node *make_boolean_test(Node *arg, BoolTestType testtype)
+{
+    BooleanTest *bt = makeNode(BooleanTest);
+
+    bt->arg = (Expr *) arg;
+    bt->booltesttype = testtype;
+    bt->location = -1;
+
+    return (Node *) bt;
+}
+
+/*
+ * Helper: build a fully-transformed bool_or(expr) Aggref node.
+ *
+ * The argument must already be a transformed boolean expression.
+ * We construct the Aggref manually to avoid going through FuncCall
+ * + transformExpr, which expects raw parse tree nodes.
+ */
+static Node *make_bool_or_agg(ParseState *pstate, Node *arg)
+{
+    Aggref *agg;
+    TargetEntry *te;
+    Oid bool_or_oid;
+    Oid argtypes[1] = { BOOLOID };
+
+    /* Look up bool_or(boolean) */
+    bool_or_oid = LookupFuncName(list_make1(makeString("bool_or")),
+                                 1, argtypes, false);
+
+    /* Build the TargetEntry for the aggregate argument */
+    te = makeTargetEntry((Expr *) arg, 1, NULL, false);
+
+    /* Construct the Aggref */
+    agg = makeNode(Aggref);
+    agg->aggfnoid = bool_or_oid;
+    agg->aggtype = BOOLOID;
+    agg->aggcollid = InvalidOid;
+    agg->inputcollid = InvalidOid;
+    agg->aggtranstype = InvalidOid;  /* filled by planner */
+    agg->aggargtypes = list_make1_oid(BOOLOID);
+    agg->aggdirectargs = NIL;
+    agg->args = list_make1(te);
+    agg->aggorder = NIL;
+    agg->aggdistinct = NIL;
+    agg->aggfilter = NULL;
+    agg->aggstar = false;
+    agg->aggvariadic = false;
+    agg->aggkind = AGGKIND_NORMAL;
+    agg->aggpresorted = false;
+    agg->agglevelsup = 0;
+    agg->aggsplit = AGGSPLIT_SIMPLE;
+    agg->aggno = -1;
+    agg->aggtransno = -1;
+    agg->location = -1;
+
+    /* Register the aggregate with the parse state */
+    pstate->p_hasAggs = true;
+
+    return (Node *) agg;
+}
+
+/*
+ * Helper: build a transformed CASE expression implementing three-valued
+ * predicate logic for all(), any(), and none().
+ *
+ * any():   CASE WHEN bool_or(pred IS TRUE) THEN true
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE false END
+ *
+ * all():   CASE WHEN bool_or(pred IS FALSE) THEN false
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE true END
+ *
+ * none():  CASE WHEN bool_or(pred IS TRUE) THEN false
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE true END
+ *
+ * Empty list: both bool_or calls return NULL (no rows), so the CASE
+ * falls through to the default: false for any(), true for all()/none().
+ * This matches Cypher's vacuous truth semantics.
+ */
+static Node *make_predicate_case_expr(ParseState *pstate, Node *pred,
+                                      cypher_predicate_function_kind kind)
+{
+    CaseExpr *cexpr;
+    CaseWhen *when1, *when2;
+    Node *bool_or_first, *bool_or_null;
+    Node *true_const, *false_const, *null_const;
+
+    /* boolean constants */
+    true_const = makeBoolConst(true, false);
+    false_const = makeBoolConst(false, false);
+    null_const = makeBoolConst(false, true);  /* isnull = true */
+
+    /* Second branch is common to all: bool_or(pred IS NULL) -> NULL */
+    bool_or_null = make_bool_or_agg(pstate,
+                                    make_boolean_test(pred, IS_UNKNOWN));
+
+    when2 = makeNode(CaseWhen);
+    when2->expr = (Expr *) bool_or_null;
+    when2->result = (Expr *) null_const;
+    when2->location = -1;
+
+    if (kind == CPFK_ALL)
+    {
+        /* bool_or(pred IS FALSE) -> false */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_FALSE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) false_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) true_const;
+        cexpr->location = -1;
+    }
+    else if (kind == CPFK_ANY)
+    {
+        /* bool_or(pred IS TRUE) -> true */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_TRUE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) true_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) false_const;
+        cexpr->location = -1;
+    }
+    else /* CPFK_NONE */
+    {
+        /* bool_or(pred IS TRUE) -> false */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_TRUE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) false_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) true_const;
+        cexpr->location = -1;
+    }
+
+    return (Node *) cexpr;
+}
+
+/*
+ * Transform a cypher_predicate_function node into a query tree.
+ *
+ * Generates aggregate-based queries that preserve Cypher's three-valued
+ * NULL semantics.  The grammar layer adds a CASE WHEN list IS NULL
+ * THEN NULL ELSE (subquery) END guard for the null-list case.
+ *
+ * For all()/any()/none():
+ *   SELECT CASE WHEN bool_or(pred IS TRUE/FALSE) THEN ...
+ *               WHEN bool_or(pred IS NULL)       THEN NULL
+ *               ELSE ... END
+ *   FROM unnest(list) AS x
+ *
+ * For single():
+ *   SELECT count(*) FROM (
+ *     SELECT 1 FROM unnest(list) AS x WHERE pred IS TRUE LIMIT 2
+ *   ) sub
+ *
+ * All four use EXPR_SUBLINK so the subquery returns a scalar value.
+ * The LIMIT 2 in single() enables early termination: once two matches
+ * are found, evaluation stops and count(*) = 2 != 1.

Review Comment:
   Addressed in cc19476b. Updated the block comment to describe the actual 
implementation (plain count(*) without LIMIT 2). The LIMIT 2 optimization is 
noted as deferred in the single() branch comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to