Re: [PR] Implement predicate functions: all(), any(), none(), single() [age]

via GitHub Thu, 26 Mar 2026 11:43:43 -0700


Copilot commented on code in PR #2359:
URL: https://github.com/apache/age/pull/2359#discussion_r2996975639



##########
src/backend/parser/cypher_gram.y:
##########
@@ -3268,6 +3295,44 @@ static cypher_relationship *build_VLE_relation(List 
*left_arg,
     return cr;
 }
 
+/*
+ * Extract and validate the iterator variable name from a ColumnRef node.
+ * Used by predicate functions (all/any/none/single) which share the
+ * "variable IN list" syntax with list comprehensions.
+ */
+static char *extract_iter_variable_name(Node *var)
+{
+    ColumnRef *cref;
+    String *val;
+
+    if (!IsA(var, ColumnRef))
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_SYNTAX_ERROR),
+                 errmsg("syntax error at or near IN")));
+    }
+
+    cref = (ColumnRef *)var;
+
+    /* The iterator must be a simple unqualified name (single field) */
+    if (list_length(cref->fields) != 1)
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_SYNTAX_ERROR),
+                 errmsg("qualified name not allowed as iterator variable")));
+    }
+
+    val = linitial(cref->fields);
+    if (!IsA(val, String))
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_SYNTAX_ERROR),
+                 errmsg("invalid iterator variable name")));
+    }
+
+    return val->sval;
+}
+
 /* helper function to build a list_comprehension grammar node */
 static Node *build_list_comprehension_node(Node *var, Node *expr,
                                            Node *where, Node *mapping_expr,

Review Comment:
   Now that extract_iter_variable_name() was introduced to validate iterator 
variables (including rejecting qualified ColumnRefs), 
build_list_comprehension_node() should ideally reuse it so `var IN list` 
validation is consistent between list comprehensions and all/any/none/single. 
Otherwise list comprehensions can still accept qualified iterator names and 
silently use only the first field, which can lead to confusing behavior/errors.



##########
src/backend/parser/cypher_clause.c:
##########
@@ -1602,6 +1611,331 @@ static Query 
*transform_cypher_list_comprehension(cypher_parsestate *cpstate,
     return query;
 }
 
+/*
+ * Helper: build a BooleanTest node (pred IS TRUE, pred IS FALSE, etc.)
+ */
+static Node *make_boolean_test(Node *arg, BoolTestType testtype)
+{
+    BooleanTest *bt = makeNode(BooleanTest);
+
+    bt->arg = (Expr *) arg;
+    bt->booltesttype = testtype;
+    bt->location = -1;
+
+    return (Node *) bt;
+}
+
+/*
+ * Helper: build a fully-transformed bool_or(expr) Aggref node.
+ *
+ * The argument must already be a transformed boolean expression.
+ * We construct the Aggref manually to avoid going through FuncCall
+ * + transformExpr, which expects raw parse tree nodes.
+ */
+static Node *make_bool_or_agg(ParseState *pstate, Node *arg)
+{
+    Aggref *agg;
+    TargetEntry *te;
+    Oid bool_or_oid;
+    Oid argtypes[1] = { BOOLOID };
+
+    /* Look up bool_or(boolean) */
+    bool_or_oid = LookupFuncName(list_make1(makeString("bool_or")),
+                                 1, argtypes, false);
+
+    /* Build the TargetEntry for the aggregate argument */
+    te = makeTargetEntry((Expr *) arg, 1, NULL, false);
+
+    /* Construct the Aggref */
+    agg = makeNode(Aggref);
+    agg->aggfnoid = bool_or_oid;
+    agg->aggtype = BOOLOID;
+    agg->aggcollid = InvalidOid;
+    agg->inputcollid = InvalidOid;
+    agg->aggtranstype = InvalidOid;  /* filled by planner */
+    agg->aggargtypes = list_make1_oid(BOOLOID);
+    agg->aggdirectargs = NIL;
+    agg->args = list_make1(te);
+    agg->aggorder = NIL;
+    agg->aggdistinct = NIL;
+    agg->aggfilter = NULL;
+    agg->aggstar = false;
+    agg->aggvariadic = false;
+    agg->aggkind = AGGKIND_NORMAL;
+    agg->aggpresorted = false;
+    agg->agglevelsup = 0;
+    agg->aggsplit = AGGSPLIT_SIMPLE;
+    agg->aggno = -1;
+    agg->aggtransno = -1;
+    agg->location = -1;
+
+    /* Register the aggregate with the parse state */
+    pstate->p_hasAggs = true;
+
+    return (Node *) agg;
+}
+
+/*
+ * Helper: build a transformed CASE expression implementing three-valued
+ * predicate logic for all(), any(), and none().
+ *
+ * any():   CASE WHEN bool_or(pred IS TRUE) THEN true
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE false END
+ *
+ * all():   CASE WHEN bool_or(pred IS FALSE) THEN false
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE true END
+ *
+ * none():  CASE WHEN bool_or(pred IS TRUE) THEN false
+ *               WHEN bool_or(pred IS NULL) THEN NULL
+ *               ELSE true END
+ *
+ * Empty list: both bool_or calls return NULL (no rows), so the CASE
+ * falls through to the default: false for any(), true for all()/none().
+ * This matches Cypher's vacuous truth semantics.
+ */
+static Node *make_predicate_case_expr(ParseState *pstate, Node *pred,
+                                      cypher_predicate_function_kind kind)
+{
+    CaseExpr *cexpr;
+    CaseWhen *when1, *when2;
+    Node *bool_or_first, *bool_or_null;
+    Node *true_const, *false_const, *null_const;
+
+    /* boolean constants */
+    true_const = makeBoolConst(true, false);
+    false_const = makeBoolConst(false, false);
+    null_const = makeBoolConst(false, true);  /* isnull = true */
+
+    /* Second branch is common to all: bool_or(pred IS NULL) -> NULL */
+    bool_or_null = make_bool_or_agg(pstate,
+                                    make_boolean_test(pred, IS_UNKNOWN));
+
+    when2 = makeNode(CaseWhen);
+    when2->expr = (Expr *) bool_or_null;
+    when2->result = (Expr *) null_const;
+    when2->location = -1;
+
+    if (kind == CPFK_ALL)
+    {
+        /* bool_or(pred IS FALSE) -> false */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_FALSE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) false_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) true_const;
+        cexpr->location = -1;
+    }
+    else if (kind == CPFK_ANY)
+    {
+        /* bool_or(pred IS TRUE) -> true */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_TRUE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) true_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) false_const;
+        cexpr->location = -1;
+    }
+    else /* CPFK_NONE */
+    {
+        /* bool_or(pred IS TRUE) -> false */
+        bool_or_first = make_bool_or_agg(pstate,
+                                         make_boolean_test(pred, IS_TRUE));
+        when1 = makeNode(CaseWhen);
+        when1->expr = (Expr *) bool_or_first;
+        when1->result = (Expr *) false_const;
+        when1->location = -1;
+
+        cexpr = makeNode(CaseExpr);
+        cexpr->casetype = BOOLOID;
+        cexpr->arg = NULL;
+        cexpr->args = list_make2(when1, when2);
+        cexpr->defresult = (Expr *) true_const;
+        cexpr->location = -1;
+    }
+
+    return (Node *) cexpr;
+}
+
+/*
+ * Transform a cypher_predicate_function node into a query tree.
+ *
+ * Generates aggregate-based queries that preserve Cypher's three-valued
+ * NULL semantics.  The grammar layer wraps the SubLink with a
+ * CASE WHEN list IS NULL THEN NULL ELSE (subquery) END guard so all
+ * four functions return NULL when the input list is NULL.
+ *
+ * For all()/any()/none():
+ *   SELECT CASE WHEN bool_or(pred IS TRUE/FALSE) THEN ...
+ *               WHEN bool_or(pred IS NULL)       THEN NULL
+ *               ELSE ... END
+ *   FROM unnest(list) AS x
+ *

Review Comment:
   The PR description says all/any/none are transformed to EXISTS/NOT EXISTS 
and that these forms short-circuit, but the implemented transform uses 
bool_or() aggregates + CASE (which will evaluate all rows). Please update the 
PR description to match the actual aggregate-based approach (or implement the 
EXISTS/NOT EXISTS strategy if short-circuiting is a requirement).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Implement predicate functions: all(), any(), none(), single() [age]

Reply via email to