This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch Dev_Multiple_Labels
in repository https://gitbox.apache.org/repos/asf/age.git


The following commit(s) were added to refs/heads/Dev_Multiple_Labels by this 
push:
     new 22ebcdab Optimize join conditions to use direct column references 
(#2285)
22ebcdab is described below

commit 22ebcdabb2f24dbbd03b5d61972d65959589eb56
Author: John Gemignani <[email protected]>
AuthorDate: Mon Dec 22 19:34:07 2025 -0800

    Optimize join conditions to use direct column references (#2285)
    
    NOTE: This PR was created with AI tools and a human.
    
    When matching patterns like (u)-[e]->(v), join conditions previously
    rebuilt entire vertex/edge agtype values just to extract IDs:
    
    age_id(_agtype_build_vertex(r.id, ...))::graphid
    
    Added optimize_qual_expr_mutator() to replace these patterns with
    direct column access:
    
    age_id(_agtype_build_vertex(id, ...)) -> graphid_to_agtype(id)
    age_start_id(_agtype_build_edge(...)) -> graphid_to_agtype(start)
    age_end_id(_agtype_build_edge(...)) -> graphid_to_agtype(end)
    age_properties(...) -> direct properties column
    
    Join conditions now use efficient comparisons like (e.start_id = u.id)
    enabling PostgreSQL to leverage index scans on edge tables.
    
    Added regression tests.
    All regression tests passed.
    
    modified:   regress/expected/unified_vertex_table.out
    modified:   regress/sql/unified_vertex_table.sql
    modified:   src/backend/parser/cypher_clause.c
---
 regress/expected/unified_vertex_table.out | 117 ++++++++++++++++++++-
 regress/sql/unified_vertex_table.sql      |  58 +++++++++++
 src/backend/parser/cypher_clause.c        | 164 ++++++++++++++++++++++++++++++
 3 files changed, 338 insertions(+), 1 deletion(-)

diff --git a/regress/expected/unified_vertex_table.out 
b/regress/expected/unified_vertex_table.out
index 118f4a83..81655bca 100644
--- a/regress/expected/unified_vertex_table.out
+++ b/regress/expected/unified_vertex_table.out
@@ -1309,11 +1309,124 @@ $$) AS (eid agtype, props agtype, sid agtype, eid2 
agtype);
  11540474045136897 | {"weight": 10} | 11258999068426241 | 11821949021847553
 (1 row)
 
+--
+-- Test 29: Verify join condition optimization with EXPLAIN
+--
+-- When vertices/edges from previous clauses are joined, the optimization
+-- should replace patterns like:
+--   age_id(_agtype_build_vertex(r.id, ...))::graphid
+-- with direct column access:
+--   r.id
+--
+-- This avoids expensive vertex reconstruction in join conditions.
+--
+-- Create test data: Users following each other
+SELECT * FROM cypher('unified_test', $$
+    CREATE (:JoinOptUser {name: 'Alice'}),
+           (:JoinOptUser {name: 'Bob'}),
+           (:JoinOptUser {name: 'Carol'})
+$$) AS (v agtype);
+ v 
+---
+(0 rows)
+
+SELECT * FROM cypher('unified_test', $$
+    MATCH (a:JoinOptUser {name: 'Alice'}), (b:JoinOptUser {name: 'Bob'})
+    CREATE (a)-[:JOPT_FOLLOWS]->(b)
+$$) AS (e agtype);
+ e 
+---
+(0 rows)
+
+SELECT * FROM cypher('unified_test', $$
+    MATCH (b:JoinOptUser {name: 'Bob'}), (c:JoinOptUser {name: 'Carol'})
+    CREATE (b)-[:JOPT_FOLLOWS]->(c)
+$$) AS (e agtype);
+ e 
+---
+(0 rows)
+
+-- EXPLAIN showing join conditions use direct column access
+-- Look for: graphid_to_agtype(id) instead of age_id(_agtype_build_vertex(...))
+-- And: direct id comparisons instead of age_id(...)::graphid
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+    MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+    RETURN u.name, v.name
+$$) AS (u_name agtype, v_name agtype);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Nested Loop
+   Join Filter: (e.start_id = u.id)
+   ->  Nested Loop
+         ->  Seq Scan on _ag_label_vertex u
+               Filter: (labels = '23814'::oid)
+         ->  Seq Scan on _ag_label_vertex v
+               Filter: (labels = '23814'::oid)
+   ->  Bitmap Heap Scan on "JOPT_FOLLOWS" e
+         Recheck Cond: (end_id = v.id)
+         ->  Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
+               Index Cond: (end_id = v.id)
+(11 rows)
+
+-- Verify the query still returns correct results
+SELECT * FROM cypher('unified_test', $$
+    MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+    RETURN u.name, v.name
+    ORDER BY u.name
+$$) AS (u_name agtype, v_name agtype);
+ u_name  | v_name  
+---------+---------
+ "Alice" | "Bob"
+ "Bob"   | "Carol"
+(2 rows)
+
+-- Multi-hop pattern showing optimization across multiple joins
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+    MATCH 
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+    RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (e1.start_id = a.id)
+   ->  Nested Loop
+         Join Filter: _ag_enforce_edge_uniqueness2(e1.id, e2.id)
+         ->  Nested Loop
+               Join Filter: (e2.start_id = b.id)
+               ->  Nested Loop
+                     ->  Seq Scan on _ag_label_vertex b
+                           Filter: (labels = '23814'::oid)
+                     ->  Seq Scan on _ag_label_vertex c
+                           Filter: (labels = '23814'::oid)
+               ->  Bitmap Heap Scan on "JOPT_FOLLOWS" e2
+                     Recheck Cond: (end_id = c.id)
+                     ->  Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
+                           Index Cond: (end_id = c.id)
+         ->  Bitmap Heap Scan on "JOPT_FOLLOWS" e1
+               Recheck Cond: (end_id = b.id)
+               ->  Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
+                     Index Cond: (end_id = b.id)
+   ->  Seq Scan on _ag_label_vertex a
+         Filter: (labels = '23814'::oid)
+(21 rows)
+
+-- Verify multi-hop query results
+SELECT * FROM cypher('unified_test', $$
+    MATCH 
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+    RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+ a_name  | b_name | c_name  
+---------+--------+---------
+ "Alice" | "Bob"  | "Carol"
+(1 row)
+
 --
 -- Cleanup
 --
 SELECT drop_graph('unified_test', true);
-NOTICE:  drop cascades to 42 other objects
+NOTICE:  drop cascades to 44 other objects
 DETAIL:  drop cascades to table unified_test._ag_label_vertex
 drop cascades to table unified_test._ag_label_edge
 drop cascades to table unified_test."Person"
@@ -1356,6 +1469,8 @@ drop cascades to table unified_test."OptimizeTest"
 drop cascades to table unified_test."OptStart"
 drop cascades to table unified_test."OPT_EDGE"
 drop cascades to table unified_test."OptEnd"
+drop cascades to table unified_test."JoinOptUser"
+drop cascades to table unified_test."JOPT_FOLLOWS"
 NOTICE:  graph "unified_test" has been dropped
  drop_graph 
 ------------
diff --git a/regress/sql/unified_vertex_table.sql 
b/regress/sql/unified_vertex_table.sql
index aebf6cd5..605fd176 100644
--- a/regress/sql/unified_vertex_table.sql
+++ b/regress/sql/unified_vertex_table.sql
@@ -809,6 +809,64 @@ SELECT * FROM cypher('unified_test', $$
     RETURN id(e), properties(e), start_id(e), end_id(e)
 $$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
 
+--
+-- Test 29: Verify join condition optimization with EXPLAIN
+--
+-- When vertices/edges from previous clauses are joined, the optimization
+-- should replace patterns like:
+--   age_id(_agtype_build_vertex(r.id, ...))::graphid
+-- with direct column access:
+--   r.id
+--
+-- This avoids expensive vertex reconstruction in join conditions.
+--
+
+-- Create test data: Users following each other
+SELECT * FROM cypher('unified_test', $$
+    CREATE (:JoinOptUser {name: 'Alice'}),
+           (:JoinOptUser {name: 'Bob'}),
+           (:JoinOptUser {name: 'Carol'})
+$$) AS (v agtype);
+
+SELECT * FROM cypher('unified_test', $$
+    MATCH (a:JoinOptUser {name: 'Alice'}), (b:JoinOptUser {name: 'Bob'})
+    CREATE (a)-[:JOPT_FOLLOWS]->(b)
+$$) AS (e agtype);
+
+SELECT * FROM cypher('unified_test', $$
+    MATCH (b:JoinOptUser {name: 'Bob'}), (c:JoinOptUser {name: 'Carol'})
+    CREATE (b)-[:JOPT_FOLLOWS]->(c)
+$$) AS (e agtype);
+
+-- EXPLAIN showing join conditions use direct column access
+-- Look for: graphid_to_agtype(id) instead of age_id(_agtype_build_vertex(...))
+-- And: direct id comparisons instead of age_id(...)::graphid
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+    MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+    RETURN u.name, v.name
+$$) AS (u_name agtype, v_name agtype);
+
+-- Verify the query still returns correct results
+SELECT * FROM cypher('unified_test', $$
+    MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+    RETURN u.name, v.name
+    ORDER BY u.name
+$$) AS (u_name agtype, v_name agtype);
+
+-- Multi-hop pattern showing optimization across multiple joins
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+    MATCH 
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+    RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+
+-- Verify multi-hop query results
+SELECT * FROM cypher('unified_test', $$
+    MATCH 
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+    RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+
 --
 -- Cleanup
 --
diff --git a/src/backend/parser/cypher_clause.c 
b/src/backend/parser/cypher_clause.c
index eb7dccf0..e56ac8ac 100644
--- a/src/backend/parser/cypher_clause.c
+++ b/src/backend/parser/cypher_clause.c
@@ -38,6 +38,7 @@
 #include "parser/parsetree.h"
 #include "parser/parse_relation.h"
 #include "rewrite/rewriteHandler.h"
+#include "utils/lsyscache.h"
 
 #include "catalog/ag_graph.h"
 #include "catalog/ag_label.h"
@@ -136,6 +137,7 @@ static Node *make_edge_expr(cypher_parsestate *cpstate,
                             ParseNamespaceItem *pnsi);
 static Node *make_qual(cypher_parsestate *cpstate,
                            transform_entity *entity, char *name);
+static Node *optimize_qual_expr_mutator(Node *node, void *context);
 static TargetEntry *
 transform_match_create_path_variable(cypher_parsestate *cpstate,
                                      cypher_path *path, List *entities);
@@ -3375,11 +3377,173 @@ static void transform_match_pattern(cypher_parsestate 
*cpstate, Query *query,
         expr = (Expr *)coerce_to_boolean(pstate, (Node *)expr, "WHERE");
     }
 
+    /*
+     * Apply optimization to the transformed expression tree. This looks for
+     * patterns like age_id(_agtype_build_vertex(...)) and replaces them with
+     * direct column references.
+     */
+    if (expr != NULL)
+    {
+        expr = (Expr *)optimize_qual_expr_mutator((Node *)expr, NULL);
+    }
+
     query->rtable = cpstate->pstate.p_rtable;
     query->rteperminfos = cpstate->pstate.p_rteperminfos;
     query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, (Node *)expr);
 }
 
+/*
+ * optimize_qual_expr_mutator - Walk expression tree and optimize vertex/edge
+ * accessor patterns.
+ *
+ * This mutator looks for patterns like:
+ *   age_id(_agtype_build_vertex(id, label, props))
+ * and transforms them to:
+ *   graphid_to_agtype(id)
+ *
+ * This avoids the expensive reconstruction of vertex/edge agtype values
+ * just to immediately extract a single field from them. This is particularly
+ * important for join conditions where the vertex/edge comes from a previous
+ * clause.
+ */
+static Node *optimize_qual_expr_mutator(Node *node, void *context)
+{
+    if (node == NULL)
+    {
+        return NULL;
+    }
+
+    /*
+     * Look for FuncExpr nodes that wrap accessor functions around
+     * _agtype_build_vertex or _agtype_build_edge calls.
+     */
+    if (IsA(node, FuncExpr))
+    {
+        FuncExpr *outer_func = (FuncExpr *)node;
+        char *outer_func_name;
+        Node *arg;
+        FuncExpr *inner_func;
+        char *inner_func_name;
+        List *inner_args;
+        int arg_index = -1;
+
+        /* Must have exactly one argument */
+        if (list_length(outer_func->args) != 1)
+        {
+            goto recurse;
+        }
+
+        outer_func_name = get_func_name(outer_func->funcid);
+        if (outer_func_name == NULL)
+        {
+            goto recurse;
+        }
+
+        /* Check if this is an accessor function we can optimize */
+        if (strcmp(outer_func_name, "age_id") != 0 &&
+            strcmp(outer_func_name, "age_start_id") != 0 &&
+            strcmp(outer_func_name, "age_end_id") != 0 &&
+            strcmp(outer_func_name, "age_properties") != 0)
+        {
+            goto recurse;
+        }
+
+        arg = (Node *)linitial(outer_func->args);
+
+        /* The argument must be a FuncExpr (the build function) */
+        if (!IsA(arg, FuncExpr))
+        {
+            goto recurse;
+        }
+
+        inner_func = (FuncExpr *)arg;
+        inner_func_name = get_func_name(inner_func->funcid);
+        if (inner_func_name == NULL)
+        {
+            goto recurse;
+        }
+
+        inner_args = inner_func->args;
+
+        /*
+         * Check for _agtype_build_vertex(id, label_name, properties)
+         * Arguments: 0=id (graphid), 1=label_name (cstring), 2=properties 
(agtype)
+         */
+        if (strcmp(inner_func_name, "_agtype_build_vertex") == 0 &&
+            list_length(inner_args) == 3)
+        {
+            if (strcmp(outer_func_name, "age_id") == 0)
+            {
+                arg_index = 0;  /* id */
+            }
+            else if (strcmp(outer_func_name, "age_properties") == 0)
+            {
+                arg_index = 2;  /* properties */
+            }
+        }
+        /*
+         * Check for _agtype_build_edge(id, startid, endid, label_name, 
properties)
+         * Arguments: 0=id (graphid), 1=start_id (graphid), 2=end_id (graphid),
+         *            3=label_name (cstring), 4=properties (agtype)
+         */
+        else if (strcmp(inner_func_name, "_agtype_build_edge") == 0 &&
+                 list_length(inner_args) == 5)
+        {
+            if (strcmp(outer_func_name, "age_id") == 0)
+            {
+                arg_index = 0;  /* id */
+            }
+            else if (strcmp(outer_func_name, "age_start_id") == 0)
+            {
+                arg_index = 1;  /* start_id */
+            }
+            else if (strcmp(outer_func_name, "age_end_id") == 0)
+            {
+                arg_index = 2;  /* end_id */
+            }
+            else if (strcmp(outer_func_name, "age_properties") == 0)
+            {
+                arg_index = 4;  /* properties */
+            }
+        }
+
+        /* If we found a pattern to optimize */
+        if (arg_index >= 0)
+        {
+            Node *extracted_arg = (Node *)list_nth(inner_args, arg_index);
+
+            /* For properties, return directly (already agtype) */
+            if (strcmp(outer_func_name, "age_properties") == 0)
+            {
+                return extracted_arg;
+            }
+            else
+            {
+                /*
+                 * For graphid fields (id, start_id, end_id), we need to wrap
+                 * in graphid_to_agtype to match the original return type.
+                 */
+                Oid cast_func_oid;
+                FuncExpr *cast_expr;
+
+                cast_func_oid = get_ag_func_oid("graphid_to_agtype", 1,
+                                                GRAPHIDOID);
+
+                cast_expr = makeFuncExpr(cast_func_oid, AGTYPEOID,
+                                         list_make1(extracted_arg),
+                                         InvalidOid, InvalidOid,
+                                         COERCE_EXPLICIT_CALL);
+                cast_expr->location = outer_func->location;
+
+                return (Node *)cast_expr;
+            }
+        }
+    }
+
+recurse:
+    return expression_tree_mutator(node, optimize_qual_expr_mutator, context);
+}
+
 /*
  * Creates a FuncCall node that will prevent an edge from being joined
  * to twice.

Reply via email to