This is an automated email from the ASF dual-hosted git repository.
jgemignani pushed a commit to branch Dev_Multiple_Labels
in repository https://gitbox.apache.org/repos/asf/age.git
The following commit(s) were added to refs/heads/Dev_Multiple_Labels by this
push:
new 22ebcdab Optimize join conditions to use direct column references
(#2285)
22ebcdab is described below
commit 22ebcdabb2f24dbbd03b5d61972d65959589eb56
Author: John Gemignani <[email protected]>
AuthorDate: Mon Dec 22 19:34:07 2025 -0800
Optimize join conditions to use direct column references (#2285)
NOTE: This PR was created with AI tools and a human.
When matching patterns like (u)-[e]->(v), join conditions previously
rebuilt entire vertex/edge agtype values just to extract IDs:
age_id(_agtype_build_vertex(r.id, ...))::graphid
Added optimize_qual_expr_mutator() to replace these patterns with
direct column access:
age_id(_agtype_build_vertex(id, ...)) -> graphid_to_agtype(id)
age_start_id(_agtype_build_edge(...)) -> graphid_to_agtype(start)
age_end_id(_agtype_build_edge(...)) -> graphid_to_agtype(end)
age_properties(...) -> direct properties column
Join conditions now use efficient comparisons like (e.start_id = u.id)
enabling PostgreSQL to leverage index scans on edge tables.
Added regression tests.
All regression tests passed.
modified: regress/expected/unified_vertex_table.out
modified: regress/sql/unified_vertex_table.sql
modified: src/backend/parser/cypher_clause.c
---
regress/expected/unified_vertex_table.out | 117 ++++++++++++++++++++-
regress/sql/unified_vertex_table.sql | 58 +++++++++++
src/backend/parser/cypher_clause.c | 164 ++++++++++++++++++++++++++++++
3 files changed, 338 insertions(+), 1 deletion(-)
diff --git a/regress/expected/unified_vertex_table.out
b/regress/expected/unified_vertex_table.out
index 118f4a83..81655bca 100644
--- a/regress/expected/unified_vertex_table.out
+++ b/regress/expected/unified_vertex_table.out
@@ -1309,11 +1309,124 @@ $$) AS (eid agtype, props agtype, sid agtype, eid2
agtype);
11540474045136897 | {"weight": 10} | 11258999068426241 | 11821949021847553
(1 row)
+--
+-- Test 29: Verify join condition optimization with EXPLAIN
+--
+-- When vertices/edges from previous clauses are joined, the optimization
+-- should replace patterns like:
+-- age_id(_agtype_build_vertex(r.id, ...))::graphid
+-- with direct column access:
+-- r.id
+--
+-- This avoids expensive vertex reconstruction in join conditions.
+--
+-- Create test data: Users following each other
+SELECT * FROM cypher('unified_test', $$
+ CREATE (:JoinOptUser {name: 'Alice'}),
+ (:JoinOptUser {name: 'Bob'}),
+ (:JoinOptUser {name: 'Carol'})
+$$) AS (v agtype);
+ v
+---
+(0 rows)
+
+SELECT * FROM cypher('unified_test', $$
+ MATCH (a:JoinOptUser {name: 'Alice'}), (b:JoinOptUser {name: 'Bob'})
+ CREATE (a)-[:JOPT_FOLLOWS]->(b)
+$$) AS (e agtype);
+ e
+---
+(0 rows)
+
+SELECT * FROM cypher('unified_test', $$
+ MATCH (b:JoinOptUser {name: 'Bob'}), (c:JoinOptUser {name: 'Carol'})
+ CREATE (b)-[:JOPT_FOLLOWS]->(c)
+$$) AS (e agtype);
+ e
+---
+(0 rows)
+
+-- EXPLAIN showing join conditions use direct column access
+-- Look for: graphid_to_agtype(id) instead of age_id(_agtype_build_vertex(...))
+-- And: direct id comparisons instead of age_id(...)::graphid
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+ MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+ RETURN u.name, v.name
+$$) AS (u_name agtype, v_name agtype);
+ QUERY PLAN
+------------------------------------------------------------
+ Nested Loop
+ Join Filter: (e.start_id = u.id)
+ -> Nested Loop
+ -> Seq Scan on _ag_label_vertex u
+ Filter: (labels = '23814'::oid)
+ -> Seq Scan on _ag_label_vertex v
+ Filter: (labels = '23814'::oid)
+ -> Bitmap Heap Scan on "JOPT_FOLLOWS" e
+ Recheck Cond: (end_id = v.id)
+ -> Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
+ Index Cond: (end_id = v.id)
+(11 rows)
+
+-- Verify the query still returns correct results
+SELECT * FROM cypher('unified_test', $$
+ MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+ RETURN u.name, v.name
+ ORDER BY u.name
+$$) AS (u_name agtype, v_name agtype);
+ u_name | v_name
+---------+---------
+ "Alice" | "Bob"
+ "Bob" | "Carol"
+(2 rows)
+
+-- Multi-hop pattern showing optimization across multiple joins
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+ MATCH
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+ RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+ QUERY PLAN
+------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (e1.start_id = a.id)
+ -> Nested Loop
+ Join Filter: _ag_enforce_edge_uniqueness2(e1.id, e2.id)
+ -> Nested Loop
+ Join Filter: (e2.start_id = b.id)
+ -> Nested Loop
+ -> Seq Scan on _ag_label_vertex b
+ Filter: (labels = '23814'::oid)
+ -> Seq Scan on _ag_label_vertex c
+ Filter: (labels = '23814'::oid)
+ -> Bitmap Heap Scan on "JOPT_FOLLOWS" e2
+ Recheck Cond: (end_id = c.id)
+ -> Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
+ Index Cond: (end_id = c.id)
+ -> Bitmap Heap Scan on "JOPT_FOLLOWS" e1
+ Recheck Cond: (end_id = b.id)
+ -> Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
+ Index Cond: (end_id = b.id)
+ -> Seq Scan on _ag_label_vertex a
+ Filter: (labels = '23814'::oid)
+(21 rows)
+
+-- Verify multi-hop query results
+SELECT * FROM cypher('unified_test', $$
+ MATCH
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+ RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+ a_name | b_name | c_name
+---------+--------+---------
+ "Alice" | "Bob" | "Carol"
+(1 row)
+
--
-- Cleanup
--
SELECT drop_graph('unified_test', true);
-NOTICE: drop cascades to 42 other objects
+NOTICE: drop cascades to 44 other objects
DETAIL: drop cascades to table unified_test._ag_label_vertex
drop cascades to table unified_test._ag_label_edge
drop cascades to table unified_test."Person"
@@ -1356,6 +1469,8 @@ drop cascades to table unified_test."OptimizeTest"
drop cascades to table unified_test."OptStart"
drop cascades to table unified_test."OPT_EDGE"
drop cascades to table unified_test."OptEnd"
+drop cascades to table unified_test."JoinOptUser"
+drop cascades to table unified_test."JOPT_FOLLOWS"
NOTICE: graph "unified_test" has been dropped
drop_graph
------------
diff --git a/regress/sql/unified_vertex_table.sql
b/regress/sql/unified_vertex_table.sql
index aebf6cd5..605fd176 100644
--- a/regress/sql/unified_vertex_table.sql
+++ b/regress/sql/unified_vertex_table.sql
@@ -809,6 +809,64 @@ SELECT * FROM cypher('unified_test', $$
RETURN id(e), properties(e), start_id(e), end_id(e)
$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
+--
+-- Test 29: Verify join condition optimization with EXPLAIN
+--
+-- When vertices/edges from previous clauses are joined, the optimization
+-- should replace patterns like:
+-- age_id(_agtype_build_vertex(r.id, ...))::graphid
+-- with direct column access:
+-- r.id
+--
+-- This avoids expensive vertex reconstruction in join conditions.
+--
+
+-- Create test data: Users following each other
+SELECT * FROM cypher('unified_test', $$
+ CREATE (:JoinOptUser {name: 'Alice'}),
+ (:JoinOptUser {name: 'Bob'}),
+ (:JoinOptUser {name: 'Carol'})
+$$) AS (v agtype);
+
+SELECT * FROM cypher('unified_test', $$
+ MATCH (a:JoinOptUser {name: 'Alice'}), (b:JoinOptUser {name: 'Bob'})
+ CREATE (a)-[:JOPT_FOLLOWS]->(b)
+$$) AS (e agtype);
+
+SELECT * FROM cypher('unified_test', $$
+ MATCH (b:JoinOptUser {name: 'Bob'}), (c:JoinOptUser {name: 'Carol'})
+ CREATE (b)-[:JOPT_FOLLOWS]->(c)
+$$) AS (e agtype);
+
+-- EXPLAIN showing join conditions use direct column access
+-- Look for: graphid_to_agtype(id) instead of age_id(_agtype_build_vertex(...))
+-- And: direct id comparisons instead of age_id(...)::graphid
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+ MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+ RETURN u.name, v.name
+$$) AS (u_name agtype, v_name agtype);
+
+-- Verify the query still returns correct results
+SELECT * FROM cypher('unified_test', $$
+ MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
+ RETURN u.name, v.name
+ ORDER BY u.name
+$$) AS (u_name agtype, v_name agtype);
+
+-- Multi-hop pattern showing optimization across multiple joins
+EXPLAIN (COSTS OFF)
+SELECT * FROM cypher('unified_test', $$
+ MATCH
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+ RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+
+-- Verify multi-hop query results
+SELECT * FROM cypher('unified_test', $$
+ MATCH
(a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
+ RETURN a.name, b.name, c.name
+$$) AS (a_name agtype, b_name agtype, c_name agtype);
+
--
-- Cleanup
--
diff --git a/src/backend/parser/cypher_clause.c
b/src/backend/parser/cypher_clause.c
index eb7dccf0..e56ac8ac 100644
--- a/src/backend/parser/cypher_clause.c
+++ b/src/backend/parser/cypher_clause.c
@@ -38,6 +38,7 @@
#include "parser/parsetree.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteHandler.h"
+#include "utils/lsyscache.h"
#include "catalog/ag_graph.h"
#include "catalog/ag_label.h"
@@ -136,6 +137,7 @@ static Node *make_edge_expr(cypher_parsestate *cpstate,
ParseNamespaceItem *pnsi);
static Node *make_qual(cypher_parsestate *cpstate,
transform_entity *entity, char *name);
+static Node *optimize_qual_expr_mutator(Node *node, void *context);
static TargetEntry *
transform_match_create_path_variable(cypher_parsestate *cpstate,
cypher_path *path, List *entities);
@@ -3375,11 +3377,173 @@ static void transform_match_pattern(cypher_parsestate
*cpstate, Query *query,
expr = (Expr *)coerce_to_boolean(pstate, (Node *)expr, "WHERE");
}
+ /*
+ * Apply optimization to the transformed expression tree. This looks for
+ * patterns like age_id(_agtype_build_vertex(...)) and replaces them with
+ * direct column references.
+ */
+ if (expr != NULL)
+ {
+ expr = (Expr *)optimize_qual_expr_mutator((Node *)expr, NULL);
+ }
+
query->rtable = cpstate->pstate.p_rtable;
query->rteperminfos = cpstate->pstate.p_rteperminfos;
query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, (Node *)expr);
}
+/*
+ * optimize_qual_expr_mutator - Walk expression tree and optimize vertex/edge
+ * accessor patterns.
+ *
+ * This mutator looks for patterns like:
+ * age_id(_agtype_build_vertex(id, label, props))
+ * and transforms them to:
+ * graphid_to_agtype(id)
+ *
+ * This avoids the expensive reconstruction of vertex/edge agtype values
+ * just to immediately extract a single field from them. This is particularly
+ * important for join conditions where the vertex/edge comes from a previous
+ * clause.
+ */
+static Node *optimize_qual_expr_mutator(Node *node, void *context)
+{
+ if (node == NULL)
+ {
+ return NULL;
+ }
+
+ /*
+ * Look for FuncExpr nodes that wrap accessor functions around
+ * _agtype_build_vertex or _agtype_build_edge calls.
+ */
+ if (IsA(node, FuncExpr))
+ {
+ FuncExpr *outer_func = (FuncExpr *)node;
+ char *outer_func_name;
+ Node *arg;
+ FuncExpr *inner_func;
+ char *inner_func_name;
+ List *inner_args;
+ int arg_index = -1;
+
+ /* Must have exactly one argument */
+ if (list_length(outer_func->args) != 1)
+ {
+ goto recurse;
+ }
+
+ outer_func_name = get_func_name(outer_func->funcid);
+ if (outer_func_name == NULL)
+ {
+ goto recurse;
+ }
+
+ /* Check if this is an accessor function we can optimize */
+ if (strcmp(outer_func_name, "age_id") != 0 &&
+ strcmp(outer_func_name, "age_start_id") != 0 &&
+ strcmp(outer_func_name, "age_end_id") != 0 &&
+ strcmp(outer_func_name, "age_properties") != 0)
+ {
+ goto recurse;
+ }
+
+ arg = (Node *)linitial(outer_func->args);
+
+ /* The argument must be a FuncExpr (the build function) */
+ if (!IsA(arg, FuncExpr))
+ {
+ goto recurse;
+ }
+
+ inner_func = (FuncExpr *)arg;
+ inner_func_name = get_func_name(inner_func->funcid);
+ if (inner_func_name == NULL)
+ {
+ goto recurse;
+ }
+
+ inner_args = inner_func->args;
+
+ /*
+ * Check for _agtype_build_vertex(id, label_name, properties)
+ * Arguments: 0=id (graphid), 1=label_name (cstring), 2=properties
(agtype)
+ */
+ if (strcmp(inner_func_name, "_agtype_build_vertex") == 0 &&
+ list_length(inner_args) == 3)
+ {
+ if (strcmp(outer_func_name, "age_id") == 0)
+ {
+ arg_index = 0; /* id */
+ }
+ else if (strcmp(outer_func_name, "age_properties") == 0)
+ {
+ arg_index = 2; /* properties */
+ }
+ }
+ /*
+ * Check for _agtype_build_edge(id, startid, endid, label_name,
properties)
+ * Arguments: 0=id (graphid), 1=start_id (graphid), 2=end_id (graphid),
+ * 3=label_name (cstring), 4=properties (agtype)
+ */
+ else if (strcmp(inner_func_name, "_agtype_build_edge") == 0 &&
+ list_length(inner_args) == 5)
+ {
+ if (strcmp(outer_func_name, "age_id") == 0)
+ {
+ arg_index = 0; /* id */
+ }
+ else if (strcmp(outer_func_name, "age_start_id") == 0)
+ {
+ arg_index = 1; /* start_id */
+ }
+ else if (strcmp(outer_func_name, "age_end_id") == 0)
+ {
+ arg_index = 2; /* end_id */
+ }
+ else if (strcmp(outer_func_name, "age_properties") == 0)
+ {
+ arg_index = 4; /* properties */
+ }
+ }
+
+ /* If we found a pattern to optimize */
+ if (arg_index >= 0)
+ {
+ Node *extracted_arg = (Node *)list_nth(inner_args, arg_index);
+
+ /* For properties, return directly (already agtype) */
+ if (strcmp(outer_func_name, "age_properties") == 0)
+ {
+ return extracted_arg;
+ }
+ else
+ {
+ /*
+ * For graphid fields (id, start_id, end_id), we need to wrap
+ * in graphid_to_agtype to match the original return type.
+ */
+ Oid cast_func_oid;
+ FuncExpr *cast_expr;
+
+ cast_func_oid = get_ag_func_oid("graphid_to_agtype", 1,
+ GRAPHIDOID);
+
+ cast_expr = makeFuncExpr(cast_func_oid, AGTYPEOID,
+ list_make1(extracted_arg),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ cast_expr->location = outer_func->location;
+
+ return (Node *)cast_expr;
+ }
+ }
+ }
+
+recurse:
+ return expression_tree_mutator(node, optimize_qual_expr_mutator, context);
+}
+
/*
* Creates a FuncCall node that will prevent an edge from being joined
* to twice.