This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch PG15
in repository https://gitbox.apache.org/repos/asf/age.git

commit 6d25d660001d73ae64fca6b20dc5a8fb7f8bc485
Author: Rafsun Masud <[email protected]>
AuthorDate: Thu Oct 12 14:47:03 2023 -0700

    Optimize performance of detach delete (#1271)
    
    Previously, for each vertex to be deleted, all edge tables were
    scanned once to process the connected edges. Now, this task is postponed
    until all vertices are deleted. So, the connected edges can be processed
    in only one scan of the edge tables regardless of the number of deleted
    vertices.
---
 regress/expected/cypher_delete.out   | 115 ++++++++++++++++++++++++++++++++--
 regress/sql/cypher_delete.sql        |  37 +++++++++++
 src/backend/executor/cypher_delete.c | 118 ++++++++++++++++++++---------------
 src/include/executor/cypher_utils.h  |  21 +++++++
 4 files changed, 236 insertions(+), 55 deletions(-)

diff --git a/regress/expected/cypher_delete.out 
b/regress/expected/cypher_delete.out
index d7d19826..0a44ded2 100644
--- a/regress/expected/cypher_delete.out
+++ b/regress/expected/cypher_delete.out
@@ -62,9 +62,9 @@ SELECT * FROM cypher('cypher_delete', $$CREATE 
(:v)-[:e]->(:v)$$) AS (a agtype);
 
 --Should Fail
 SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n1 RETURN 
n1$$) AS (a agtype);
-ERROR:  Cannot delete vertex n1, because it still has edges attached. To 
delete this vertex, you must first delete the attached edges.
+ERROR:  Cannot delete a vertex that has edge(s). Delete the edge(s) first, or 
try DETACH DELETE.
 SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n2 RETURN 
n2$$) AS (a agtype);
-ERROR:  Cannot delete vertex n2, because it still has edges attached. To 
delete this vertex, you must first delete the attached edges.
+ERROR:  Cannot delete a vertex that has edge(s). Delete the edge(s) first, or 
try DETACH DELETE.
 SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE e RETURN 
e$$) AS (a agtype);
                                                            a                   
                                         
 
------------------------------------------------------------------------------------------------------------------------
@@ -188,7 +188,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE 
(n:v)-[:e]->(:v) CREATE (n)-[:e]-
 (0 rows)
 
 SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[]->() DELETE n1 RETURN 
n1$$) AS (a agtype);
-ERROR:  Cannot delete vertex n1, because it still has edges attached. To 
delete this vertex, you must first delete the attached edges.
+ERROR:  Cannot delete a vertex that has edge(s). Delete the edge(s) first, or 
try DETACH DELETE.
 --Cleanup
 SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) 
AS (a agtype);
                                 a                                
@@ -234,7 +234,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE 
(n:v)-[:e]->(:v)$$) AS (a agtype)
 (0 rows)
 
 SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->() DELETE n1, e RETURN 
n1$$) AS (a agtype);
-ERROR:  Cannot delete vertex n1, because it still has edges attached. To 
delete this vertex, you must first delete the attached edges.
+ERROR:  Cannot delete a vertex that has edge(s). Delete the edge(s) first, or 
try DETACH DELETE.
 --Cleanup
 SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) 
AS (a agtype);
                                 a                                
@@ -651,6 +651,113 @@ SELECT * FROM cypher('cypher_delete', $$MATCH 
(u:vertices) RETURN u $$) AS (resu
 --------
 (0 rows)
 
+--
+-- Detach Delete
+--
+SELECT create_graph('detach_delete');
+NOTICE:  graph "detach_delete" has been created
+ create_graph 
+--------------
+ 
+(1 row)
+
+SELECT * FROM cypher('detach_delete',
+$$
+    CREATE (x:Label3{name:'x', delete: true}),
+           (y:Label3{name:'y', delete: true}),
+           (a:Label1{name:'a', delete: true}),
+           (b:Label5{name:'b'}),
+           (c:Label5{name:'c'}),
+           (d:Label5{name:'d'}),
+           (m:Label7{name:'m', delete: true}),
+           (n:Label2{name:'n'}),
+           (p:Label2{name:'p'}),
+           (q:Label2{name:'q'}),
+           (a)-[:rel1{name:'ab'}]->(b),
+           (c)-[:rel2{name:'cd'}]->(d),
+           (n)-[:rel3{name:'nm'}]->(m),
+           (a)-[:rel4{name:'am'}]->(m),
+           (p)-[:rel5{name:'pq'}]->(q)
+$$
+) as (a agtype);
+ a 
+---
+(0 rows)
+
+-- no vertices or edges are deleted (error is expected)
+SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), 
(z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype);
+ERROR:  Cannot delete a vertex that has edge(s). Delete the edge(s) first, or 
try DETACH DELETE.
+SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname 
agtype);
+ vname 
+-------
+ "x"
+ "y"
+ "a"
+ "b"
+ "c"
+ "d"
+ "m"
+ "n"
+ "p"
+ "q"
+(10 rows)
+
+SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as 
(ename agtype);
+ ename 
+-------
+ "ab"
+ "cd"
+ "nm"
+ "am"
+ "pq"
+(5 rows)
+
+-- x, y, a, m, ab, nm, am are deleted
+SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), 
(z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype);
+ a 
+---
+ 1
+ 1
+(2 rows)
+
+SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname 
agtype);
+ vname 
+-------
+ "b"
+ "c"
+ "d"
+ "n"
+ "p"
+ "q"
+(6 rows)
+
+SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as 
(ename agtype);
+ ename 
+-------
+ "cd"
+ "pq"
+(2 rows)
+
+SELECT drop_graph('detach_delete', true);
+NOTICE:  drop cascades to 12 other objects
+DETAIL:  drop cascades to table detach_delete._ag_label_vertex
+drop cascades to table detach_delete._ag_label_edge
+drop cascades to table detach_delete."Label3"
+drop cascades to table detach_delete."Label1"
+drop cascades to table detach_delete."Label5"
+drop cascades to table detach_delete."Label7"
+drop cascades to table detach_delete."Label2"
+drop cascades to table detach_delete.rel1
+drop cascades to table detach_delete.rel2
+drop cascades to table detach_delete.rel3
+drop cascades to table detach_delete.rel4
+drop cascades to table detach_delete.rel5
+NOTICE:  graph "detach_delete" has been dropped
+ drop_graph 
+------------
+ 
+(1 row)
+
 --
 -- Clean up
 --
diff --git a/regress/sql/cypher_delete.sql b/regress/sql/cypher_delete.sql
index 38d91168..f0d98b6e 100644
--- a/regress/sql/cypher_delete.sql
+++ b/regress/sql/cypher_delete.sql
@@ -246,6 +246,43 @@ END;
 
 SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS 
(result agtype);
 
+--
+-- Detach Delete
+--
+
+SELECT create_graph('detach_delete');
+SELECT * FROM cypher('detach_delete',
+$$
+    CREATE (x:Label3{name:'x', delete: true}),
+           (y:Label3{name:'y', delete: true}),
+           (a:Label1{name:'a', delete: true}),
+           (b:Label5{name:'b'}),
+           (c:Label5{name:'c'}),
+           (d:Label5{name:'d'}),
+           (m:Label7{name:'m', delete: true}),
+           (n:Label2{name:'n'}),
+           (p:Label2{name:'p'}),
+           (q:Label2{name:'q'}),
+           (a)-[:rel1{name:'ab'}]->(b),
+           (c)-[:rel2{name:'cd'}]->(d),
+           (n)-[:rel3{name:'nm'}]->(m),
+           (a)-[:rel4{name:'am'}]->(m),
+           (p)-[:rel5{name:'pq'}]->(q)
+$$
+) as (a agtype);
+
+-- no vertices or edges are deleted (error is expected)
+SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), 
(z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype);
+SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname 
agtype);
+SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as 
(ename agtype);
+
+-- x, y, a, m, ab, nm, am are deleted
+SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), 
(z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype);
+SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname 
agtype);
+SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as 
(ename agtype);
+
+SELECT drop_graph('detach_delete', true);
+
 --
 -- Clean up
 --
diff --git a/src/backend/executor/cypher_delete.c 
b/src/backend/executor/cypher_delete.c
index 795d4c0e..1cca3313 100644
--- a/src/backend/executor/cypher_delete.c
+++ b/src/backend/executor/cypher_delete.c
@@ -32,6 +32,7 @@
 #include "parser/parsetree.h"
 #include "storage/bufmgr.h"
 #include "utils/rel.h"
+#include "common/hashfn.h"
 
 #include "catalog/ag_label.h"
 #include "executor/cypher_executor.h"
@@ -48,9 +49,7 @@ static void rescan_cypher_delete(CustomScanState *node);
 
 static void process_delete_list(CustomScanState *node);
 
-static void find_connected_edges(CustomScanState *node, char *graph_name,
-                                 List *labels, char *var_name, graphid id,
-                                 bool detach_delete);
+static void check_for_connected_edges(CustomScanState *node);
 static agtype_value *extract_entity(CustomScanState *node,
                                     TupleTableSlot *scanTupleSlot,
                                     int entity_position);
@@ -83,6 +82,7 @@ static void begin_cypher_delete(CustomScanState *node, EState 
*estate,
     cypher_delete_custom_scan_state *css =
         (cypher_delete_custom_scan_state *)node;
     Plan *subplan;
+    HASHCTL hashctl;
 
     Assert(list_length(css->cs->custom_plans) == 1);
 
@@ -112,6 +112,16 @@ static void begin_cypher_delete(CustomScanState *node, 
EState *estate,
      */
     css->edge_labels = get_all_edge_labels_per_graph(estate, 
css->delete_data->graph_oid);
 
+    /* init vertex_id_htab */
+    MemSet(&hashctl, 0, sizeof(hashctl));
+    hashctl.keysize = sizeof(graphid);
+    hashctl.entrysize =
+        sizeof(graphid); // entries are not used, but entrysize must >= keysize
+    hashctl.hash = tag_hash;
+    css->vertex_id_htab = hash_create(DELETE_VERTEX_HTAB_NAME,
+                                      DELETE_VERTEX_HTAB_SIZE, &hashctl,
+                                      HASH_ELEM | HASH_FUNCTION);
+
     /*
      * Postgres does not assign the es_output_cid in queries that do
      * not write to disk, ie: SELECT commands. We need the command id
@@ -194,6 +204,10 @@ static TupleTableSlot *exec_cypher_delete(CustomScanState 
*node)
  */
 static void end_cypher_delete(CustomScanState *node)
 {
+    check_for_connected_edges(node);
+
+    hash_destroy(((cypher_delete_custom_scan_state *)node)->vertex_id_htab);
+
     ExecEndNode(node->ss.ps.lefttree);
 }
 
@@ -443,15 +457,15 @@ static void process_delete_list(CustomScanState *node)
         }
 
         /*
-         * For vertices, we need to check if the vertex is connected to any
-         * edges, * if there are, we need to delete them or throw an error,
-         * depending on if the query specified the DETACH option.
+         * For vertices, we insert the vertex ID in the hashtable
+         * vertex_id_htab. This hashtable is used later to process
+         * connected edges.
          */
         if (original_entity_value->type == AGTV_VERTEX)
         {
-            find_connected_edges(node, css->delete_data->graph_name,
-                                 css->edge_labels, item->var_name,
-                                 id->val.int_value, css->delete_data->detach);
+            bool found;
+            hash_search(css->vertex_id_htab, (void *)&(id->val.int_value),
+                        HASH_ENTER, &found);
         }
 
         /* At this point, we are ready to delete the node/vertex. */
@@ -464,30 +478,20 @@ static void process_delete_list(CustomScanState *node)
 }
 
 /*
- * Find the edges connected to the given node. If there is any edges either
- * delete them or throw an error, depending on the detach delete option.
+ * Scans the edge tables and checks if the deleted vertices are connected to
+ * any edge(s). For DETACH DELETE, the connected edges are deleted. Otherwise,
+ * an error is thrown.
  */
-static void find_connected_edges(CustomScanState *node, char *graph_name,
-                                 List *labels, char *var_name, graphid id,
-                                 bool detach_delete)
+static void check_for_connected_edges(CustomScanState *node)
 {
+    ListCell *lc;
     cypher_delete_custom_scan_state *css =
         (cypher_delete_custom_scan_state *)node;
     EState *estate = css->css.ss.ps.state;
-    ListCell *lc;
+    char *graph_name = css->delete_data->graph_name;
 
-    Increment_Estate_CommandId(estate);
-
-    /*
-     * We need to scan through all the edges to see if this vertex has
-     * any edges attached to it.
-     *
-     * XXX: If we implement an on-disc graph storage system. Such as
-     * an adjacency matrix, the performance of this check can be massively
-     * improved. However, right now we have to scan every edge to see if
-     * one has this vertex as a start or end vertex.
-     */
-    foreach(lc, labels)
+    /* scans each label from css->edge_labels */
+    foreach (lc, css->edge_labels)
     {
         char *label_name = lfirst(lc);
         ResultRelInfo *resultRelInfo;
@@ -495,54 +499,66 @@ static void find_connected_edges(CustomScanState *node, 
char *graph_name,
         HeapTuple tuple;
         TupleTableSlot *slot;
 
-        resultRelInfo = create_entity_result_rel_info(estate,
-                                                      graph_name, label_name);
-
+        resultRelInfo = create_entity_result_rel_info(estate, graph_name,
+                                                      label_name);
         scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc,
                                     estate->es_snapshot, 0, NULL);
-
         slot = ExecInitExtraTupleSlot(
             estate, RelationGetDescr(resultRelInfo->ri_RelationDesc),
             &TTSOpsHeapTuple);
 
-        // scan the table
-        while(true)
+        /* for each row */
+        while (true)
         {
-            graphid startid, endid;
+            graphid startid;
+            graphid endid;
             bool isNull;
+            bool found_startid = false;
+            bool found_endid = false;
 
             tuple = heap_getnext(scan_desc, ForwardScanDirection);
 
-            // no more tuples to process, break and scan the next label.
+            /* no more tuples to process, break and scan the next label. */
             if (!HeapTupleIsValid(tuple))
+            {
                 break;
+            }
 
             ExecStoreHeapTuple(tuple, slot, false);
 
-            startid = GRAPHID_GET_DATUM(slot_getattr(slot, 
Anum_ag_label_edge_table_start_id, &isNull));
-            endid = GRAPHID_GET_DATUM(slot_getattr(slot, 
Anum_ag_label_edge_table_end_id, &isNull));
+            startid = GRAPHID_GET_DATUM(slot_getattr(
+                slot, Anum_ag_label_edge_table_start_id, &isNull));
+            endid = GRAPHID_GET_DATUM(
+                slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull));
+
+            hash_search(css->vertex_id_htab, (void *)&startid, HASH_FIND,
+                        &found_startid);
 
-            if (id == startid || id == endid)
+            if (!found_startid)
             {
-                /*
-                 * We have found an edge that uses the vertex. Either delete 
the
-                 * edge or throw an error. Depending on whether the DETACH
-                 * option was specified in the query.
-                 */
-                if (detach_delete)
+                hash_search(css->vertex_id_htab, (void *)&endid, HASH_FIND,
+                            &found_endid);
+            }
+
+            if (found_startid || found_endid)
+            {
+                if (css->delete_data->detach)
+                {
                     delete_entity(estate, resultRelInfo, tuple);
+                }
                 else
-                    ereport(ERROR,
-                            (errcode(ERRCODE_INTERNAL_ERROR),
-                             errmsg("Cannot delete vertex %s, because it still 
has edges attached. "
-                                    "To delete this vertex, you must first 
delete the attached edges.",
-                                    var_name)));
+                {
+                    ereport(
+                        ERROR,
+                        (errcode(ERRCODE_INTERNAL_ERROR),
+                         errmsg(
+                             "Cannot delete a vertex that has edge(s). "
+                             "Delete the edge(s) first, or try DETACH 
DELETE.")));
+                }
             }
         }
 
         table_endscan(scan_desc);
         destroy_entity_result_rel_info(resultRelInfo);
     }
-
-    Decrement_Estate_CommandId(estate);
 }
diff --git a/src/include/executor/cypher_utils.h 
b/src/include/executor/cypher_utils.h
index 58dd8fa9..056f1a96 100644
--- a/src/include/executor/cypher_utils.h
+++ b/src/include/executor/cypher_utils.h
@@ -50,6 +50,9 @@
     estate->es_output_cid--; \
     estate->es_snapshot->curcid--;
 
+#define DELETE_VERTEX_HTAB_NAME "delete_vertex_htab"
+#define DELETE_VERTEX_HTAB_SIZE 1000000
+
 typedef struct cypher_create_custom_scan_state
 {
     CustomScanState css;
@@ -76,6 +79,24 @@ typedef struct cypher_delete_custom_scan_state
     cypher_delete_information *delete_data;
     int flags;
     List *edge_labels;
+
+    /*
+     * Deleted vertex IDs are stored in this hashtable.
+     *
+     * When a vertex item is deleted, it must be checked if there is any edges
+     * connected to it. The connected edges are either deleted or an error is
+     * thrown depending on the DETACH option. However, the check for connected
+     * edges is not done immediately. Instead the deleted vertex IDs are stored
+     * in the hashtable. Once all vertices are deleted, this hashtable is used
+     * to process the connected edges with only one scan of the edge tables.
+     *
+     * Note on performance: Additional performance gain may be possible if
+     * the standard DELETE .. USING .. command can be used instead of this
+     * hashtable. Because Postgres may create a better plan to execute that
+     * command depending on the statistics and available indexes on start_id
+     * and end_id column.
+     */
+    HTAB *vertex_id_htab;
 } cypher_delete_custom_scan_state;
 
 typedef struct cypher_merge_custom_scan_state

Reply via email to