I33Buckler opened a new issue, #1287: URL: https://github.com/apache/age/issues/1287
I am in the process of creating a graph node from a database table that has 7.6 million lines. The process runs a function to process the table into the graph with loops. Initial run of the process it was still running after nearly 24 hours. To test the loop I ran it without loading into graph. The result is nearly 5 minutes of execution. The code for creating the graph node below. Removing the comments from from the section with the PERFORM statement results in the long process. Wondering if there is something missing from the process. ```SQL --create graph --select * from drop_graph('transport_network',true); --SELECT create_graph('transport_network'); -- function to test for existence of graph node drop function if exists node_check; create or replace function node_check (graph_name text,node_name text) returns text LANGUAGE plpgsql as $node_check$ DECLARE table_exists boolean; node_status text; test text; begin drop table if exists table_test; -- Check for existence of node execute format('create temp table table_test as SELECT COUNT(*) as existence FROM information_schema.tables WHERE lower(table_schema) = lower(%1$s) AND lower(table_type) = ''base table'' and lower(table_name) = lower(%2$s);', format('''%s''',graph_name),format('''%s''',node_name)); -- create or empty node select existence != 0 from table_test into table_exists; if table_exists then node_status = 'Node exists'; execute format('select * from cypher(''%1s'',$$match (v:%2$s) detach delete v$$) as (v agtype);',graph_name,node_name) ; else node_status = 'Node created'; execute format('select create_vlabel(%1$s,%2$s);', format('''%s''',graph_name),format('''%s''',node_name)); end if; drop table if exists table_test; return node_status; END; $node_check$; -- Function for creating graph node from existing table drop function if exists create_stop_times; CREATE OR REPLACE FUNCTION create_stop_times(graph_name text, tc_date date, trip_id text, stop_id text, arrival_time text, arrival_time_sec bigint, departure_time text, departure_time_sec bigint, stop_sequence bigint, pickup_type int, drop_off_type int) returns text LANGUAGE plpgsql VOLATILE as $stop_times$ declare nodename text := graph_name || '.' || 'stop_times'; BEGIN execute format ('select * from cypher(''%1$s'', $$match (t:trips),(s:stops) where t.id=%2$s and s.id=%3$s and t.tc_date=%11$s and t.tc_date=s.tc_date create(t)<- [:PART_OF_TRIP]- (st:stop_times {tc_date: %11$s, arrival_time: %4$s, arrival_time_seconds: %5$s, departure_time: %6$s, departure_time_seconds: %7$s, stop_sequence: %8$s, pickup_type: %9$s, drop_off_type: %10$s})- [:LOCATED_AT]->(s)$$) as (st agtype);', quote_ident(graph_name),quote_ident(trip_id),quote_ident(stop_id), quote_ident(arrival_time),to_char(arrival_time_sec,'999999'), quote_ident(departure_time),to_char(departure_time_sec,'999999'), to_char(row_num,'9999'),to_char(pickup_type,'9'),to_char(drop_off_type,'9'), format('"%s"',to_char(tc_date,'yyyy-mm-dd'))); return nodename; END $stop_times$ ; -- Check for node. Reset if exists select node_check('transport_network','stop_times'); -- Initialise tables drop table if exists stop_times, trip_ranks; -- Stop times for processing by trip ; create temp table stop_times as select dense_rank() over (order by tc_date,trip_id) as trip_rank, row_number() over (partition by tc_date,trip_id order by stop_sequence) as row_num,* from allservices.stop_times ; create index on stop_times (trip_rank); create index on stop_times (row_num); create unique index on stop_times (trip_rank,row_num); create unique index on stop_times (trip_rank,stop_sequence); -- individual trips for processing stop times create temp table trip_ranks as select distinct trip_rank from stop_times; create unique index on trip_ranks (trip_rank); -- define table for processing in loop drop table if exists stop_times_loop; create temp table stop_times_loop (like stop_times); do $$ declare temprow record; triprank record; graph_name text:='transport_network'; row_counter integer := 0 ; rank_counter integer := 0; begin -- Loop through trips ranked by TC_Date for triprank in select trip_rank from trip_ranks order by trip_rank loop rank_counter := rank_counter+1; row_counter := 0; -- reset row counter for looping through rows in trip by date for temprow in select row_num from stop_times where trip_rank=rank_counter order by row_num loop row_counter := row_counter+1; -- Insert trip by date and associated rows for stop sequence on trip truncate table stop_times_loop; insert into stop_times_loop select * from stop_times where trip_rank=rank_counter and row_num=row_counter; --perform create_stop_times(graph_name, -- a.tc_date,a.trip_id,a.stop_id, -- a.arrival_time,a.arrival_time_sec, -- a.departure_time,a.departure_time_sec, -- a.stop_sequence,a.pickup_type,a.drop_off_type) -- from stop_times_loop a --; end loop; end loop; end; $$ ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@age.apache.org.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org