orhankislal commented on code in PR #594: URL: https://github.com/apache/madlib/pull/594#discussion_r1110257271
##########
src/ports/postgres/modules/graph/wcc.py_in:
##########

@@ -366,44 +406,61 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
         # found in the current iteration.
         with SetGUC("dev_opt_unsafe_truncate_in_subtransaction", "on"):
-            plpy.execute(loop_sql.format(**locals()))
-
-        if grouping_cols:
-            nodes_to_update = plpy.execute("""
-                SELECT SUM(cnt) AS cnt_sum
-                FROM (
-                    SELECT COUNT(*) AS cnt
-                    FROM {toupdate}
-                    GROUP BY {grouping_cols}
-                ) t
-            """.format(**locals()))[0]["cnt_sum"]
-        else:
-            nodes_to_update = plpy.execute("""
-                SELECT COUNT(*) AS cnt FROM {toupdate}
-            """.format(**locals()))[0]["cnt"]
+            nodes_to_update = plpy.execute(loop_sql.format(**locals()))[0]["cnt_sum"]
+        iteration_counter += 1
+
     if not is_platform_pg():
         # Drop intermediate table created for Greenplum
         plpy.execute("DROP TABLE IF EXISTS {0}".format(edge_inverse))
-    rename_table(schema_madlib, newupdate, out_table)
-    if vertex_type != "BIGINT[]" and vertex_id_in and vertex_id_in != 'id':
-        plpy.execute("ALTER TABLE {out_table} RENAME COLUMN id TO {vertex_id_in}".format(**locals()))
+    if not warm_start:
+        rename_table(schema_madlib, newupdate, out_table)
+        if vertex_type != "BIGINT[]" and vertex_id_in and vertex_id_in != 'id':
+            plpy.execute("ALTER TABLE {out_table} RENAME COLUMN id TO {vertex_id_in}".format(**locals()))
+    else:
+        plpy.execute("""
+            TRUNCATE TABLE {out_table};

Review Comment:
Creating multiple tables in each iteration causes catalog bloat.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@madlib.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org