hi. COPY (select_query) generally slower than table_beginscan.. table_scan_getnextslot ..table_endscan, especially for partitioned tables. so in the function DoCopyTo trying to use table_beginscan.. table_scan_getnextslot ..table_endscan for COPY TO when source table is a partitioned table.
----setup----- CREATE TABLE t3 (a INT, b int ) PARTITION BY RANGE (a); create table t3_1 partition of t3 for values from (1) to (11); create table t3_2 partition of t3 for values from (11) to (15); insert into t3 select g from generate_series(1, 3) g; insert into t3 select g from generate_series(11, 11) g; so now you can do: copy t3 to stdout; in the master, you will get: ERROR: cannot copy from partitioned table "t3" HINT: Try the COPY (SELECT ...) TO variant. attached copy_par_regress_test.sql is a simple benchmark sql file, a partitioned table with 10 partitions, 2 levels of indirection. The simple benchmark shows around 7.7% improvement in my local environment. local environment: PostgreSQL 18devel_debug_build_382092a0cd on x86_64-linux, compiled by gcc-14.1.0, 64-bit
From ba2307cca8bd1d53e0febddaf11c932dae5d31e0 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Thu, 19 Dec 2024 19:48:12 +0800 Subject: [PATCH v1 1/1] speedup COPY TO for partitioned table. COPY (select_query) generally slower than table_beginscan.. table_scan_getnextslot ..table_endscan. especially for partitioned table. so using table_beginscan.. table_scan_getnextslot ..table_endscan for COPY TO when source table is a partitioned table. enviroment: PostgreSQL 18devel_debug_build_382092a0cd on x86_64-linux, compiled by gcc-14.1.0, 64-bit shows around 7.7% improvement. --- src/backend/commands/copyto.c | 69 +++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 161a0f8b0a..42dba2d4a8 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -19,6 +19,8 @@ #include <sys/stat.h> #include "access/tableam.h" +#include "access/table.h" +#include "catalog/pg_inherits.h" #include "commands/copy.h" #include "commands/progress.h" #include "executor/execdesc.h" @@ -79,6 +81,7 @@ typedef struct CopyToStateData List *attnumlist; /* integer list of attnums to copy */ char *filename; /* filename, or NULL for STDOUT */ bool is_program; /* is 'filename' a program to popen? */ + bool is_partitioned; /* is the COPY source relation a partitioned table? */ copy_data_dest_cb data_dest_cb; /* function for writing data */ CopyFormatOptions opts; @@ -396,13 +399,7 @@ BeginCopyTo(ParseState *pstate, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot copy from sequence \"%s\"", RelationGetRelationName(rel)))); - else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot copy from partitioned table \"%s\"", - RelationGetRelationName(rel)), - errhint("Try the COPY (SELECT ...) TO variant."))); - else + else if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot copy from non-table relation \"%s\"", @@ -426,6 +423,7 @@ BeginCopyTo(ParseState *pstate, /* Extract options from the statement node tree */ ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options); + cstate->is_partitioned = false; /* Process the source/target relation or query */ if (rel) { @@ -433,6 +431,8 @@ BeginCopyTo(ParseState *pstate, cstate->rel = rel; + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + cstate->is_partitioned = true; tupDesc = RelationGetDescr(cstate->rel); } else @@ -847,7 +847,60 @@ DoCopyTo(CopyToState cstate) } } - if (cstate->rel) + /* + * if COPY TO source table is a partitioned table, then open each + * partition and process each individual partition. + */ + if (cstate->rel && cstate->is_partitioned) + { + List *children = NIL; + List *scan_oids = NIL; + + processed = 0; + children = find_all_inheritors(RelationGetRelid(cstate->rel), + AccessShareLock, + NULL); + + foreach_oid(childreloid, children) + { + char relkind = get_rel_relkind(childreloid); + + if (RELKIND_HAS_PARTITIONS(relkind)) + continue; + + scan_oids = lappend_oid(scan_oids, childreloid); + } + + foreach_oid(scan_oid, scan_oids) + { + TupleTableSlot *slot; + TableScanDesc scandesc; + Relation scan_rel; + + scan_rel = table_open(scan_oid, AccessShareLock); + scandesc = table_beginscan(scan_rel, GetActiveSnapshot(), 0, NULL); + slot = table_slot_create(scan_rel, NULL); + + while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) + { + CHECK_FOR_INTERRUPTS(); + + /* Deconstruct the tuple ... */ + slot_getallattrs(slot); + + /* Format and send the data */ + CopyOneRowTo(cstate, slot); + + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, + ++processed); + } + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scandesc); + table_close(scan_rel, AccessShareLock); + } + } + else if (cstate->rel && !cstate->is_partitioned) { TupleTableSlot *slot; TableScanDesc scandesc; -- 2.34.1
copy_par_regress_test.sql
Description: application/sql