hi.

COPY (select_query) generally slower than
table_beginscan.. table_scan_getnextslot ..table_endscan,
especially for partitioned tables.
so in the function DoCopyTo
trying to use table_beginscan.. table_scan_getnextslot ..table_endscan
for COPY TO when source table is a partitioned table.

----setup-----
CREATE TABLE t3 (a INT, b int ) PARTITION BY RANGE (a);
create table t3_1 partition of t3 for values from (1) to (11);
create table t3_2 partition of t3 for values from (11) to (15);
insert into t3 select g from generate_series(1, 3) g;
insert into t3 select g from generate_series(11, 11) g;

so now you can do:
copy t3 to stdout;

in the master, you will get:
ERROR:  cannot copy from partitioned table "t3"
HINT:  Try the COPY (SELECT ...) TO variant.


attached copy_par_regress_test.sql is a simple benchmark sql file,
a partitioned table with 10 partitions, 2 levels of indirection.
The simple benchmark shows around 7.7% improvement in my local environment.

local environment:
PostgreSQL 18devel_debug_build_382092a0cd on x86_64-linux, compiled by
gcc-14.1.0, 64-bit
From ba2307cca8bd1d53e0febddaf11c932dae5d31e0 Mon Sep 17 00:00:00 2001
From: jian he <jian.universal...@gmail.com>
Date: Thu, 19 Dec 2024 19:48:12 +0800
Subject: [PATCH v1 1/1] speedup COPY TO for partitioned table.

COPY (select_query) generally slower than
table_beginscan.. table_scan_getnextslot ..table_endscan.
especially for partitioned table.

so using table_beginscan.. table_scan_getnextslot ..table_endscan
for COPY TO when source table is a partitioned table.

enviroment:
PostgreSQL 18devel_debug_build_382092a0cd on x86_64-linux, compiled by gcc-14.1.0, 64-bit
shows around 7.7% improvement.
---
 src/backend/commands/copyto.c | 69 +++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 8 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 161a0f8b0a..42dba2d4a8 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -19,6 +19,8 @@
 #include <sys/stat.h>
 
 #include "access/tableam.h"
+#include "access/table.h"
+#include "catalog/pg_inherits.h"
 #include "commands/copy.h"
 #include "commands/progress.h"
 #include "executor/execdesc.h"
@@ -79,6 +81,7 @@ typedef struct CopyToStateData
 	List	   *attnumlist;		/* integer list of attnums to copy */
 	char	   *filename;		/* filename, or NULL for STDOUT */
 	bool		is_program;		/* is 'filename' a program to popen? */
+	bool		is_partitioned;	/* is the COPY source relation a partitioned table? */
 	copy_data_dest_cb data_dest_cb; /* function for writing data */
 
 	CopyFormatOptions opts;
@@ -396,13 +399,7 @@ BeginCopyTo(ParseState *pstate,
 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 					 errmsg("cannot copy from sequence \"%s\"",
 							RelationGetRelationName(rel))));
-		else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-					 errmsg("cannot copy from partitioned table \"%s\"",
-							RelationGetRelationName(rel)),
-					 errhint("Try the COPY (SELECT ...) TO variant.")));
-		else
+		else if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
 			ereport(ERROR,
 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 					 errmsg("cannot copy from non-table relation \"%s\"",
@@ -426,6 +423,7 @@ BeginCopyTo(ParseState *pstate,
 	/* Extract options from the statement node tree */
 	ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
 
+	cstate->is_partitioned = false;
 	/* Process the source/target relation or query */
 	if (rel)
 	{
@@ -433,6 +431,8 @@ BeginCopyTo(ParseState *pstate,
 
 		cstate->rel = rel;
 
+		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			cstate->is_partitioned = true;
 		tupDesc = RelationGetDescr(cstate->rel);
 	}
 	else
@@ -847,7 +847,60 @@ DoCopyTo(CopyToState cstate)
 		}
 	}
 
-	if (cstate->rel)
+	/*
+	 * if COPY TO source table is a partitioned table, then open each
+	 * partition and process each individual partition.
+	 */
+	if (cstate->rel && cstate->is_partitioned)
+	{
+		List	   *children = NIL;
+		List	   *scan_oids = NIL;
+
+		processed = 0;
+		children = find_all_inheritors(RelationGetRelid(cstate->rel),
+										 AccessShareLock,
+										 NULL);
+
+		foreach_oid(childreloid, children)
+		{
+			char		relkind = get_rel_relkind(childreloid);
+
+			if (RELKIND_HAS_PARTITIONS(relkind))
+				continue;
+
+			scan_oids = lappend_oid(scan_oids, childreloid);
+		}
+
+		foreach_oid(scan_oid, scan_oids)
+		{
+			TupleTableSlot *slot;
+			TableScanDesc scandesc;
+			Relation		scan_rel;
+
+			scan_rel = table_open(scan_oid, AccessShareLock);
+			scandesc = table_beginscan(scan_rel, GetActiveSnapshot(), 0, NULL);
+			slot = table_slot_create(scan_rel, NULL);
+
+			while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
+			{
+				CHECK_FOR_INTERRUPTS();
+
+				/* Deconstruct the tuple ... */
+				slot_getallattrs(slot);
+
+				/* Format and send the data */
+				CopyOneRowTo(cstate, slot);
+
+				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
+										 ++processed);
+			}
+
+			ExecDropSingleTupleTableSlot(slot);
+			table_endscan(scandesc);
+			table_close(scan_rel, AccessShareLock);
+		}
+	}
+	else if (cstate->rel && !cstate->is_partitioned)
 	{
 		TupleTableSlot *slot;
 		TableScanDesc scandesc;
-- 
2.34.1

Attachment: copy_par_regress_test.sql
Description: application/sql

Reply via email to