Hi,

In <CAD21AoBkA=g=pn17r_iieru+vlyltgz8wvohgana2vzsmfm...@mail.gmail.com>
  "Re: Make COPY format extendable: Extract COPY TO format implementations" on 
Mon, 13 Oct 2025 14:40:31 -0700,
  Masahiko Sawada <[email protected]> wrote:

> The patch refactors the CopyToStateData so that we can both hide
> internal-use-only fields from extensions and extension can use its own
> state data, while not adding extra indirection layers. TBH I'm really
> not sure we must fully hide internal fields from extensions. Other
> extendable components seem not to strictly hide internal information
> from extensions. I'd suggest starting with only the latter point. That
> is, we merge fields in CopyToStateInternalData to CopyToStateData.
> What do you think?

OK. Let's follow the existing style. How about the attached
patch? It merges CopyToStateInternalData to CopyToStateData.


Thanks,
-- 
kou

>From 325f56d4b4372f7b90b88c9c9068d253fcc9f39a Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <[email protected]>
Date: Tue, 14 Oct 2025 11:08:23 +0900
Subject: [PATCH] Split CopyToStateData to CopyToState{,Builtin}Data

---
 src/backend/commands/copyto.c  | 170 ++++++++++++++++-----------------
 src/include/commands/copyapi.h |  66 +++++++++++++
 2 files changed, 148 insertions(+), 88 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e5781155cdf..176d98f866b 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -21,7 +21,6 @@
 #include "access/tableam.h"
 #include "commands/copyapi.h"
 #include "commands/progress.h"
-#include "executor/execdesc.h"
 #include "executor/executor.h"
 #include "executor/tuptable.h"
 #include "libpq/libpq.h"
@@ -37,19 +36,7 @@
 #include "utils/snapmgr.h"
 
 /*
- * Represents the different dest cases we need to worry about at
- * the bottom level
- */
-typedef enum CopyDest
-{
-	COPY_FILE,					/* to file (or a piped program) */
-	COPY_FRONTEND,				/* to frontend */
-	COPY_CALLBACK,				/* to callback function */
-} CopyDest;
-
-/*
- * This struct contains all the state variables used throughout a COPY TO
- * operation.
+ * This struct contains the state variables used by built-in format routines.
  *
  * Multi-byte encodings: all supported client-side encodings encode multi-byte
  * characters by having the first byte's high bit set. Subsequent bytes of the
@@ -62,40 +49,16 @@ typedef enum CopyDest
  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
  * when we have to do it the hard way.
  */
-typedef struct CopyToStateData
+typedef struct CopyToStateBuiltinData
 {
-	/* format-specific routines */
-	const CopyToRoutine *routine;
+	CopyToStateData parent;
 
 	/* low-level state data */
-	CopyDest	copy_dest;		/* type of copy source/destination */
-	FILE	   *copy_file;		/* used if copy_dest == COPY_FILE */
-	StringInfo	fe_msgbuf;		/* used for all dests during COPY TO */
-
 	int			file_encoding;	/* file or remote side's character encoding */
 	bool		need_transcoding;	/* file encoding diff from server? */
 	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */
-
-	/* parameters from the COPY command */
-	Relation	rel;			/* relation to copy to */
-	QueryDesc  *queryDesc;		/* executable query to copy from */
-	List	   *attnumlist;		/* integer list of attnums to copy */
-	char	   *filename;		/* filename, or NULL for STDOUT */
-	bool		is_program;		/* is 'filename' a program to popen? */
-	copy_data_dest_cb data_dest_cb; /* function for writing data */
-
-	CopyFormatOptions opts;
-	Node	   *whereClause;	/* WHERE condition (or NULL) */
-
-	/*
-	 * Working state
-	 */
-	MemoryContext copycontext;	/* per-copy execution context */
-
-	FmgrInfo   *out_functions;	/* lookup info for output functions */
-	MemoryContext rowcontext;	/* per-row evaluation context */
-	uint64		bytes_processed;	/* number of bytes processed so far */
-} CopyToStateData;
+}			CopyToStateBuiltinData;
+typedef struct CopyToStateBuiltinData *CopyToStateBuiltin;
 
 /* DestReceiver for COPY (query) TO */
 typedef struct
@@ -118,6 +81,7 @@ static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
 								bool use_quote);
 
 /* built-in format-specific routines */
+static size_t CopyToBuiltinGetStateSize(void);
 static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
 static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
 static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
@@ -150,6 +114,7 @@ static void CopySendInt16(CopyToState cstate, int16 val);
 
 /* text format */
 static const CopyToRoutine CopyToRoutineText = {
+	.CopyToGetStateSize = CopyToBuiltinGetStateSize,
 	.CopyToStart = CopyToTextLikeStart,
 	.CopyToOutFunc = CopyToTextLikeOutFunc,
 	.CopyToOneRow = CopyToTextOneRow,
@@ -158,6 +123,7 @@ static const CopyToRoutine CopyToRoutineText = {
 
 /* CSV format */
 static const CopyToRoutine CopyToRoutineCSV = {
+	.CopyToGetStateSize = CopyToBuiltinGetStateSize,
 	.CopyToStart = CopyToTextLikeStart,
 	.CopyToOutFunc = CopyToTextLikeOutFunc,
 	.CopyToOneRow = CopyToCSVOneRow,
@@ -166,6 +132,7 @@ static const CopyToRoutine CopyToRoutineCSV = {
 
 /* binary format */
 static const CopyToRoutine CopyToRoutineBinary = {
+	.CopyToGetStateSize = CopyToBuiltinGetStateSize,
 	.CopyToStart = CopyToBinaryStart,
 	.CopyToOutFunc = CopyToBinaryOutFunc,
 	.CopyToOneRow = CopyToBinaryOneRow,
@@ -185,18 +152,27 @@ CopyToGetRoutine(const CopyFormatOptions *opts)
 	return &CopyToRoutineText;
 }
 
+/* Implementation of the allocate callback for all built-in formats */
+static size_t
+CopyToBuiltinGetStateSize(void)
+{
+	return sizeof(CopyToStateBuiltinData);
+}
+
 /* Implementation of the start callback for text and CSV formats */
 static void
 CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 {
+	CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
+
 	/*
 	 * For non-binary copy, we need to convert null_print to file encoding,
 	 * because it will be sent directly with CopySendString.
 	 */
-	if (cstate->need_transcoding)
+	if (cstate_builtin->need_transcoding)
 		cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
 														  cstate->opts.null_print_len,
-														  cstate->file_encoding);
+														  cstate_builtin->file_encoding);
 
 	/* if a header has been requested send the line */
 	if (cstate->opts.header_line == COPY_HEADER_TRUE)
@@ -401,7 +377,7 @@ SendCopyBegin(CopyToState cstate)
 	for (i = 0; i < natts; i++)
 		pq_sendint16(&buf, format); /* per-column formats */
 	pq_endmessage(&buf);
-	cstate->copy_dest = COPY_FRONTEND;
+	cstate->copy_dest = COPY_DEST_FRONTEND;
 }
 
 static void
@@ -448,7 +424,7 @@ CopySendEndOfRow(CopyToState cstate)
 
 	switch (cstate->copy_dest)
 	{
-		case COPY_FILE:
+		case COPY_DEST_FILE:
 			if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
 					   cstate->copy_file) != 1 ||
 				ferror(cstate->copy_file))
@@ -482,11 +458,11 @@ CopySendEndOfRow(CopyToState cstate)
 							 errmsg("could not write to COPY file: %m")));
 			}
 			break;
-		case COPY_FRONTEND:
+		case COPY_DEST_FRONTEND:
 			/* Dump the accumulated row as one CopyData message */
 			(void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
 			break;
-		case COPY_CALLBACK:
+		case COPY_DEST_CALLBACK:
 			cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
 			break;
 	}
@@ -507,7 +483,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate)
 {
 	switch (cstate->copy_dest)
 	{
-		case COPY_FILE:
+		case COPY_DEST_FILE:
 			/* Default line termination depends on platform */
 #ifndef WIN32
 			CopySendChar(cstate, '\n');
@@ -515,7 +491,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate)
 			CopySendString(cstate, "\r\n");
 #endif
 			break;
-		case COPY_FRONTEND:
+		case COPY_DEST_FRONTEND:
 			/* The FE/BE protocol uses \n as newline for all platforms */
 			CopySendChar(cstate, '\n');
 			break;
@@ -631,10 +607,14 @@ BeginCopyTo(ParseState *pstate,
 			List *options)
 {
 	CopyToState cstate;
+	CopyToStateBuiltin cstate_builtin;
 	bool		pipe = (filename == NULL && data_dest_cb == NULL);
 	TupleDesc	tupDesc;
 	int			num_phys_attrs;
+	MemoryContext copycontext;
 	MemoryContext oldcontext;
+	CopyFormatOptions opts = {0};
+	const CopyToRoutine *routine;
 	const int	progress_cols[] = {
 		PROGRESS_COPY_COMMAND,
 		PROGRESS_COPY_TYPE
@@ -686,24 +666,33 @@ BeginCopyTo(ParseState *pstate,
 	}
 
 
-	/* Allocate workspace and zero all fields */
-	cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
-
 	/*
 	 * We allocate everything used by a cstate in a new memory context. This
 	 * avoids memory leaks during repeated use of COPY in a query.
 	 */
-	cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
-												"COPY",
-												ALLOCSET_DEFAULT_SIZES);
+	copycontext = AllocSetContextCreate(CurrentMemoryContext,
+										"COPY",
+										ALLOCSET_DEFAULT_SIZES);
 
-	oldcontext = MemoryContextSwitchTo(cstate->copycontext);
+	oldcontext = MemoryContextSwitchTo(copycontext);
 
 	/* Extract options from the statement node tree */
-	ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
+	ProcessCopyOptions(pstate, &opts, false /* is_from */ , options);
 
-	/* Set format routine */
-	cstate->routine = CopyToGetRoutine(&cstate->opts);
+	/* Get format routine */
+	routine = CopyToGetRoutine(&opts);
+
+	/* Allocate workspace and set known values */
+	MemoryContextSwitchTo(oldcontext);
+	cstate = (CopyToState) palloc0(routine->CopyToGetStateSize());
+	MemoryContextSwitchTo(copycontext);
+	if (routine == &CopyToRoutineText || routine == &CopyToRoutineCSV || routine == &CopyToRoutineBinary)
+		cstate_builtin = (CopyToStateBuiltin) cstate;
+	else
+		cstate_builtin = NULL;
+	cstate->copycontext = copycontext;
+	cstate->opts = opts;
+	cstate->routine = routine;
 
 	/* Process the source/target relation or query */
 	if (rel)
@@ -883,31 +872,34 @@ BeginCopyTo(ParseState *pstate,
 		}
 	}
 
-	/* Use client encoding when ENCODING option is not specified. */
-	if (cstate->opts.file_encoding < 0)
-		cstate->file_encoding = pg_get_client_encoding();
-	else
-		cstate->file_encoding = cstate->opts.file_encoding;
+	if (cstate_builtin)
+	{
+		/* Use client encoding when ENCODING option is not specified. */
+		if (cstate->opts.file_encoding < 0)
+			cstate_builtin->file_encoding = pg_get_client_encoding();
+		else
+			cstate_builtin->file_encoding = cstate->opts.file_encoding;
 
-	/*
-	 * Set up encoding conversion info if the file and server encodings differ
-	 * (see also pg_server_to_any).
-	 */
-	if (cstate->file_encoding == GetDatabaseEncoding() ||
-		cstate->file_encoding == PG_SQL_ASCII)
-		cstate->need_transcoding = false;
-	else
-		cstate->need_transcoding = true;
+		/*
+		 * Set up encoding conversion info if the file and server encodings
+		 * differ (see also pg_server_to_any).
+		 */
+		if (cstate_builtin->file_encoding == GetDatabaseEncoding() ||
+			cstate_builtin->file_encoding == PG_SQL_ASCII)
+			cstate_builtin->need_transcoding = false;
+		else
+			cstate_builtin->need_transcoding = true;
 
-	/* See Multibyte encoding comment above */
-	cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
+		/* See Multibyte encoding comment above */
+		cstate_builtin->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate_builtin->file_encoding);
+	}
 
-	cstate->copy_dest = COPY_FILE;	/* default */
+	cstate->copy_dest = COPY_DEST_FILE; /* default */
 
 	if (data_dest_cb)
 	{
 		progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
-		cstate->copy_dest = COPY_CALLBACK;
+		cstate->copy_dest = COPY_DEST_CALLBACK;
 		cstate->data_dest_cb = data_dest_cb;
 	}
 	else if (pipe)
@@ -1146,13 +1138,14 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 static void
 CopyAttributeOutText(CopyToState cstate, const char *string)
 {
+	CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
 	const char *ptr;
 	const char *start;
 	char		c;
 	char		delimc = cstate->opts.delim[0];
 
-	if (cstate->need_transcoding)
-		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	if (cstate_builtin->need_transcoding)
+		ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding);
 	else
 		ptr = string;
 
@@ -1170,7 +1163,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
 	 * of the normal safe-encoding path.
 	 */
-	if (cstate->encoding_embeds_ascii)
+	if (cstate_builtin->encoding_embeds_ascii)
 	{
 		start = ptr;
 		while ((c = *ptr) != '\0')
@@ -1225,7 +1218,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 				start = ptr++;	/* we include char in next run */
 			}
 			else if (IS_HIGHBIT_SET(c))
-				ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+				ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr);
 			else
 				ptr++;
 		}
@@ -1300,6 +1293,7 @@ static void
 CopyAttributeOutCSV(CopyToState cstate, const char *string,
 					bool use_quote)
 {
+	CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
 	const char *ptr;
 	const char *start;
 	char		c;
@@ -1312,8 +1306,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 	if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
 		use_quote = true;
 
-	if (cstate->need_transcoding)
-		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	if (cstate_builtin->need_transcoding)
+		ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding);
 	else
 		ptr = string;
 
@@ -1342,8 +1336,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 					use_quote = true;
 					break;
 				}
-				if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
-					tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
+				if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii)
+					tptr += pg_encoding_mblen(cstate_builtin->file_encoding, tptr);
 				else
 					tptr++;
 			}
@@ -1366,8 +1360,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 				CopySendChar(cstate, escapec);
 				start = ptr;	/* we include char in next run */
 			}
-			if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
-				ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+			if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii)
+				ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr);
 			else
 				ptr++;
 		}
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
index 2a2d2f9876b..aece73f4ca2 100644
--- a/src/include/commands/copyapi.h
+++ b/src/include/commands/copyapi.h
@@ -15,6 +15,7 @@
 #define COPYAPI_H
 
 #include "commands/copy.h"
+#include "executor/execdesc.h"
 
 /*
  * API structure for a COPY TO format implementation. Note this must be
@@ -22,6 +23,25 @@
  */
 typedef struct CopyToRoutine
 {
+	/* ---
+	 * Return state size for this routine.
+	 *
+	 * If this routine uses CopyToStateData as-is, `return
+	 * sizeof(CopyToStateData)` can be used.
+	 *
+	 * If this routine needs additional data than CopyToStateData, a new
+	 * struct based on CopyToStateData can be used something like:
+	 *
+	 * typedef struct MyCopyToStateDate {
+	 *     struct CopyToStateData parent;
+	 *     int define_additional_members_here;
+	 * } MyCopyToStateData;
+	 *
+	 * In the case, this callback returns `sizeof(MyCopyToStateData)`.
+	 * ---
+	 */
+	size_t		(*CopyToGetStateSize) (void);
+
 	/*
 	 * Set output function information. This callback is called once at the
 	 * beginning of COPY TO.
@@ -54,6 +74,52 @@ typedef struct CopyToRoutine
 	void		(*CopyToEnd) (CopyToState cstate);
 } CopyToRoutine;
 
+/*
+ * Represents the different dest cases we need to worry about at
+ * the bottom level
+ */
+typedef enum CopyDest
+{
+	COPY_DEST_FILE,				/* to file (or a piped program) */
+	COPY_DEST_FRONTEND,			/* to frontend */
+	COPY_DEST_CALLBACK,			/* to callback function */
+} CopyDest;
+
+/*
+ * This struct contains the state variables used by PostgreSQL, built-in
+ * format routines and custom format routines.
+ */
+typedef struct CopyToStateData
+{
+	/* format-specific routines */
+	const CopyToRoutine *routine;
+
+	/* low-level state data */
+	CopyDest	copy_dest;		/* type of copy source/destination */
+	FILE	   *copy_file;		/* used if copy_dest == COPY_FILE */
+	StringInfo	fe_msgbuf;		/* used for all dests during COPY TO */
+
+	/* parameters from the COPY command */
+	Relation	rel;			/* relation to copy to */
+	QueryDesc  *queryDesc;		/* executable query to copy from */
+	List	   *attnumlist;		/* integer list of attnums to copy */
+	char	   *filename;		/* filename, or NULL for STDOUT */
+	bool		is_program;		/* is 'filename' a program to popen? */
+	copy_data_dest_cb data_dest_cb; /* function for writing data */
+
+	CopyFormatOptions opts;
+	Node	   *whereClause;	/* WHERE condition (or NULL) */
+
+	/*
+	 * Working state
+	 */
+	MemoryContext copycontext;	/* per-copy execution context */
+
+	FmgrInfo   *out_functions;	/* lookup info for output functions */
+	MemoryContext rowcontext;	/* per-row evaluation context */
+	uint64		bytes_processed;	/* number of bytes processed so far */
+} CopyToStateData;
+
 /*
  * API structure for a COPY FROM format implementation. Note this must be
  * allocated in a server-lifetime manner, typically as a static const struct.
-- 
2.51.0

Reply via email to