Hello,
Following the same path of optimizing COPY FROM using SIMD, I found that
COPY TO can also benefit from this.

I attached a small patch that uses SIMD to skip ahead through the data
until the first special character is found, then falls back to scalar
processing for that character and re-enters the SIMD path again.
There are two ways to do this:
1) Essentially we do SIMD until we find a special character, then continue
on the scalar path without re-entering SIMD again.
- This gives speedups of 10% to 30%, depending on the proportion of special
characters in the attribute. We don't lose anything here, since it advances
with SIMD until it can't (using the previous scripts: 1/3 and 2/3 special
chars).

2) Do the SIMD path, then use the scalar path when we hit a special
character, and keep re-entering the SIMD path each time.
- This is equivalent to the COPY FROM story: we'll need to find the same
heuristic to use for both COPY FROM/TO to reduce the regressions (same
regressions: around 20% to 30% with 1/3 and 2/3 special chars).

Something else to note is that the scalar path for COPY TO isn't as heavy
as the state machine in COPY FROM.

So if we find the sweet spot for the heuristic, doing the same for COPY TO
will be trivial and always beneficial.
Attached is 0004, which is option 1 (SIMD without re-entering); 0005 is
option 2 (SIMD with re-entering).


Regards,
Ayoub
From 319e5402e35429943d80ba136f27e6185410e6f5 Mon Sep 17 00:00:00 2001
From: AyoubKAZ <[email protected]>
Date: Wed, 24 Dec 2025 15:20:53 +0100
Subject: [PATCH] Speed up COPY TO text CSV using SIMD

---
 src/backend/commands/copyto.c | 252 ++++++++++++++++++++++------------
 1 file changed, 167 insertions(+), 85 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e1306728509..b9d7b55f1ab 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -1268,38 +1268,63 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	if (cstate->encoding_embeds_ascii)
 	{
 		start = ptr;
-		#ifndef USE_NO_SIMD
+		const char *end = ptr + strlen(ptr);
+
+		while ((c = *ptr) != '\0')
+		{
+#ifndef USE_NO_SIMD
+			/*
+			 * SIMD fast path: scan ahead for special characters.
+			 * We re-enter this path after handling each special character
+			 * to maximize the benefit of vectorization.
+			 */
 			{
-				const char* end = ptr + strlen(ptr);
-				while (ptr + sizeof(Vector8) <= end) {
-					Vector8 chunk;
-					Vector8 control_mask;
-					Vector8 backslash_mask;
-					Vector8 delim_mask;
-					Vector8 special_mask;
-					uint32 mask;
+				
+				while (ptr + sizeof(Vector8) <= end)
+				{
+					Vector8		chunk;
+					Vector8		control_mask;
+					Vector8		backslash_mask;
+					Vector8		delim_mask;
+					Vector8		special_mask;
+					uint32		mask;
 
 					vector8_load(&chunk, (const uint8 *) ptr);
+					
+					/* Check for control characters (< 0x20) */
 					control_mask = vector8_gt(vector8_broadcast(0x20), chunk);
-					backslash_mask = vector8_eq(vector8_broadcast('\\'), chunk);
-					delim_mask = vector8_eq(vector8_broadcast(delimc), chunk);
+					
+					/* Check for backslash and delimiter */
+					backslash_mask = vector8_eq(chunk, vector8_broadcast('\\'));
+					delim_mask = vector8_eq(chunk, vector8_broadcast(delimc));
+					
 
-					special_mask = vector8_or(control_mask, vector8_or(backslash_mask, delim_mask));
+					/* Combine all masks */
+					special_mask = vector8_or(
+						vector8_or(control_mask, backslash_mask), delim_mask);
 
 					mask = vector8_highbit_mask(special_mask);
-					if (mask != 0) {
+					if (mask != 0)
+					{
+						/* Found special character, advance to it */
 						int advance = pg_rightmost_one_pos32(mask);
 						ptr += advance;
 						break;
 					}
 
+					/* No special characters in this chunk, advance */
 					ptr += sizeof(Vector8);
 				}
-			} 
-		#endif
+				
+				/* Update c after SIMD scan */
+				c = *ptr;
+			}
+#endif /* !USE_NO_SIMD */
+
+			/* Scalar handling - same code for SIMD and non-SIMD builds */
+			if (c == '\0')
+				break;
 
-		while ((c = *ptr) != '\0')
-		{
 			if ((unsigned char) c < (unsigned char) 0x20)
 			{
 				/*
@@ -1358,38 +1383,60 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	else
 	{
 		start = ptr;
-		#ifndef USE_NO_SIMD
+		const char *end = ptr + strlen(ptr);
+
+		while ((c = *ptr) != '\0')
+		{
+#ifndef USE_NO_SIMD
+			/*
+			 * SIMD fast path: scan ahead for special characters.
+			 */
 			{
-				const char* end = ptr + strlen(ptr);
-				while (ptr + sizeof(Vector8) <= end) {
-					Vector8 chunk;
-					Vector8 control_mask;
-					Vector8 backslash_mask;
-					Vector8 delim_mask;
-					Vector8 special_mask;
-					uint32 mask;
+				
+				while (ptr + sizeof(Vector8) <= end)
+				{
+					Vector8		chunk;
+					Vector8		control_mask;
+					Vector8		backslash_mask;
+					Vector8		delim_mask;
+					Vector8		special_mask;
+					uint32		mask;
 
 					vector8_load(&chunk, (const uint8 *) ptr);
+					
+					/* Check for control characters (< 0x20) */
 					control_mask = vector8_gt(vector8_broadcast(0x20), chunk);
-					backslash_mask = vector8_eq(vector8_broadcast('\\'), chunk);
-					delim_mask = vector8_eq(vector8_broadcast(delimc), chunk);
+					
+					/* Check for backslash and delimiter */
+					backslash_mask = vector8_eq(chunk, vector8_broadcast('\\'));
+					delim_mask = vector8_eq(chunk, vector8_broadcast(delimc));
 
-					special_mask = vector8_or(control_mask, vector8_or(backslash_mask, delim_mask));
+					/* Combine masks */
+					special_mask = vector8_or(control_mask, 
+											  vector8_or(backslash_mask, delim_mask));
 
 					mask = vector8_highbit_mask(special_mask);
-					if (mask != 0) {
+					if (mask != 0)
+					{
+						/* Found special character */
 						int advance = pg_rightmost_one_pos32(mask);
 						ptr += advance;
 						break;
 					}
 
+					/* No special characters, advance */
 					ptr += sizeof(Vector8);
 				}
-			} 
-		#endif
+				
+				/* Update c after SIMD scan */
+				c = *ptr;
+			}
+#endif /* !USE_NO_SIMD */
+
+			/* Scalar handling - same for SIMD and non-SIMD */
+			if (c == '\0')
+				break;
 
-		while ((c = *ptr) != '\0')
-		{
 			if ((unsigned char) c < (unsigned char) 0x20)
 			{
 				/*
@@ -1489,53 +1536,68 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 		else
 		{
 			const char *tptr = ptr;
+			const char *end = tptr + strlen(tptr);
+			
+			while ((c = *tptr) != '\0') 
+			{
+#ifndef USE_NO_SIMD
+			/*
+			 * SIMD accelerated quote detection.
+			 */
+			{	
+				Vector8		delim_vec;
+				Vector8		quote_vec;
+				Vector8		newline_vec;
+				Vector8		cr_vec;
+				
+				delim_vec = vector8_broadcast(delimc);
+				quote_vec = vector8_broadcast(quotec);
+				newline_vec = vector8_broadcast('\n');
+				cr_vec = vector8_broadcast('\r');
+
+				while (tptr + sizeof(Vector8) <= end)
+				{
+					Vector8		chunk;
+					Vector8		special_mask;
+					uint32		mask;
 
-			#ifndef USE_NO_SIMD
-				{	
-					const char* end = tptr + strlen(tptr);
-
-					Vector8 delim_mask = vector8_broadcast(delimc);
-					Vector8 quote_mask = vector8_broadcast(quotec);
-					Vector8 newline_mask = vector8_broadcast('\n');
-					Vector8 carriage_return_mask = vector8_broadcast('\r');
-
-					while (tptr + sizeof(Vector8) <= end) {
-						Vector8 chunk;
-						Vector8 special_mask;
-						uint32 mask;
-
-						vector8_load(&chunk, (const uint8 *) tptr);
-						special_mask = vector8_or(
-							vector8_or(vector8_eq(chunk, delim_mask),
-									   vector8_eq(chunk, quote_mask)),
-							vector8_or(vector8_eq(chunk, newline_mask),
-									   vector8_eq(chunk, carriage_return_mask))
-						);
-
-						mask = vector8_highbit_mask(special_mask);
-						if (mask != 0) {
-							tptr += pg_rightmost_one_pos32(mask);
-							use_quote = true;
-							break;
-						}
+					vector8_load(&chunk, (const uint8 *) tptr);
+					
+					special_mask = vector8_or(
+						vector8_or(vector8_eq(chunk, delim_vec),
+								   vector8_eq(chunk, quote_vec)),
+						vector8_or(vector8_eq(chunk, newline_vec),
+								   vector8_eq(chunk, cr_vec)));
 
-						tptr += sizeof(Vector8);
+					mask = vector8_highbit_mask(special_mask);
+					if (mask != 0)
+					{
+						tptr += pg_rightmost_one_pos32(mask);
+						use_quote = true;
+						break;
 					}
+
+					tptr += sizeof(Vector8);
 				}
-			#endif
+			}
+#endif /* !USE_NO_SIMD */
 
-			while ((c = *tptr) != '\0')
+			/*
+			 * Scalar scan for remaining bytes (tail after SIMD, or entire
+			 * string if USE_NO_SIMD).
+			 */
+			if ((c = *tptr) != '\0')
 			{
 				if (c == delimc || c == quotec || c == '\n' || c == '\r')
 				{
 					use_quote = true;
-					break;
 				}
 				if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
 					tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
 				else
 					tptr++;
 			}
+			}
 		}
 	}
 
@@ -1548,37 +1610,57 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 		 */
 		start = ptr;
 
-		#ifndef USE_NO_SIMD
-			{	
-				const char* end = ptr + strlen(ptr);
-
-				Vector8 escape_mask = vector8_broadcast(escapec);
-				Vector8 quote_mask = vector8_broadcast(quotec);
+		const char *end = ptr + strlen(ptr);
 
-				while (ptr + sizeof(Vector8) <= end) {
-					Vector8 chunk;
-					Vector8 special_mask;
-					uint32 mask;
+		while ((c = *ptr) != '\0')
+		{
+#ifndef USE_NO_SIMD
+			/*
+			 * SIMD fast path: scan ahead for quote/escape characters.
+			 * Re-enter after handling each special character.
+			 */
+			{	
+				Vector8		escape_vec;
+				Vector8		quote_vec;
+				
+				/* Pre-compute broadcast vectors */
+				escape_vec = vector8_broadcast(escapec);
+				quote_vec = vector8_broadcast(quotec);
+
+				while (ptr + sizeof(Vector8) <= end)
+				{
+					Vector8		chunk;
+					Vector8		special_mask;
+					uint32		mask;
 
 					vector8_load(&chunk, (const uint8 *) ptr);
+					
 					special_mask = vector8_or(
-						vector8_eq(chunk, escape_mask), 
-							vector8_eq(chunk, quote_mask));
+						vector8_eq(chunk, escape_vec), 
+						vector8_eq(chunk, quote_vec));
 
 					mask = vector8_highbit_mask(special_mask);
-					if (mask != 0) {
-						ptr += pg_rightmost_one_pos32(mask);
-						use_quote = true;
+					if (mask != 0)
+					{
+						/* Found special character */
+						int advance = pg_rightmost_one_pos32(mask);
+						ptr += advance;
 						break;
 					}
 
+					/* No special characters in this chunk */
 					ptr += sizeof(Vector8);
 				}
+				
+				/* Update c after SIMD scan */
+				c = *ptr;
 			}
-		#endif
-		
-		while ((c = *ptr) != '\0')
-		{
+#endif /* !USE_NO_SIMD */
+
+			/* Scalar handling - same code for SIMD and non-SIMD builds */
+			if (c == '\0')
+				break;
+
 			if (c == quotec || c == escapec)
 			{
 				DUMPSOFAR();
-- 
2.34.1

From bfc580b17ad5e6d981adc146c24690afe4634ce1 Mon Sep 17 00:00:00 2001
From: AyoubKAZ <[email protected]>
Date: Wed, 24 Dec 2025 12:55:15 +0100
Subject: [PATCH] Speed up COPY TO text CSV using SIMD

---
 src/backend/commands/copyto.c | 126 ++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index dae91630ac3..e1306728509 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -31,6 +31,8 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "port/pg_bitutils.h"
+#include "port/simd.h"
 #include "storage/fd.h"
 #include "tcop/tcopprot.h"
 #include "utils/lsyscache.h"
@@ -1266,6 +1268,36 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	if (cstate->encoding_embeds_ascii)
 	{
 		start = ptr;
+		#ifndef USE_NO_SIMD
+			{
+				const char* end = ptr + strlen(ptr);
+				while (ptr + sizeof(Vector8) <= end) {
+					Vector8 chunk;
+					Vector8 control_mask;
+					Vector8 backslash_mask;
+					Vector8 delim_mask;
+					Vector8 special_mask;
+					uint32 mask;
+
+					vector8_load(&chunk, (const uint8 *) ptr);
+					control_mask = vector8_gt(vector8_broadcast(0x20), chunk);
+					backslash_mask = vector8_eq(vector8_broadcast('\\'), chunk);
+					delim_mask = vector8_eq(vector8_broadcast(delimc), chunk);
+
+					special_mask = vector8_or(control_mask, vector8_or(backslash_mask, delim_mask));
+
+					mask = vector8_highbit_mask(special_mask);
+					if (mask != 0) {
+						int advance = pg_rightmost_one_pos32(mask);
+						ptr += advance;
+						break;
+					}
+
+					ptr += sizeof(Vector8);
+				}
+			} 
+		#endif
+
 		while ((c = *ptr) != '\0')
 		{
 			if ((unsigned char) c < (unsigned char) 0x20)
@@ -1326,6 +1358,36 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	else
 	{
 		start = ptr;
+		#ifndef USE_NO_SIMD
+			{
+				const char* end = ptr + strlen(ptr);
+				while (ptr + sizeof(Vector8) <= end) {
+					Vector8 chunk;
+					Vector8 control_mask;
+					Vector8 backslash_mask;
+					Vector8 delim_mask;
+					Vector8 special_mask;
+					uint32 mask;
+
+					vector8_load(&chunk, (const uint8 *) ptr);
+					control_mask = vector8_gt(vector8_broadcast(0x20), chunk);
+					backslash_mask = vector8_eq(vector8_broadcast('\\'), chunk);
+					delim_mask = vector8_eq(vector8_broadcast(delimc), chunk);
+
+					special_mask = vector8_or(control_mask, vector8_or(backslash_mask, delim_mask));
+
+					mask = vector8_highbit_mask(special_mask);
+					if (mask != 0) {
+						int advance = pg_rightmost_one_pos32(mask);
+						ptr += advance;
+						break;
+					}
+
+					ptr += sizeof(Vector8);
+				}
+			} 
+		#endif
+
 		while ((c = *ptr) != '\0')
 		{
 			if ((unsigned char) c < (unsigned char) 0x20)
@@ -1428,6 +1490,40 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 		{
 			const char *tptr = ptr;
 
+			#ifndef USE_NO_SIMD
+				{	
+					const char* end = tptr + strlen(tptr);
+
+					Vector8 delim_mask = vector8_broadcast(delimc);
+					Vector8 quote_mask = vector8_broadcast(quotec);
+					Vector8 newline_mask = vector8_broadcast('\n');
+					Vector8 carriage_return_mask = vector8_broadcast('\r');
+
+					while (tptr + sizeof(Vector8) <= end) {
+						Vector8 chunk;
+						Vector8 special_mask;
+						uint32 mask;
+
+						vector8_load(&chunk, (const uint8 *) tptr);
+						special_mask = vector8_or(
+							vector8_or(vector8_eq(chunk, delim_mask),
+									   vector8_eq(chunk, quote_mask)),
+							vector8_or(vector8_eq(chunk, newline_mask),
+									   vector8_eq(chunk, carriage_return_mask))
+						);
+
+						mask = vector8_highbit_mask(special_mask);
+						if (mask != 0) {
+							tptr += pg_rightmost_one_pos32(mask);
+							use_quote = true;
+							break;
+						}
+
+						tptr += sizeof(Vector8);
+					}
+				}
+			#endif
+
 			while ((c = *tptr) != '\0')
 			{
 				if (c == delimc || c == quotec || c == '\n' || c == '\r')
@@ -1451,6 +1547,36 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 		 * We adopt the same optimization strategy as in CopyAttributeOutText
 		 */
 		start = ptr;
+
+		#ifndef USE_NO_SIMD
+			{	
+				const char* end = ptr + strlen(ptr);
+
+				Vector8 escape_mask = vector8_broadcast(escapec);
+				Vector8 quote_mask = vector8_broadcast(quotec);
+
+				while (ptr + sizeof(Vector8) <= end) {
+					Vector8 chunk;
+					Vector8 special_mask;
+					uint32 mask;
+
+					vector8_load(&chunk, (const uint8 *) ptr);
+					special_mask = vector8_or(
+						vector8_eq(chunk, escape_mask), 
+							vector8_eq(chunk, quote_mask));
+
+					mask = vector8_highbit_mask(special_mask);
+					if (mask != 0) {
+						ptr += pg_rightmost_one_pos32(mask);
+						use_quote = true;
+						break;
+					}
+
+					ptr += sizeof(Vector8);
+				}
+			}
+		#endif
+		
 		while ((c = *ptr) != '\0')
 		{
 			if (c == quotec || c == escapec)
-- 
2.34.1

Reply via email to