Hi, On Thu, 21 Aug 2025 at 18:47, Andrew Dunstan <[email protected]> wrote: > > > On 2025-08-19 Tu 10:14 AM, Nazir Bilal Yavuz wrote: > > Hi, > > > > On Tue, 19 Aug 2025 at 15:33, Nazir Bilal Yavuz <[email protected]> wrote: > >> I am able to reproduce the regression you mentioned but both > >> regressions are %20 on my end. I found that (by experimenting) SIMD > >> causes a regression if it advances less than 5 characters. > >> > >> So, I implemented a small heuristic. It works like that: > >> > >> - If advance < 5 -> insert a sleep penalty (n cycles). > > 'sleep' might be a poor word choice here. I meant skipping SIMD for n > > number of times. > > > > I was thinking a bit about that this morning. I wonder if it might be > better instead of having a constantly applied heuristic like this, it > might be better to do a little extra accounting in the first, say, 1000 > lines of an input file, and if less than some portion of the input is > found to be special characters then switch to the SIMD code. What that > portion should be would need to be determined by some experimentation > with a variety of typical workloads, but given your findings 20% seems > like a good starting point.
I implemented a heuristic similar to this. It is a mix of the previous heuristic and your idea, and it works like this: The overall logic is that the decision is made per line: SIMD is either enabled or disabled for an entire line, and after each line we decide whether it is worth running SIMD for the next lines. 1 - We try SIMD on a line and decide whether it was worth it (in the attached patch: whether each SIMD step advanced at least 5 bytes on average). 1.1 - If it was worth it, we continue to run SIMD and halve the simd_last_sleep_cycle variable. 1.2 - If it was not worth it, we double simd_last_sleep_cycle and do not run SIMD for that many lines. 1.3 - After skipping simd_last_sleep_cycle lines, we go back to step 1. Note: simd_last_sleep_cycle cannot exceed 1024, so SIMD is retried after at most 1024 skipped lines. With this heuristic the regression is limited to 2% in the worst case. Patches are attached; the first patch is v2-0001 from Shinya with the '-Werror=maybe-uninitialized' fixes and the pgindent changes. 0002 is the actual heuristic patch. -- Regards, Nazir Bilal Yavuz Microsoft
From 2d2372e90305a81c80fe182003933039bf32f97e Mon Sep 17 00:00:00 2001 From: Shinya Kato <[email protected]> Date: Mon, 28 Jul 2025 22:08:20 +0900 Subject: [PATCH v3 1/2] Speed up COPY FROM text/CSV parsing using SIMD --- src/backend/commands/copyfromparse.c | 73 ++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index b1ae97b833d..99959a40fab 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -71,7 +71,9 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" +#include "port/pg_bitutils.h" #include "port/pg_bswap.h" +#include "port/simd.h" #include "utils/builtins.h" #include "utils/rel.h" @@ -1255,6 +1257,14 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) char quotec = '\0'; char escapec = '\0'; +#ifndef USE_NO_SIMD + Vector8 nl = vector8_broadcast('\n'); + Vector8 cr = vector8_broadcast('\r'); + Vector8 bs = vector8_broadcast('\\'); + Vector8 quote = vector8_broadcast(0); + Vector8 escape = vector8_broadcast(0); +#endif + if (is_csv) { quotec = cstate->opts.quote[0]; @@ -1262,6 +1272,12 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) /* ignore special escape processing if it's the same as quotec */ if (quotec == escapec) escapec = '\0'; + +#ifndef USE_NO_SIMD + quote = vector8_broadcast(quotec); + if (quotec != escapec) + escape = vector8_broadcast(escapec); +#endif } /* @@ -1328,6 +1344,63 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) need_data = false; } +#ifndef USE_NO_SIMD + + /* + * Use SIMD instructions to efficiently scan the input buffer for + * special characters (e.g., newline, carriage return, quote, and + * escape). This is faster than byte-by-byte iteration, especially on + * large buffers. 
+ * + * We do not apply the SIMD fast path in either of the following + * cases: - When the previously processed character was an escape + * character (last_was_esc), since the next byte must be examined + * sequentially. - The remaining buffer is smaller than one vector + * width (sizeof(Vector8)); SIMD operates on fixed-size chunks. + */ + if (!last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) + { + Vector8 chunk; + Vector8 match = vector8_broadcast(0); + uint32 mask; + + /* Load a chunk of data into a vector register */ + vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]); + + /* \n and \r are not special inside quotes */ + if (!in_quote) + match = vector8_or(vector8_eq(chunk, nl), vector8_eq(chunk, cr)); + + if (is_csv) + { + match = vector8_or(match, vector8_eq(chunk, quote)); + if (escapec != '\0') + match = vector8_or(match, vector8_eq(chunk, escape)); + } + else + match = vector8_or(match, vector8_eq(chunk, bs)); + + /* Check if we found any special characters */ + mask = vector8_highbit_mask(match); + if (mask != 0) + { + /* + * Found a special character. Advance up to that point and let + * the scalar code handle it. + */ + int advance = pg_rightmost_one_pos32(mask); + + input_buf_ptr += advance; + } + else + { + /* No special characters found, so skip the entire chunk */ + input_buf_ptr += sizeof(Vector8); + continue; + } + } +#endif + /* OK to fetch a character */ prev_raw_ptr = input_buf_ptr; c = copy_input_buf[input_buf_ptr++]; -- 2.51.0
From ad050583d3c14bdec44266d8d2110b384fa9d7dc Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <[email protected]> Date: Tue, 14 Oct 2025 13:18:13 +0300 Subject: [PATCH v3 2/2] COPY SIMD per-line heuristic --- src/include/commands/copyfrom_internal.h | 7 ++ src/backend/commands/copyfrom.c | 6 ++ src/backend/commands/copyfromparse.c | 82 ++++++++++++++++++++++-- 3 files changed, 89 insertions(+), 6 deletions(-) diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index c8b22af22d8..9dd31320f52 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -89,6 +89,13 @@ typedef struct CopyFromStateData const char *cur_attval; /* current att value for error messages */ bool relname_only; /* don't output line number, att, etc. */ + /* SIMD variables */ + bool simd_continue; + bool simd_initialized; + uint16 simd_last_sleep_cycle; + uint16 simd_current_sleep_cycle; + + /* * Working state */ diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 12781963b4f..4bdfd96c244 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1721,6 +1721,12 @@ BeginCopyFrom(ParseState *pstate, cstate->cur_attval = NULL; cstate->relname_only = false; + /* Initialize SIMD variables */ + cstate->simd_continue = false; + cstate->simd_initialized = false; + cstate->simd_current_sleep_cycle = 0; + cstate->simd_last_sleep_cycle = 0; + /* * Allocate buffers for the input pipeline. 
* diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 99959a40fab..24cef54e5e4 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -143,12 +143,14 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; /* non-export function prototypes */ static bool CopyReadLine(CopyFromState cstate, bool is_csv); -static bool CopyReadLineText(CopyFromState cstate, bool is_csv); static int CopyReadAttributesText(CopyFromState cstate); static int CopyReadAttributesCSV(CopyFromState cstate); static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull); +static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate, + bool is_csv, + bool simd_continue); static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, @@ -1173,8 +1175,23 @@ CopyReadLine(CopyFromState cstate, bool is_csv) resetStringInfo(&cstate->line_buf); cstate->line_buf_valid = false; - /* Parse data and transfer into line_buf */ - result = CopyReadLineText(cstate, is_csv); + /* If that is the first time we do read, initalize the SIMD */ + if (unlikely(!cstate->simd_initialized)) + { + cstate->simd_initialized = true; + cstate->simd_continue = true; + cstate->simd_current_sleep_cycle = 0; + cstate->simd_last_sleep_cycle = 0; + } + + /* + * Parse data and transfer into line_buf. To get benefit from inlining, + * call CopyReadLineText() with the constant boolean variables. 
+ */ + if (cstate->simd_continue) + result = CopyReadLineText(cstate, is_csv, true); + else + result = CopyReadLineText(cstate, is_csv, false); if (result) { @@ -1241,8 +1258,8 @@ CopyReadLine(CopyFromState cstate, bool is_csv) /* * CopyReadLineText - inner loop of CopyReadLine for text mode */ -static bool -CopyReadLineText(CopyFromState cstate, bool is_csv) +static pg_attribute_always_inline bool +CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_continue) { char *copy_input_buf; int input_buf_ptr; @@ -1258,11 +1275,16 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) char escapec = '\0'; #ifndef USE_NO_SIMD +#define SIMD_SLEEP_MAX 1024 +#define SIMD_ADVANCE_AT_LEAST 5 Vector8 nl = vector8_broadcast('\n'); Vector8 cr = vector8_broadcast('\r'); Vector8 bs = vector8_broadcast('\\'); Vector8 quote = vector8_broadcast(0); Vector8 escape = vector8_broadcast(0); + + uint64 simd_total_cycle = 0; + uint64 simd_total_advance = 0; #endif if (is_csv) @@ -1358,12 +1380,14 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) * sequentially. - The remaining buffer is smaller than one vector * width (sizeof(Vector8)); SIMD operates on fixed-size chunks. 
*/ - if (!last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) + if (simd_continue && !last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) { Vector8 chunk; Vector8 match = vector8_broadcast(0); uint32 mask; + simd_total_cycle++; + /* Load a chunk of data into a vector register */ vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]); @@ -1391,11 +1415,13 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) int advance = pg_rightmost_one_pos32(mask); input_buf_ptr += advance; + simd_total_advance += advance; } else { /* No special characters found, so skip the entire chunk */ input_buf_ptr += sizeof(Vector8); + simd_total_advance += sizeof(Vector8); continue; } } @@ -1603,6 +1629,50 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) } } /* end of outer loop */ +#ifndef USE_NO_SIMD + + /* SIMD was enabled */ + if (simd_continue) + { + /* SIMD is worth */ + if (simd_total_cycle && simd_total_advance / simd_total_cycle >= SIMD_ADVANCE_AT_LEAST) + { + Assert(cstate->simd_current_sleep_cycle == 0); + cstate->simd_last_sleep_cycle >>= 1; + } + /* SIMD was enabled but it isn't worth */ + else + { + uint16 simd_last_sleep_cycle = cstate->simd_last_sleep_cycle; + + cstate->simd_continue = false; + + if (simd_last_sleep_cycle == 0) + simd_last_sleep_cycle = 1; + else if (simd_last_sleep_cycle >= SIMD_SLEEP_MAX / 2) + simd_last_sleep_cycle = SIMD_SLEEP_MAX; + else + simd_last_sleep_cycle <<= 1; + cstate->simd_current_sleep_cycle = simd_last_sleep_cycle; + cstate->simd_last_sleep_cycle = simd_last_sleep_cycle; + } + } + /* SIMD was disabled */ + else + { + /* + * We should come here with decrementing + * cstate->simd_current_sleep_cycle from a positive number. + */ + Assert(cstate->simd_current_sleep_cycle != 0); + cstate->simd_current_sleep_cycle--; + + if (cstate->simd_current_sleep_cycle == 0) + cstate->simd_continue = true; + } + +#endif + /* * Transfer any still-uncopied data to line_buf. */ -- 2.51.0
