Heikki Linnakangas wrote:
Attached is a patch that modifies CopyReadLineText so that it uses
memchr to speed up the scan. The nice thing about memchr is that we can
take advantage of any clever optimizations that might be in libc or
compiler.
Here's an updated version of the patch. The principle is the same, but
the same optimization is now used for CSV input as well, and there's
more comments.
I still need to do more benchmarking. I mentioned a ~5% speedup on the
test I ran earlier, which was a load of the lineitem table from TPC-H.
It looks like with cheaper data types the gain can be much bigger;
here's an oprofile from loading the TPC-H partsupp table,
Before:
samples % image name symbol name
5146 25.7635 postgres CopyReadLine
4089 20.4716 postgres DoCopy
1449 7.2544 reiserfs (no symbols)
1369 6.8539 postgres pg_verify_mbstr_len
1013 5.0716 libc-2.7.so memcpy
749 3.7499 libc-2.7.so ____strtod_l_internal
598 2.9939 postgres heap_formtuple
548 2.7436 libc-2.7.so ____strtol_l_internal
403 2.0176 libc-2.7.so memset
309 1.5470 libc-2.7.so strlen
208 1.0414 postgres AllocSetAlloc
...
After:
samples % image name symbol name
4165 25.7879 postgres DoCopy
1574 9.7455 postgres pg_verify_mbstr_len
1520 9.4112 reiserfs (no symbols)
1005 6.2225 libc-2.7.so memchr
986 6.1049 libc-2.7.so memcpy
632 3.9131 libc-2.7.so ____strtod_l_internal
589 3.6468 postgres heap_formtuple
546 3.3806 libc-2.7.so ____strtol_l_internal
386 2.3899 libc-2.7.so memset
366 2.2661 postgres CopyReadLine
287 1.7770 libc-2.7.so strlen
215 1.3312 postgres LWLockAcquire
208 1.2878 postgres hash_any
176 1.0897 postgres LWLockRelease
161 0.9968 postgres InputFunctionCall
157 0.9721 postgres AllocSetAlloc
...
Profile shows that with the patch, ~8.5% of the CPU time is spent in
CopyReadLine+memchr, vs. 25.5% before. That's a quite significant speedup.
I still need to test the worst-case performance, with input that has a
lot of escapes. It would be interesting to hear reports with this patch
from people on different platforms. These results are from my laptop
with 32-bit Intel CPU, running Linux. There could be big differences in
the memchr implementations.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Index: src/backend/commands/copy.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/copy.c,v
retrieving revision 1.295
diff -c -r1.295 copy.c
*** src/backend/commands/copy.c 1 Jan 2008 19:45:48 -0000 1.295
--- src/backend/commands/copy.c 29 Feb 2008 17:43:53 -0000
***************
*** 67,72 ****
--- 67,275 ----
EOL_CRNL
} EolType;
+
+ /***** Fast byte searcher functions *****
+ *
+ * CopyReadLine needs to search for newlines, as well as quoting and escape
+ * characters to determine which newlines are real and which ones are escaped.
+ * Doing that in the naive way, looping through the input byte by byte and
+ * comparing against the interesting characters, can be slow.
+ *
+ * To speed that up, we provide a "searcher" interface, that can be used to
+ * search a piece of memory for up to 4 different bytes simultaneously. It's
+ * similar to memchr, but allows searching for multiple chars at the same
+ * time.
+ *
+ * To start a search on a new block of memory, call init_searcher. Then call
+ * next_searcher repeatedly to return all the occurrances of the bytes being
+ * searched for.
+ *
+ * The searcher implementation uses memchr to search for the bytes, and keeps
+ * track of where the next occurrance of each is. Using memchr allows us
+ * to take advantage of any platform-specific optimizations.
+ */
+
+ /*
+ * Struct used to store state of the current search between calls to
+ * next_searcher. Initialized by init_search.
+ */
+ typedef struct {
+ /* Each element in c corresponds the element in s. These are sorted
+ * by the pointers (ptr) */
+ int n; /* elements used in the arrays */
+ char c[4]; /* bytes we're looking for */
+ char *ptr[4]; /* pointers to next occurances of the bytes */
+ char *end; /* end of block being searched. Last byte to search is end-1 */
+ } searcher;
+
+ /* Functions internal to "searcher" */
+
+ #define swap_searcher_entries(searcher, a, b) do { \
+ char tmpc = (searcher)->c[a]; \
+ char *tmpptr = (searcher)->ptr[a]; \
+ (searcher)->c[a] = (searcher)->c[b]; \
+ (searcher)->ptr[a] = (searcher)->ptr[b]; \
+ (searcher)->c[b] = tmpc; \
+ (searcher)->ptr[b] = tmpptr; \
+ } while(0)
+
+ static void
+ sort_searcher(searcher *s)
+ {
+ switch(s->n)
+ {
+ case 0:
+ case 1:
+ /* Nothing to do */
+ break;
+ case 2:
+ if (s->ptr[0] > s->ptr[1])
+ swap_searcher_entries(s, 0, 1);
+ break;
+ case 3:
+ if (s->ptr[0] > s->ptr[2])
+ swap_searcher_entries(s, 0, 2);
+ if (s->ptr[0] > s->ptr[1])
+ swap_searcher_entries(s, 0, 1);
+ if (s->ptr[1] > s->ptr[2])
+ swap_searcher_entries(s, 1, 2);
+ break;
+ case 4:
+ if (s->ptr[0] > s->ptr[1])
+ swap_searcher_entries(s, 0, 1);
+ if (s->ptr[2] > s->ptr[3])
+ swap_searcher_entries(s, 2, 3);
+ if (s->ptr[1] > s->ptr[2])
+ swap_searcher_entries(s, 2, 3);
+ if (s->ptr[0] > s->ptr[1])
+ swap_searcher_entries(s, 0, 1);
+ if (s->ptr[2] > s->ptr[3])
+ swap_searcher_entries(s, 2, 3);
+ break;
+ }
+ }
+
+ /* Remove the topmost element */
+ static void
+ remove_top(searcher *searcher)
+ {
+ int i;
+
+ searcher->n--;
+
+ /* Move up the remaining items */
+ for (i = 0; i < searcher->n; i++)
+ {
+ searcher->c[i] = searcher->c[i + 1];
+ searcher->ptr[i] = searcher->ptr[i + 1];
+ }
+ }
+
+ /*
+ * The first element in the list has been replaced by the caller.
+ * Move it to the right place in the list, to keep it sorted.
+ */
+ static void
+ sift_down(searcher *searcher)
+ {
+ if (searcher->n < 2 || searcher->ptr[0] <= searcher->ptr[1])
+ return;
+ swap_searcher_entries(searcher, 0, 1);
+
+ if (searcher->n < 3 || searcher->ptr[1] <= searcher->ptr[2])
+ return;
+ swap_searcher_entries(searcher, 1, 2);
+
+ if (searcher->n < 4 || searcher->ptr[2] <= searcher->ptr[3])
+ return;
+ swap_searcher_entries(searcher, 2, 3);
+ }
+
+ /*
+ * Starts a new search in a block of memory.
+ *
+ * searcher - for storing state between calls. Opaque to caller,
+ * init_searcher will initialize this.
+ * str - block of memory to search
+ * len - length of str
+ * c - bytes to search for, max 4.
+ * n - number of bytes in c
+ */
+ static void
+ init_searcher(searcher *searcher, char *str, int len, char c[4], int n)
+ {
+ int i;
+
+ Assert(n <= 4);
+
+ searcher->n = 0;
+ searcher->end = str + len;
+
+ /* Find first occurances of the searched-for bytes */
+ for (i = 0; i < n; i++)
+ {
+ char *hit = memchr(str, c[i], len);
+ if (hit != NULL)
+ {
+ searcher->c[searcher->n] = c[i];
+ searcher->ptr[searcher->n] = hit;
+ searcher->n++;
+ }
+ /*
+ * If the byte doesn't occur in the string at all, we don't
+ * need to put it in the list.
+ */
+ }
+ sort_searcher(searcher);
+ }
+
+ /*
+ * Look for the next interesting character
+ *
+ * searcher - state, opaque to caller
+ * str - block to search.
+ *
+ * 'str' must be part of the block passed to init_searcher, and >=
+ * the return value of last call to next_searcher.
+ *
+ * Returns the offset of the next interesting location, relative to 'ss'
+ */
+ static char *
+ next_searcher(searcher *searcher, char *str)
+ {
+ Assert(str < searcher->end);
+
+ if (searcher->n == 0)
+ return NULL;
+
+ /*
+ * If the caller skipped over the next pointer we had memorized, we have to
+ * search for the one after that.
+ */
+ while (str > searcher->ptr[0])
+ {
+ char *s = memchr(str, searcher->c[0], searcher->end - str);
+
+ if (s == NULL)
+ {
+ remove_top(searcher);
+ if (searcher->n == 0)
+ return NULL;
+ }
+ else
+ {
+ searcher->ptr[0] = s;
+ sift_down(searcher);
+ }
+ }
+
+ /*
+ * Return the next interesting location we had memorized.
+ */
+ return searcher->ptr[0];
+ }
+
+
/*
* This struct contains all the state variables used throughout a COPY
* operation. For simplicity, we use the same struct for all variants of COPY,
***************
*** 159,164 ****
--- 362,369 ----
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
+
+ searcher searcher;
} CopyStateData;
typedef CopyStateData *CopyState;
***************
*** 2285,2299 ****
--- 2490,2516 ----
last_was_esc = false;
char quotec = '\0';
char escapec = '\0';
+ char search_chars[4];
+ int nsearch_chars = 0;
+
+ search_chars[nsearch_chars++] = '\n';
+ search_chars[nsearch_chars++] = '\r';
if (cstate->csv_mode)
{
quotec = cstate->quote[0];
escapec = cstate->escape[0];
+
+ search_chars[nsearch_chars++] = quotec;
+
/* ignore special escape processing if it's the same as quotec */
if (quotec == escapec)
escapec = '\0';
+ else
+ search_chars[nsearch_chars++] = escapec;
}
+ else
+ search_chars[nsearch_chars++] = '\\';
mblen_str[1] = '\0';
***************
*** 2360,2368 ****
--- 2577,2601 ----
break;
}
need_data = false;
+
+ init_searcher(&cstate->searcher, copy_raw_buf, copy_buf_len,
+ search_chars, nsearch_chars);
}
/* OK to fetch a character */
+ if ((!cstate->csv_mode || !first_char_in_line)
+ && (raw_buf_ptr + 4 <= copy_buf_len))
+ {
+ char *next = next_searcher(&cstate->searcher,
+ ©_raw_buf[raw_buf_ptr]);
+
+ if (next != NULL)
+ {
+ raw_buf_ptr = next - copy_raw_buf;
+ first_char_in_line = false;
+ }
+ }
+
prev_raw_ptr = raw_buf_ptr;
c = copy_raw_buf[raw_buf_ptr++];
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?
http://archives.postgresql.org