Tom Lane wrote:
So it looks like you need to recheck if unescape_single_char sees a high-bit-set char. You should take a second look at the COPY code to see if there's a similar case there --- I forget what it does with backslash followed by non-digit.
It's covered. Revised patch attached. I'll probably apply this sometime tomorrow.
cheers, andrew
Index: src/backend/commands/copy.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/commands/copy.c,v retrieving revision 1.286 diff -c -r1.286 copy.c *** src/backend/commands/copy.c 7 Sep 2007 20:59:26 -0000 1.286 --- src/backend/commands/copy.c 12 Sep 2007 03:21:25 -0000 *************** *** 2685,2690 **** --- 2685,2691 ---- char *start_ptr; char *end_ptr; int input_len; + bool saw_high_bit = false; /* Make sure space remains in fieldvals[] */ if (fieldno >= maxfields) *************** *** 2749,2754 **** --- 2750,2757 ---- } } c = val & 0377; + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } break; case 'x': *************** *** 2772,2777 **** --- 2775,2782 ---- } } c = val & 0xff; + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } } break; *************** *** 2799,2805 **** * literally */ } ! } /* Add c to output string */ *output_ptr++ = c; --- 2804,2810 ---- * literally */ } ! } /* Add c to output string */ *output_ptr++ = c; *************** *** 2808,2813 **** --- 2813,2828 ---- /* Terminate attribute value in output area */ *output_ptr++ = '\0'; + /* If we de-escaped a char with the high bit set, make sure + * we still have valid data for the db encoding. Avoid calling strlen + * here for the sake of efficiency. 
+ */ + if (saw_high_bit) + { + char *fld = fieldvals[fieldno]; + pg_verifymbstr(fld, output_ptr - (fld + 1), false); + } + /* Check whether raw input matched null marker */ input_len = end_ptr - start_ptr; if (input_len == cstate->null_print_len && Index: src/backend/parser/scan.l =================================================================== RCS file: /cvsroot/pgsql/src/backend/parser/scan.l,v retrieving revision 1.140 diff -c -r1.140 scan.l *** src/backend/parser/scan.l 12 Aug 2007 20:18:06 -0000 1.140 --- src/backend/parser/scan.l 12 Sep 2007 03:21:26 -0000 *************** *** 60,65 **** --- 60,66 ---- bool standard_conforming_strings = false; static bool warn_on_first_escape; + static bool saw_high_bit = false; /* * literalbuf is used to accumulate literal values when multiple rules *************** *** 426,431 **** --- 427,433 ---- {xqstart} { warn_on_first_escape = true; + saw_high_bit = false; SET_YYLLOC(); if (standard_conforming_strings) BEGIN(xq); *************** *** 435,440 **** --- 437,443 ---- } {xestart} { warn_on_first_escape = false; + saw_high_bit = false; SET_YYLLOC(); BEGIN(xe); startlit(); *************** *** 443,448 **** --- 446,453 ---- <xq,xe>{quotefail} { yyless(1); BEGIN(INITIAL); + if (saw_high_bit) + pg_verifymbstr(literalbuf, literallen, false); yylval.str = litbufdup(); return SCONST; } *************** *** 469,486 **** --- 474,497 ---- } check_string_escape_warning(yytext[1]); addlitchar(unescape_single_char(yytext[1])); + if (IS_HIGHBIT_SET(literalbuf[literallen])) + saw_high_bit = true; } <xe>{xeoctesc} { unsigned char c = strtoul(yytext+1, NULL, 8); check_escape_warning(); addlitchar(c); + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } <xe>{xehexesc} { unsigned char c = strtoul(yytext+2, NULL, 16); check_escape_warning(); addlitchar(c); + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } <xq,xe>{quotecontinue} { /* ignore */
---------------------------(end of broadcast)--------------------------- TIP 1: if posting/reading through Usenet, please send an appropriate subscribe-nomail command to [EMAIL PROTECTED] so that your message can get through to the mailing list cleanly