Tom Lane wrote:
So it looks like you need to recheck if unescape_single_char sees a
high-bit-set char.
You should take a second look at the COPY code to see if there's a
similar case there --- I forget what it does with backslash followed
by non-digit.
It's covered. Revised patch attached. I'll probably apply this some time
tomorrow.
cheers
andrew
Index: src/backend/commands/copy.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/copy.c,v
retrieving revision 1.286
diff -c -r1.286 copy.c
*** src/backend/commands/copy.c 7 Sep 2007 20:59:26 -0000 1.286
--- src/backend/commands/copy.c 12 Sep 2007 03:21:25 -0000
***************
*** 2685,2690 ****
--- 2685,2691 ----
char *start_ptr;
char *end_ptr;
int input_len;
+ bool saw_high_bit = false;
/* Make sure space remains in fieldvals[] */
if (fieldno >= maxfields)
***************
*** 2749,2754 ****
--- 2750,2757 ----
}
}
c = val & 0377;
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
break;
case 'x':
***************
*** 2772,2777 ****
--- 2775,2782 ----
}
}
c = val & 0xff;
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
}
break;
***************
*** 2799,2805 ****
* literally
*/
}
! }
/* Add c to output string */
*output_ptr++ = c;
--- 2804,2810 ----
* literally
*/
}
! }
/* Add c to output string */
*output_ptr++ = c;
***************
*** 2808,2813 ****
--- 2813,2828 ----
/* Terminate attribute value in output area */
*output_ptr++ = '\0';
+ /* If we de-escaped a char with the high bit set, make sure
+ * we still have valid data for the db encoding. Avoid calling strlen
+ * here for the sake of efficiency.
+ */
+ if (saw_high_bit)
+ {
+ char *fld = fieldvals[fieldno];
+ pg_verifymbstr(fld, output_ptr - (fld + 1), false);
+ }
+
/* Check whether raw input matched null marker */
input_len = end_ptr - start_ptr;
if (input_len == cstate->null_print_len &&
Index: src/backend/parser/scan.l
===================================================================
RCS file: /cvsroot/pgsql/src/backend/parser/scan.l,v
retrieving revision 1.140
diff -c -r1.140 scan.l
*** src/backend/parser/scan.l 12 Aug 2007 20:18:06 -0000 1.140
--- src/backend/parser/scan.l 12 Sep 2007 03:21:26 -0000
***************
*** 60,65 ****
--- 60,66 ----
bool standard_conforming_strings = false;
static bool warn_on_first_escape;
+ static bool saw_high_bit = false;
/*
* literalbuf is used to accumulate literal values when multiple rules
***************
*** 426,431 ****
--- 427,433 ----
{xqstart} {
warn_on_first_escape = true;
+ saw_high_bit = false;
SET_YYLLOC();
if (standard_conforming_strings)
BEGIN(xq);
***************
*** 435,440 ****
--- 437,443 ----
}
{xestart} {
warn_on_first_escape = false;
+ saw_high_bit = false;
SET_YYLLOC();
BEGIN(xe);
startlit();
***************
*** 443,448 ****
--- 446,453 ----
<xq,xe>{quotefail} {
yyless(1);
BEGIN(INITIAL);
+ if (saw_high_bit)
+ pg_verifymbstr(literalbuf, literallen, false);
yylval.str = litbufdup();
return SCONST;
}
***************
*** 469,486 ****
--- 474,497 ----
}
check_string_escape_warning(yytext[1]);
addlitchar(unescape_single_char(yytext[1]));
+ if (IS_HIGHBIT_SET(literalbuf[literallen]))
+ saw_high_bit = true;
}
<xe>{xeoctesc} {
unsigned char c = strtoul(yytext+1, NULL, 8);
check_escape_warning();
addlitchar(c);
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
<xe>{xehexesc} {
unsigned char c = strtoul(yytext+2, NULL, 16);
check_escape_warning();
addlitchar(c);
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
<xq,xe>{quotecontinue} {
/* ignore */
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly