Tom Lane wrote:

> So it looks like you need to recheck if unescape_single_char sees a
> high-bit-set char.
>
> You should take a second look at the COPY code to see if there's a
> similar case there --- I forget what it does with backslash followed
> by non-digit.


It's covered. Revised patch attached. I'll probably apply it sometime tomorrow.

cheers

andrew
Index: src/backend/commands/copy.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/copy.c,v
retrieving revision 1.286
diff -c -r1.286 copy.c
*** src/backend/commands/copy.c	7 Sep 2007 20:59:26 -0000	1.286
--- src/backend/commands/copy.c	12 Sep 2007 03:21:25 -0000
***************
*** 2685,2690 ****
--- 2685,2691 ----
  		char	   *start_ptr;
  		char	   *end_ptr;
  		int			input_len;
+ 		bool        saw_high_bit = false;
  
  		/* Make sure space remains in fieldvals[] */
  		if (fieldno >= maxfields)
***************
*** 2749,2754 ****
--- 2750,2757 ----
  								}
  							}
  							c = val & 0377;
+ 							if (IS_HIGHBIT_SET(c))
+ 								saw_high_bit = true;
  						}
  						break;
  					case 'x':
***************
*** 2772,2777 ****
--- 2775,2782 ----
  									}
  								}
  								c = val & 0xff;
+ 								if (IS_HIGHBIT_SET(c))
+ 									saw_high_bit = true;							
  							}
  						}
  						break;
***************
*** 2799,2805 ****
  						 * literally
  						 */
  				}
! 			}
  
  			/* Add c to output string */
  			*output_ptr++ = c;
--- 2804,2810 ----
  						 * literally
  						 */
  				}
! 			}			
  
  			/* Add c to output string */
  			*output_ptr++ = c;
***************
*** 2808,2813 ****
--- 2813,2828 ----
  		/* Terminate attribute value in output area */
  		*output_ptr++ = '\0';
  
+ 		/* If we de-escaped a char with the high bit set, re-verify that the
+ 		 * field is still valid data for the db encoding. The length is taken
+ 		 * from output_ptr instead of calling strlen, for the sake of efficiency.
+ 		 */
+ 		if (saw_high_bit)
+ 		{
+ 			char *fld = fieldvals[fieldno];
+ 			pg_verifymbstr(fld, output_ptr - (fld + 1), false);
+ 		}
+ 
  		/* Check whether raw input matched null marker */
  		input_len = end_ptr - start_ptr;
  		if (input_len == cstate->null_print_len &&
Index: src/backend/parser/scan.l
===================================================================
RCS file: /cvsroot/pgsql/src/backend/parser/scan.l,v
retrieving revision 1.140
diff -c -r1.140 scan.l
*** src/backend/parser/scan.l	12 Aug 2007 20:18:06 -0000	1.140
--- src/backend/parser/scan.l	12 Sep 2007 03:21:26 -0000
***************
*** 60,65 ****
--- 60,66 ----
  bool			standard_conforming_strings = false;
  
  static bool		warn_on_first_escape;
+ static bool     saw_high_bit = false;
  
  /*
   * literalbuf is used to accumulate literal values when multiple rules
***************
*** 426,431 ****
--- 427,433 ----
  
  {xqstart}		{
  					warn_on_first_escape = true;
+ 					saw_high_bit = false;
  					SET_YYLLOC();
  					if (standard_conforming_strings)
  						BEGIN(xq);
***************
*** 435,440 ****
--- 437,443 ----
  				}
  {xestart}		{
  					warn_on_first_escape = false;
+ 					saw_high_bit = false;
  					SET_YYLLOC();
  					BEGIN(xe);
  					startlit();
***************
*** 443,448 ****
--- 446,453 ----
  <xq,xe>{quotefail} {
  					yyless(1);
  					BEGIN(INITIAL);
+ 					if (saw_high_bit)
+ 						pg_verifymbstr(literalbuf, literallen, false);
  					yylval.str = litbufdup();
  					return SCONST;
  				}
***************
*** 469,486 ****
--- 474,497 ----
  					}
  					check_string_escape_warning(yytext[1]);
  					addlitchar(unescape_single_char(yytext[1]));
+ 					if (IS_HIGHBIT_SET(literalbuf[literallen]))
+ 						saw_high_bit = true;
  				}
  <xe>{xeoctesc}  {
  					unsigned char c = strtoul(yytext+1, NULL, 8);
  
  					check_escape_warning();
  					addlitchar(c);
+ 					if (IS_HIGHBIT_SET(c))
+ 						saw_high_bit = true;
  				}
  <xe>{xehexesc}  {
  					unsigned char c = strtoul(yytext+2, NULL, 16);
  
  					check_escape_warning();
  					addlitchar(c);
+ 					if (IS_HIGHBIT_SET(c))
+ 						saw_high_bit = true;
  				}
  <xq,xe>{quotecontinue} {
  					/* ignore */
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
       subscribe-nomail command to [EMAIL PROTECTED] so that your
       message can get through to the mailing list cleanly

Reply via email to