Attached is a patch to the scanner and the COPY code that checks for invalidly encoded data, which can currently leak into our system via backslash (\) escapes in quoted literals or in text-mode COPY fields, as recently discussed. Applying this patch would still leave holes via chr(), convert() and possibly other functions, but these two paths are the biggest holes that need plugging.


cheers

andrew
Index: src/backend/commands/copy.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/copy.c,v
retrieving revision 1.286
diff -c -r1.286 copy.c
*** src/backend/commands/copy.c	7 Sep 2007 20:59:26 -0000	1.286
--- src/backend/commands/copy.c	11 Sep 2007 16:33:38 -0000
***************
*** 2685,2690 ****
--- 2685,2691 ----
  		char	   *start_ptr;
  		char	   *end_ptr;
  		int			input_len;
+ 		bool        saw_high_bit = false;
  
  		/* Make sure space remains in fieldvals[] */
  		if (fieldno >= maxfields)
***************
*** 2749,2754 ****
--- 2750,2757 ----
  								}
  							}
  							c = val & 0377;
+ 							if (IS_HIGHBIT_SET(c))
+ 								saw_high_bit = true;
  						}
  						break;
  					case 'x':
***************
*** 2772,2777 ****
--- 2775,2782 ----
  									}
  								}
  								c = val & 0xff;
+ 								if (IS_HIGHBIT_SET(c))
+ 									saw_high_bit = true;							
  							}
  						}
  						break;
***************
*** 2799,2805 ****
  						 * literally
  						 */
  				}
! 			}
  
  			/* Add c to output string */
  			*output_ptr++ = c;
--- 2804,2810 ----
  						 * literally
  						 */
  				}
! 			}			
  
  			/* Add c to output string */
  			*output_ptr++ = c;
***************
*** 2808,2813 ****
--- 2813,2828 ----
  		/* Terminate attribute value in output area */
  		*output_ptr++ = '\0';
  
+ 		/* If we de-escaped a char with the high bit set, make sure
+ 		 * we still have valid data for the db encoding. Avoid calling strlen 
+ 		 * here for the sake of efficiency.
+ 		 */
+ 		if (saw_high_bit)
+ 		{
+ 			char *fld = fieldvals[fieldno];
+ 			pg_verifymbstr(fld, output_ptr - (fld + 1), false);
+ 		}
+ 
  		/* Check whether raw input matched null marker */
  		input_len = end_ptr - start_ptr;
  		if (input_len == cstate->null_print_len &&
Index: src/backend/parser/scan.l
===================================================================
RCS file: /cvsroot/pgsql/src/backend/parser/scan.l,v
retrieving revision 1.140
diff -c -r1.140 scan.l
*** src/backend/parser/scan.l	12 Aug 2007 20:18:06 -0000	1.140
--- src/backend/parser/scan.l	11 Sep 2007 16:33:38 -0000
***************
*** 443,448 ****
--- 443,449 ----
  <xq,xe>{quotefail} {
  					yyless(1);
  					BEGIN(INITIAL);
+ 					pg_verifymbstr(literalbuf, literallen, false);
  					yylval.str = litbufdup();
  					return SCONST;
  				}
***************
*** 508,513 ****
--- 509,515 ----
  					{
  						pfree(dolqstart);
  						BEGIN(INITIAL);
+ 						pg_verifymbstr(literalbuf, literallen, false);
  						yylval.str = litbufdup();
  						return SCONST;
  					}
***************
*** 545,550 ****
--- 547,553 ----
  					BEGIN(INITIAL);
  					if (literallen == 0)
  						yyerror("zero-length delimited identifier");
+ 					pg_verifymbstr(literalbuf, literallen, false);
  					ident = litbufdup();
  					if (literallen >= NAMEDATALEN)
  						truncate_identifier(ident, literallen, true);
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
       subscribe-nomail command to [EMAIL PROTECTED] so that your
       message can get through to the mailing list cleanly

Reply via email to