Tom Lane wrote:
Joe Conway <[EMAIL PROTECTED]> writes:
While looking at it the last day or so, I started to think it might be better to use bison to parse array literals -- or is that a bad idea?

Offhand it doesn't seem like a super-appropriate tool. Once you get past the lexical details like quoting, the syntax of array literals is not complicated enough to need a bison parser. Also, the issues you're facing now like enforcing consistent dimensions are not amenable to solution by a context-free grammar --- so you'd still need most of the dimension-checking mechanisms.

I'm hesitant to apply the attached patch this close to the beta without review, but it seems to take care of the pathological cases I came up with, doesn't break anything AFAICS, and passes all regression tests. I guess it can go into beta 2.


Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c	5 Aug 2004 03:29:37 -0000	1.106
--- src/backend/utils/adt/arrayfuncs.c	5 Aug 2004 05:50:07 -0000
***************
*** 351,368 ****
   *		 The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
! 	int			nest_level = 0,
! 				i;
! 	int			ndim = 1,
! 				temp[MAXDIM],
! 				nelems[MAXDIM],
! 				nelems_last[MAXDIM];
! 	bool		scanning_string = false;
! 	bool		eoArray = false;
! 	char	   *ptr;
  
  	for (i = 0; i < MAXDIM; ++i)
  	{
--- 351,378 ----
   *		 The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
+ typedef enum	/* Parse states ArrayCount tracks while scanning an array literal. */
+ {
+ 	ARRAY_NO_LEVEL,	/* initial state: no '{' has been seen yet */
+ 	ARRAY_LEVEL_STARTED,	/* an unquoted '{' was just seen */
+ 	ARRAY_ELEM_STARTED,	/* a backslash, quote, or other non-space element character was seen */
+ 	ARRAY_LEVEL_COMPLETED,	/* an unquoted '}' was just seen */
+ 	ARRAY_LEVEL_DELIMITED	/* an unquoted delimiter (typdelim) was just seen */
+ } ArrayParseState;
+ 
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
! 	int				nest_level = 0,
! 					i;
! 	int				ndim = 1,
! 					temp[MAXDIM],
! 					nelems[MAXDIM],
! 					nelems_last[MAXDIM];
! 	bool			scanning_string = false;
! 	bool			eoArray = false;
! 	char		   *ptr;
! 	ArrayParseState	parse_state = ARRAY_NO_LEVEL;
  
  	for (i = 0; i < MAXDIM; ++i)
  	{
***************
*** 389,394 ****
--- 399,416 ----
  						errmsg("malformed array literal: \"%s\"", str)));
  					break;
  				case '\\':
+ 					/*
+ 					 * An escape must be after a level start, within an
+ 					 * element, or after a delimiter. In any case
+ 					 * we now must be past an element start.
+ 					 */
+ 					if (parse_state != ARRAY_LEVEL_STARTED &&
+ 						parse_state != ARRAY_ELEM_STARTED &&
+ 						parse_state != ARRAY_LEVEL_DELIMITED)
+ 						ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 							errmsg("malformed array literal: \"%s\"", str)));
+ 					parse_state = ARRAY_ELEM_STARTED;
  					/* skip the escaped character */
  					if (*(ptr + 1))
  						ptr++;
***************
*** 398,408 ****
--- 420,454 ----
  						errmsg("malformed array literal: \"%s\"", str)));
  					break;
  				case '\"':
+ 					/*
+ 					 * A quote must be after a level start, within an
+ 					 * element, or after a delimiter. In any case
+ 					 * we now must be past an element start.
+ 					 */
+ 					if (parse_state != ARRAY_LEVEL_STARTED &&
+ 						parse_state != ARRAY_ELEM_STARTED &&
+ 						parse_state != ARRAY_LEVEL_DELIMITED)
+ 						ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 							errmsg("malformed array literal: \"%s\"", str)));
+ 					parse_state = ARRAY_ELEM_STARTED;
  					scanning_string = !scanning_string;
  					break;
  				case '{':
  					if (!scanning_string)
  					{
+ 						/*
+ 						 * A left brace can occur if no nesting has
+ 						 * occurred yet, after a level start, or
+ 						 * after a delimiter.
+ 						 */
+ 						if (parse_state != ARRAY_NO_LEVEL &&
+ 							parse_state != ARRAY_LEVEL_STARTED &&
+ 							parse_state != ARRAY_LEVEL_DELIMITED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_STARTED;
  						if (nest_level >= MAXDIM)
  							ereport(ERROR,
  								(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 463,480 ----
  				case '}':
  					if (!scanning_string)
  					{
+ 						/*
+ 						 * A right brace can occur after a level start,
+ 						 * after an element start, or after a level
+ 						 * completion.
+ 						 */
+ 						if (parse_state != ARRAY_LEVEL_STARTED &&
+ 							parse_state != ARRAY_ELEM_STARTED &&
+ 							parse_state != ARRAY_LEVEL_COMPLETED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_COMPLETED;
  						if (nest_level == 0)
  							ereport(ERROR,
  							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 447,455 ****
--- 505,540 ----
  				default:
  					if (*ptr == typdelim && !scanning_string)
  					{
+ 						/*
+ 						* Delimiters can occur after an element start
+ 						* or after a level completion
+ 						*/
+ 						if (parse_state != ARRAY_ELEM_STARTED &&
+ 							parse_state != ARRAY_LEVEL_COMPLETED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_DELIMITED;
+ 
  						itemdone = true;
  						nelems[nest_level - 1]++;
  					}
+ 					else if (!isspace(*ptr) && !scanning_string)
+ 					{
+ 						/*
+ 						* Other non-space characters
+ 						* must be after a level start, within an
+ 						* element, or after a delimiter. In any case
+ 						* we now must be past an element start.
+ 						*/
+ 						if (parse_state != ARRAY_LEVEL_STARTED &&
+ 							parse_state != ARRAY_ELEM_STARTED &&
+ 							parse_state != ARRAY_LEVEL_DELIMITED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_ELEM_STARTED;
+ 					}
  					break;
  			}
  			if (!itemdone)
---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

Reply via email to