Joe Conway <[EMAIL PROTECTED]> writes: While looking at it the last day or so, I started to think it might be better to use bison to parse array literals -- or is that a bad idea?
Offhand it doesn't seem like a super-appropriate tool. Once you get past the lexical details like quoting, the syntax of array literals is not complicated enough to need a bison parser. Also, the issues you're facing now like enforcing consistent dimensions are not amenable to solution by a context-free grammar --- so you'd still need most of the dimension-checking mechanisms.
I'm hesitant to apply the attached patch this late before the beta without review, but it seems to take care of the pathological cases I came up with, doesn't break anything AFAICS, and passes all regression tests. I guess it can go into beta 2.
Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 03:29:37 -0000 1.106
--- src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 05:50:07 -0000
***************
*** 351,368 ****
* The syntax for array input is C-like nested curly braces
*-----------------------------------------------------------------------------
*/
static int
ArrayCount(char *str, int *dim, char typdelim)
{
! int nest_level = 0,
! i;
! int ndim = 1,
! temp[MAXDIM],
! nelems[MAXDIM],
! nelems_last[MAXDIM];
! bool scanning_string = false;
! bool eoArray = false;
! char *ptr;
for (i = 0; i < MAXDIM; ++i)
{
--- 351,378 ----
* The syntax for array input is C-like nested curly braces
*-----------------------------------------------------------------------------
*/
+ typedef enum
+ {
+ ARRAY_NO_LEVEL,
+ ARRAY_LEVEL_STARTED,
+ ARRAY_ELEM_STARTED,
+ ARRAY_LEVEL_COMPLETED,
+ ARRAY_LEVEL_DELIMITED
+ } ArrayParseState;
+
static int
ArrayCount(char *str, int *dim, char typdelim)
{
! int nest_level = 0,
! i;
! int ndim = 1,
! temp[MAXDIM],
! nelems[MAXDIM],
! nelems_last[MAXDIM];
! bool scanning_string = false;
! bool eoArray = false;
! char *ptr;
! ArrayParseState parse_state = ARRAY_NO_LEVEL;
for (i = 0; i < MAXDIM; ++i)
{
***************
*** 389,394 ****
--- 399,416 ----
errmsg("malformed array literal: \"%s\"", str)));
break;
case '\\':
+ /*
+ * An escape must be after a level start, within an
+ * element, or after a delimiter. In any case
+ * we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
/* skip the escaped character */
if (*(ptr + 1))
ptr++;
***************
*** 398,408 ****
--- 420,454 ----
errmsg("malformed array literal: \"%s\"", str)));
break;
case '\"':
+ /*
+ * A quote must be after a level start, within an
+ * element, or after a delimiter. In any case
+ * we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
scanning_string = !scanning_string;
break;
case '{':
if (!scanning_string)
{
+ /*
+ * A left brace can occur if no nesting has
+ * occurred yet, after a level start, or
+ * after a delimiter.
+ */
+ if (parse_state != ARRAY_NO_LEVEL &&
+ parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_STARTED;
if (nest_level >= MAXDIM)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 463,480 ----
case '}':
if (!scanning_string)
{
+ /*
+ * A right brace can occur after a level start,
+ * after an element start, or after a level
+ * completion.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_COMPLETED;
if (nest_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 447,455 ****
--- 505,540 ----
default:
if (*ptr == typdelim && !scanning_string)
{
+ /*
+ * Delimiters can occur after an element start
+ * or after a level completion
+ */
+ if (parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_DELIMITED;
+
itemdone = true;
nelems[nest_level - 1]++;
}
+ else if (!isspace(*ptr) && !scanning_string)
+ {
+ /*
+ * Other non-space characters
+ * must be after a level start, within an
+ * element, or after a delimiter. In any case
+ * we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
+ }
break;
}
if (!itemdone)
---------------------------(end of broadcast)--------------------------- TIP 4: Don't 'kill -9' the postmaster
