Tom Lane wrote:
Andrew Dunstan <[EMAIL PROTECTED]> writes:
Tom Lane wrote:
... But how about
42$foo$
This is a syntax error in 7.4, and we propose to redefine it as an
integer literal '42' followed by a dollar-quote start symbol.
The test should not succeed anywhere in the string '42$foo$'.
No, it won't. The problem is that it should, because the backend will see that as '42' followed by a $foo$ quote start.
Ok, I see what you are saying. This mismatch would only happen on invalid input, though. I believe that what I did will work on all legal input.
I think that this might be cured by having psql recognise a legal identifier or keyword and eating it as a word, rather than treating it as just another set of bytes in the stream. That would enable us to avoid the lookback in the dollar-quote recognition test altogether. The attached patch does it that way - the keyword/id test needs to come right at the end of the loop to avoid clashing with backslash commands, btw.
I *think* that this way psql will recognise the start of a dollar quote iff the backend lexer would.
Interacting with lexer states would probably be ... unpleasant. Matching a stream oriented lexer with a line oriented CLI would be messy I suspect.
I think it would not be that bad. We'd have to run the lexer on the command input buffer and see what state it terminates in.
Yeah. I am not enough of a flex wizard to undertake the task, though. It would take me lots of time. If we make a decision that we really need this in order to do dollar quoting in psql I would need some substantial help, at least.
cheers
andrew
Index: src/bin/psql/mainloop.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mainloop.c,v
retrieving revision 1.61
diff -c -r1.61 mainloop.c
*** src/bin/psql/mainloop.c 25 Jan 2004 03:07:22 -0000 1.61
--- src/bin/psql/mainloop.c 15 Feb 2004 14:28:02 -0000
***************
*** 21,26 ****
--- 21,61 ----
sigjmp_buf main_loop_jmp;
#endif
+ /*
+ * Detect a valid $foo$ dollar-quote delimiter at the start of dquote.
+ * Returns true for "$$" or for "$tag$", where tag begins with a letter or a
+ * high-bit byte and continues with alphanumerics, '_' or high-bit bytes.
+ * A NUL reached before the closing '$' is rejected like any invalid byte.
+ */
+ static bool valid_dolquote(char * dquote)
+ {
+     int i;
+
+     /* must start with a $ */
+     if (dquote[0] != '$')
+         return false;
+
+     /* empty 'identifier' case */
+     if (dquote[1] == '$')
+         return true;
+
+     /* high bit is tested first so isalpha() never sees a negative char */
+     if ((dquote[1] & 0x80) == 0 && !isalpha((unsigned char) dquote[1]))
+         return false;
+
+     /* subsequent chars must be alphanumeric or _ or have high bit set */
+     for (i = 2; dquote[i] != '$'; i++)
+     {
+         if ((dquote[i] & 0x80) == 0 && !isalnum((unsigned char) dquote[i]) &&
+             dquote[i] != '_')
+         {
+             /* we found an invalid character (this includes the NUL) */
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+
/*
* Main processing loop for reading lines of input
***************
*** 49,54 ****
--- 84,92 ----
unsigned int query_start;
volatile int count_eof = 0;
volatile unsigned int bslash_count = 0;
+ volatile bool free_dolquote = false;
+ char *dol_quote = NULL;
+
int i,
prevlen,
***************
*** 120,125 ****
--- 158,164 ----
in_quote = 0;
paren_level = 0;
count_eof = 0;
+ free_dolquote = true;
slashCmdStatus = CMD_UNKNOWN;
}
else
***************
*** 136,141 ****
--- 175,190 ----
pqsignal(SIGINT, handle_sigint); /* control-C => cancel */
#endif /* not WIN32 */
+ if (free_dolquote)
+ {
+ if(dol_quote)
+ {
+ free(dol_quote);
+ dol_quote = NULL;
+ }
+ free_dolquote = false;
+ }
+
fflush(stdout);
if (slashCmdStatus == CMD_NEWEDIT)
***************
*** 150,155 ****
--- 199,209 ----
in_xcomment = 0;
in_quote = 0;
paren_level = 0;
+ if(dol_quote)
+ {
+ free(dol_quote);
+ dol_quote = NULL;
+ }
slashCmdStatus = CMD_UNKNOWN;
}
***************
*** 161,167 ****
{
int prompt_status;
! if (in_quote && in_quote == '\'')
prompt_status = PROMPT_SINGLEQUOTE;
else if (in_quote && in_quote == '"')
prompt_status = PROMPT_DOUBLEQUOTE;
--- 215,223 ----
{
int prompt_status;
! if (dol_quote)
! prompt_status = PROMPT_DOLLARQUOTE;
! else if (in_quote && in_quote == '\'')
prompt_status = PROMPT_SINGLEQUOTE;
else if (in_quote && in_quote == '"')
prompt_status = PROMPT_DOUBLEQUOTE;
***************
*** 268,273 ****
--- 324,343 ----
in_quote = 0;
}
+ /* in or end of $foo$ type quote? */
+
+ else if (dol_quote)
+ {
+ if (strncmp(line+i,dol_quote,strlen(dol_quote)) == 0)
+ {
+ ADVANCE_1;
+ while(line[i] != '$')
+ ADVANCE_1;
+ free(dol_quote);
+ dol_quote = NULL;
+ }
+ }
+
/* start of extended comment? */
else if (line[i] == '/' && line[i + thislen] == '*')
{
***************
*** 288,293 ****
--- 358,383 ----
else if (line[i] == '\'' || line[i] == '"')
in_quote = line[i];
+ /*
+ * start of $foo$ type quote?
+ */
+ else if (!dol_quote && valid_dolquote(line+i))
+ {
+ char * dol_end;
+ char eos;
+
+ dol_end = strchr(line+i+1,'$');
+ dol_end ++;
+ eos = *dol_end;
+ *dol_end = '\0';
+ dol_quote = pg_strdup(line+i);
+ *dol_end = eos;
+ ADVANCE_1;
+ while(line[i] != '$')
+ ADVANCE_1;
+
+ }
+
/* single-line comment? truncate line */
else if (line[i] == '-' && line[i + thislen] == '-')
{
***************
*** 447,452 ****
--- 537,566 ----
i = end_of_cmd - line;
query_start = i;
}
+
+ /*
+ * keyword or identifier?
+ * We grab the whole string so that we don't
+ * mistakenly see $foo$ inside an identifier as the start
+ * of a dollar quote.
+ */
+
+ else if ( (line[i] & 0x80) != 0 ||
+ isalpha(line[i]) ||
+ line[i] == '_')
+ {
+ while ((line[i+thislen] & 0x80) != 0 ||
+ isalnum(line[i+thislen]) ||
+ line[i+thislen] == '_' ||
+ line[i+thislen] == '$' )
+ {
+ /* keep going while we still have identifier chars */
+ ADVANCE_1;
+ }
+
+
+ }
+
} /* for (line) */
***************
*** 458,464 ****
/* Put the rest of the line in the query buffer. */
! if (in_quote || line[query_start + strspn(line + query_start, " \t\n\r")] != '\0')
{
if (query_buf->len > 0)
appendPQExpBufferChar(query_buf, '\n');
--- 572,579 ----
/* Put the rest of the line in the query buffer. */
! if (in_quote || dol_quote ||
! line[query_start + strspn(line + query_start, " \t\n\r")] != '\0')
{
if (query_buf->len > 0)
appendPQExpBufferChar(query_buf, '\n');
Index: src/bin/psql/prompt.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.c,v
retrieving revision 1.34
diff -c -r1.34 prompt.c
*** src/bin/psql/prompt.c 25 Jan 2004 03:07:22 -0000 1.34
--- src/bin/psql/prompt.c 15 Feb 2004 14:28:02 -0000
***************
*** 85,90 ****
--- 85,91 ----
case PROMPT_CONTINUE:
case PROMPT_SINGLEQUOTE:
case PROMPT_DOUBLEQUOTE:
+ case PROMPT_DOLLARQUOTE:
case PROMPT_COMMENT:
case PROMPT_PAREN:
prompt_name = "PROMPT2";
***************
*** 198,203 ****
--- 199,207 ----
break;
case PROMPT_DOUBLEQUOTE:
buf[0] = '"';
+ break;
+ case PROMPT_DOLLARQUOTE:
+ buf[0] = '$';
break;
case PROMPT_COMMENT:
buf[0] = '*';
Index: src/bin/psql/prompt.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.h,v
retrieving revision 1.13
diff -c -r1.13 prompt.h
*** src/bin/psql/prompt.h 29 Nov 2003 19:52:07 -0000 1.13
--- src/bin/psql/prompt.h 15 Feb 2004 14:28:02 -0000
***************
*** 15,20 ****
--- 15,21 ----
PROMPT_COMMENT,
PROMPT_SINGLEQUOTE,
PROMPT_DOUBLEQUOTE,
+ PROMPT_DOLLARQUOTE,
PROMPT_PAREN,
PROMPT_COPY
} promptStatus_t;
Index: src/backend/parser/scan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/parser/scan.l,v
retrieving revision 1.112
diff -c -r1.112 scan.l
*** src/backend/parser/scan.l 29 Nov 2003 19:51:52 -0000 1.112
--- src/backend/parser/scan.l 15 Feb 2004 14:28:16 -0000
***************
*** 39,44 ****
--- 39,46 ----
static int xcdepth = 0; /* depth of nesting in slash-star comments */
+ static char *dolqstart; /* current $foo$ quote start string */
+
/*
* literalbuf is used to accumulate literal values when multiple rules
* are needed to parse a single literal. Call startlit to reset buffer
***************
*** 95,100 ****
--- 97,103 ----
* <xd> delimited identifiers (double-quoted identifiers)
* <xh> hexadecimal numeric string
* <xq> quoted strings
+ * <dolq> $foo$-style quoted strings
*/
%x xb
***************
*** 102,107 ****
--- 105,111 ----
%x xd
%x xh
%x xq
+ %x dolq
/* Bit string
* It is tempting to scan the string for only those characters
***************
*** 141,146 ****
--- 145,159 ----
xqoctesc [\\][0-7]{1,3}
xqcat {quote}{whitespace_with_newline}{quote}
+ /* $foo$ style quotes ("dollar quoting")
+ * The quoted string starts with $foo$ where "foo" is an optional string
+ * in the form of an identifier, except that it may not contain "$",
+ * and extends to the first occurrence
+ * of an identical string. There is *no* processing of the quoted text.
+ */
+ dolqdelim \$([A-Za-z\200-\377][A-Za-z\200-\377_0-9]*)?\$
+ dolqinside [^$]+
+
/* Double quote
* Allows embedded spaces and other special characters into identifiers.
*/
***************
*** 387,392 ****
--- 400,434 ----
}
<xq><<EOF>> { yyerror("unterminated quoted string"); }
+ {dolqdelim} {
+ token_start = yytext;
+ dolqstart = pstrdup(yytext);
+ BEGIN(dolq);
+ startlit();
+ }
+ <dolq>{dolqdelim} {
+ if (strcmp(yytext, dolqstart) == 0)
+ {
+ pfree(dolqstart);
+ BEGIN(INITIAL);
+ yylval.str = litbufdup();
+ return SCONST;
+ }
+ /*
+ * When we fail to match $...$ to dolqstart, transfer
+ * the $... part to the output, but put back the final
+ * $ for rescanning. Consider $delim$...$junk$delim$
+ */
+ addlit(yytext, yyleng-1);
+ yyless(yyleng-1);
+ }
+ <dolq>{dolqinside} {
+ addlit(yytext, yyleng);
+ }
+ <dolq>. {
+ addlitchar(yytext[0]);
+ }
+ <dolq><<EOF>> { yyerror("unterminated special-quoted string"); }
{xdstart} {
token_start = yytext;
---------------------------(end of broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]
