I think the attached patch addresses Tom's comments.


I ended up not using a regex, which seemed to be a little heavy handed, but just writing a small custom recognition function, that should (and I think does) mimic the pattern recognition for these tokens used by the backend lexer. This patch just puts that function in mainloop.c, but perhaps it belongs elsewhere (string_utils.c maybe?). I don't have strong opinions on that.

Enjoy

andrew



Tom Lane wrote:

Andrew Dunstan <[EMAIL PROTECTED]> writes:


Comments welcome. Reviewers: I am not sure I got multi-byte stuff right in psql/mainloop.c - please pay close attention to that.



The i-1 stuff should generally be i-prevlen. Not sure if there are any other pitfalls.

A bigger problem here:



+ else if (!dol_quote && line[i] == '$' && + !isdigit(line[i + thislen]) && + (dol_end = strchr(line+i+1,'$')) != NULL &&
+ (i == 0 || + ! ((line[i-1] & 0x80) != 0 || isalnum(line[i-1]) || + line[i-1] == '_')))
+ {



is that you aren't checking that what comes between the two dollar signs looks like empty-or-an-identifier. The check for next-char-isn't-a-digit is part of that but not the only part.

Also I'm not sure about the positioning of these tests relative to the
in_quote and in_xcomment tests.  As you have it, $foo$ will be
recognized within an xcomment, which I think is at variance with the
proposed backend lexing behavior.

Also, the strdup should be pg_strdup.

regards, tom lane

---------------------------(end of broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]



Index: src/backend/parser/scan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/parser/scan.l,v
retrieving revision 1.112
diff -c -w -r1.112 scan.l
*** src/backend/parser/scan.l   29 Nov 2003 19:51:52 -0000      1.112
--- src/backend/parser/scan.l   9 Feb 2004 15:26:34 -0000
***************
*** 39,44 ****
--- 39,46 ----
  
  static int            xcdepth = 0;    /* depth of nesting in slash-star comments */
  
+ static char    *dolqstart;  /* current $foo$ quote start string */
+ 
  /*
   * literalbuf is used to accumulate literal values when multiple rules
   * are needed to parse a single literal.  Call startlit to reset buffer
***************
*** 95,100 ****
--- 97,103 ----
   *  <xd> delimited identifiers (double-quoted identifiers)
   *  <xh> hexadecimal numeric string
   *  <xq> quoted strings
+  *  <dolq> $foo$-style quoted strings
   */
  
  %x xb
***************
*** 102,107 ****
--- 105,111 ----
  %x xd
  %x xh
  %x xq
+ %x dolq
  
  /* Bit string
   * It is tempting to scan the string for only those characters
***************
*** 141,146 ****
--- 145,159 ----
  xqoctesc              [\\][0-7]{1,3}
  xqcat                 {quote}{whitespace_with_newline}{quote}
  
+ /* $foo$ style quotes ("dollar quoting")
+  * The quoted string starts with $foo$ where "foo" is an optional string
+  * in the form of an identifier, except that it may not contain "$", 
+  * and extends to the first occurrence
+  * of an identical string.  There is *no* processing of the quoted text.
+  */
+ dolqdelim   \$([A-Za-z\200-\377][A-Za-z\200-\377_0-9]*)?\$
+ dolqinside  [^$]+
+ 
  /* Double quote
   * Allows embedded spaces and other special characters into identifiers.
   */
***************
*** 387,392 ****
--- 400,434 ----
                                }
  <xq><<EOF>>           { yyerror("unterminated quoted string"); }
  
+ {dolqdelim}  {
+      token_start = yytext;
+      dolqstart = pstrdup(yytext);
+      BEGIN(dolq);
+      startlit();
+     }
+ <dolq>{dolqdelim} {
+      if (strcmp(yytext, dolqstart) == 0)
+      {
+       pfree(dolqstart);
+       BEGIN(INITIAL);
+       yylval.str = litbufdup();
+       return SCONST;
+      }
+      /*
+       * When we fail to match $...$ to dolqstart, transfer
+       * the $... part to the output, but put back the final
+       * $ for rescanning.  Consider $delim$...$junk$delim$
+       */
+      addlit(yytext, yyleng-1);
+      yyless(yyleng-1);
+     }
+ <dolq>{dolqinside}  {
+      addlit(yytext, yyleng);
+     }
+ <dolq>.           {
+      addlitchar(yytext[0]);
+     }
+ <dolq><<EOF>>  { yyerror("unterminated special-quoted string"); }
  
  {xdstart}             {
                                        token_start = yytext;
Index: src/bin/psql/mainloop.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mainloop.c,v
retrieving revision 1.61
diff -c -w -r1.61 mainloop.c
*** src/bin/psql/mainloop.c     25 Jan 2004 03:07:22 -0000      1.61
--- src/bin/psql/mainloop.c     9 Feb 2004 15:26:51 -0000
***************
*** 21,26 ****
--- 21,61 ----
  sigjmp_buf    main_loop_jmp;
  #endif
  
+ /*
+  * function to detect a valid $foo$ quote delimiter at the start of the
+  * parameter dquote.
+  */
+ 
+ static bool valid_dolquote(char * dquote)
+ {
+       int i;
+ 
+       /* must start with a $ */
+       if (dquote[0] != '$')
+               return false;
+ 
+       /* empty 'identifier' case */
+       if (dquote[1] == '$')
+               return true;
+ 
+       /* first 'identifier' char must be a letter or have high bit set */
+       if (!isalpha(dquote[1]) && (dquote[1] & 0x80) == 0)
+               return false;
+ 
+       /* subsequent chars must be alphanumeric or _ or have high bit set */
+       for (i = 2; dquote[i] != '$'; i++)
+       {
+               if ((dquote[i] & 0x80) == 0 && ! isalnum(dquote[i]) &&
+                       dquote[i] != '_')
+               {
+                       /* we found an invalid character */
+                       return false;
+               }
+       }
+ 
+       return true;
+ }
+ 
  
  /*
   * Main processing loop for reading lines of input
***************
*** 49,54 ****
--- 84,92 ----
        unsigned int query_start;
        volatile int count_eof = 0;
        volatile unsigned int bslash_count = 0;
+       volatile bool free_dolquote = false;
+       char *dol_quote = NULL;
+ 
  
        int                     i,
                                prevlen,
***************
*** 120,125 ****
--- 158,164 ----
                                in_quote = 0;
                                paren_level = 0;
                                count_eof = 0;
+                               free_dolquote = true;
                                slashCmdStatus = CMD_UNKNOWN;
                        }
                        else
***************
*** 136,141 ****
--- 175,190 ----
                pqsignal(SIGINT, handle_sigint);                /* control-C => cancel 
*/
  #endif   /* not WIN32 */
  
+               if (free_dolquote)
+               {
+                       if(dol_quote)
+                       {
+                               free(dol_quote);
+                               dol_quote = NULL;
+                       }
+                       free_dolquote = false;
+               }
+ 
                fflush(stdout);
  
                if (slashCmdStatus == CMD_NEWEDIT)
***************
*** 150,155 ****
--- 199,209 ----
                        in_xcomment = 0;
                        in_quote = 0;
                        paren_level = 0;
+                       if(dol_quote)
+                       {
+                               free(dol_quote);
+                               dol_quote = NULL;
+                       }
                        slashCmdStatus = CMD_UNKNOWN;
                }
  
***************
*** 161,167 ****
                {
                        int                     prompt_status;
  
!                       if (in_quote && in_quote == '\'')
                                prompt_status = PROMPT_SINGLEQUOTE;
                        else if (in_quote && in_quote == '"')
                                prompt_status = PROMPT_DOUBLEQUOTE;
--- 215,223 ----
                {
                        int                     prompt_status;
  
!                       if (dol_quote)
!                               prompt_status = PROMPT_DOLLARQUOTE;
!                       else if (in_quote && in_quote == '\'')
                                prompt_status = PROMPT_SINGLEQUOTE;
                        else if (in_quote && in_quote == '"')
                                prompt_status = PROMPT_DOUBLEQUOTE;
***************
*** 268,273 ****
--- 324,343 ----
                                        in_quote = 0;
                        }
  
+                       /* in or end of $foo$ type quote? */
+ 
+                       else if (dol_quote)
+                       {
+                               if (strncmp(line+i,dol_quote,strlen(dol_quote)) == 0)
+                               {
+                                       ADVANCE_1;
+                                       while(line[i] != '$')
+                                               ADVANCE_1;
+                                       free(dol_quote);
+                                       dol_quote = NULL;
+                               }
+                       }
+ 
                        /* start of extended comment? */
                        else if (line[i] == '/' && line[i + thislen] == '*')
                        {
***************
*** 288,297 ****
                        else if (line[i] == '\'' || line[i] == '"')
                                in_quote = line[i];
  
                        /* single-line comment? truncate line */
                        else if (line[i] == '-' && line[i + thislen] == '-')
                        {
!                               line[i] = '\0'; /* remove comment */
                                break;
                        }
  
--- 358,395 ----
                        else if (line[i] == '\'' || line[i] == '"')
                                in_quote = line[i];
  
+                       /* 
+                        * start of $foo$ type quote? 
+                        * 
+                        * must not be preceded by a valid identifier character
+                        */
+ 
+                       else if (!dol_quote && valid_dolquote(line+i) && 
+                                        (i == 0 || 
+                                         ! ((line[i-prevlen] & 0x80) != 0 || 
+                                                isalnum(line[i-prevlen]) || 
+                                                line[i-prevlen] == '_' ||
+                                                line[i-prevlen] == '$' )))
+                       {
+                               char * dol_end;
+                               char eos;
+ 
+                               dol_end = strchr(line+i+1,'$');
+                               dol_end ++;
+                               eos = *dol_end;
+                               *dol_end = '\0';
+                               dol_quote = pg_strdup(line+i);
+                               *dol_end = eos;
+                               ADVANCE_1;
+                               while(line[i] != '$')
+                                       ADVANCE_1;
+                               
+                       }
+ 
                        /* single-line comment? truncate line */
                        else if (line[i] == '-' && line[i + thislen] == '-')
                        {
!                               line[i] = '\0'; /* removae comment */
                                break;
                        }
  
***************
*** 458,464 ****
  
  
                /* Put the rest of the line in the query buffer. */
!               if (in_quote || line[query_start + strspn(line + query_start, " 
\t\n\r")] != '\0')
                {
                        if (query_buf->len > 0)
                                appendPQExpBufferChar(query_buf, '\n');
--- 556,563 ----
  
  
                /* Put the rest of the line in the query buffer. */
!               if (in_quote || dol_quote ||
!                       line[query_start + strspn(line + query_start, " \t\n\r")] != 
'\0')
                {
                        if (query_buf->len > 0)
                                appendPQExpBufferChar(query_buf, '\n');
Index: src/bin/psql/prompt.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.c,v
retrieving revision 1.34
diff -c -w -r1.34 prompt.c
*** src/bin/psql/prompt.c       25 Jan 2004 03:07:22 -0000      1.34
--- src/bin/psql/prompt.c       9 Feb 2004 15:26:51 -0000
***************
*** 85,90 ****
--- 85,91 ----
                case PROMPT_CONTINUE:
                case PROMPT_SINGLEQUOTE:
                case PROMPT_DOUBLEQUOTE:
+               case PROMPT_DOLLARQUOTE:
                case PROMPT_COMMENT:
                case PROMPT_PAREN:
                        prompt_name = "PROMPT2";
***************
*** 198,203 ****
--- 199,207 ----
                                                        break;
                                                case PROMPT_DOUBLEQUOTE:
                                                        buf[0] = '"';
+                                                       break;
+                                               case PROMPT_DOLLARQUOTE:
+                                                       buf[0] = '$';
                                                        break;
                                                case PROMPT_COMMENT:
                                                        buf[0] = '*';
Index: src/bin/psql/prompt.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.h,v
retrieving revision 1.13
diff -c -w -r1.13 prompt.h
*** src/bin/psql/prompt.h       29 Nov 2003 19:52:07 -0000      1.13
--- src/bin/psql/prompt.h       9 Feb 2004 15:26:51 -0000
***************
*** 15,20 ****
--- 15,21 ----
        PROMPT_COMMENT,
        PROMPT_SINGLEQUOTE,
        PROMPT_DOUBLEQUOTE,
+       PROMPT_DOLLARQUOTE,
        PROMPT_PAREN,
        PROMPT_COPY
  } promptStatus_t;
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to