(Fourth try ;-)


Attached is a patch for dollar quoting in the backend and in psql (with the new flex scanner). I'm fairly confident about the backend (because this is mainly Tom's work adapted :-) ) but rather less so about psql - I don't entirely understand all the odd states in psql's scanner. I'm not sure that I have freed up memory in all the necessary cases. Nor am I sure what the state is or should be if we end an included file in a dollar-quoting state, nor how to handle such a situation. So, some extra eyeballs would be appreciated.

However - it does seem to work in my simple testing.

If this is all OK, the remaining tasks would include pg_dump, docs (Jon Jensen says he will attack these two) and some regression tests (any volunteers?)

cheers

andrew
Index: src/backend/parser/scan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/parser/scan.l,v
retrieving revision 1.114
diff -c -r1.114 scan.l
*** src/backend/parser/scan.l   21 Feb 2004 00:34:52 -0000      1.114
--- src/backend/parser/scan.l   24 Feb 2004 17:33:01 -0000
***************
*** 37,42 ****
--- 37,43 ----
  extern YYSTYPE yylval;
  
  static int            xcdepth = 0;    /* depth of nesting in slash-star comments */
+ static char    *dolqstart;      /* current $foo$ quote start string */
  
  /*
   * literalbuf is used to accumulate literal values when multiple rules
***************
*** 94,99 ****
--- 95,101 ----
   *  <xd> delimited identifiers (double-quoted identifiers)
   *  <xh> hexadecimal numeric string
   *  <xq> quoted strings
+  *  <dolq> $foo$ quoted strings
   */
  
  %x xb
***************
*** 101,106 ****
--- 103,109 ----
  %x xd
  %x xh
  %x xq
+ %x dolq
  
  /*
   * In order to make the world safe for Windows and Mac clients as well as
***************
*** 175,180 ****
--- 178,194 ----
  xqoctesc              [\\][0-7]{1,3}
  xqcat                 {quote}{whitespace_with_newline}{quote}
  
+ /* $foo$ style quotes ("dollar quoting")
+  * The quoted string starts with $foo$ where "foo" is an optional string
+  * in the form of an identifier, except that it may not contain "$", 
+  * and extends to the first occurrence of an identical string.  
+  * There is *no* processing of the quoted text.
+  */
+ dolq_start            [A-Za-z\200-\377_]
+ dolq_cont             [A-Za-z\200-\377_0-9]
+ dolqdlm         \$({dolq_start}{dolq_cont}*)?\$
+ dolqins         [^$]+
+ 
  /* Double quote
   * Allows embedded spaces and other special characters into identifiers.
   */
***************
*** 242,248 ****
  other                 .
  
  /*
!  * Quoted strings must allow some special characters such as single-quote
   *  and newline.
   * Embedded single-quotes are implemented both in the SQL standard
   *  style of two adjacent single quotes "''" and in the Postgres/Java style
--- 256,263 ----
  other                 .
  
  /*
!  * Dollar quoted strings are totally opaque, and no escaping is done on them.
!  * Other quoted strings must allow some special characters such as single-quote
   *  and newline.
   * Embedded single-quotes are implemented both in the SQL standard
   *  style of two adjacent single quotes "''" and in the Postgres/Java style
***************
*** 390,395 ****
--- 405,439 ----
                                }
  <xq><<EOF>>           { yyerror("unterminated quoted string"); }
  
+ {dolqdlm}       {
+                       token_start = yytext;
+                                       dolqstart = pstrdup(yytext);
+                                       BEGIN(dolq);
+                                       startlit();
+                 }
+ <dolq>{dolqdlm} {
+                     if (strcmp(yytext, dolqstart) == 0)
+                                       {
+                                               pfree(dolqstart);
+                                               BEGIN(INITIAL);
+                                               yylval.str = litbufdup();
+                                               return SCONST;
+                                       }
+                                       /*
+                                        * When we fail to match $...$ to dolqstart, transfer
+                                        * the $... part to the output, but put back the final
+                                        * $ for rescanning.  Consider $delim$...$junk$delim$
+                                        */
+                                       addlit(yytext, yyleng-1); 
+                                       yyless(yyleng-1); 
+                 }
+ <dolq>{dolqins} {
+                     addlit(yytext, yyleng);
+                 }
+ <dolq>.         {
+                       addlitchar(yytext[0]);
+                 }
+ <dolq><<EOF>>   { yyerror("unterminated dollar-quoted string"); }
  {xdstart}             {
                                        token_start = yytext;
                                        BEGIN(xd);
***************
*** 407,413 ****
                                        yylval.str = ident;
                                        return IDENT;
                                }
! <xd>{xddouble} {
                                        addlitchar('"');
                                }
  <xd>{xdinside}        {
--- 451,457 ----
                                        yylval.str = ident;
                                        return IDENT;
                                }
! <xd>{xddouble}  {
                                        addlitchar('"');
                                }
  <xd>{xdinside}        {
Index: src/bin/psql/prompt.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.c,v
retrieving revision 1.34
diff -c -r1.34 prompt.c
*** src/bin/psql/prompt.c       25 Jan 2004 03:07:22 -0000      1.34
--- src/bin/psql/prompt.c       24 Feb 2004 17:33:19 -0000
***************
*** 85,90 ****
--- 85,91 ----
                case PROMPT_CONTINUE:
                case PROMPT_SINGLEQUOTE:
                case PROMPT_DOUBLEQUOTE:
+               case PROMPT_DOLLARQUOTE:
                case PROMPT_COMMENT:
                case PROMPT_PAREN:
                        prompt_name = "PROMPT2";
***************
*** 198,203 ****
--- 199,207 ----
                                                        break;
                                                case PROMPT_DOUBLEQUOTE:
                                                        buf[0] = '"';
+                                                       break;
+                                               case PROMPT_DOLLARQUOTE:
+                                                       buf[0] = '$';
                                                        break;
                                                case PROMPT_COMMENT:
                                                        buf[0] = '*';
Index: src/bin/psql/prompt.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.h,v
retrieving revision 1.13
diff -c -r1.13 prompt.h
*** src/bin/psql/prompt.h       29 Nov 2003 19:52:07 -0000      1.13
--- src/bin/psql/prompt.h       24 Feb 2004 17:33:19 -0000
***************
*** 15,20 ****
--- 15,21 ----
        PROMPT_COMMENT,
        PROMPT_SINGLEQUOTE,
        PROMPT_DOUBLEQUOTE,
+       PROMPT_DOLLARQUOTE,
        PROMPT_PAREN,
        PROMPT_COPY
  } promptStatus_t;
Index: src/bin/psql/psqlscan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/psqlscan.l,v
retrieving revision 1.1
diff -c -r1.1 psqlscan.l
*** src/bin/psql/psqlscan.l     19 Feb 2004 19:40:09 -0000      1.1
--- src/bin/psql/psqlscan.l     24 Feb 2004 17:33:19 -0000
***************
*** 92,97 ****
--- 92,98 ----
        int                     start_state;    /* saved YY_START */
        int                     paren_depth;    /* depth of nesting in parentheses */
        int                     xcdepth;                /* depth of nesting in slash-star comments */
+     char        *dolqstart;      /* current $foo$ quote start string */
  } PsqlScanStateData;
  
  static PsqlScanState cur_state;       /* current state while active */
***************
*** 151,156 ****
--- 152,158 ----
   *  <xd> delimited identifiers (double-quoted identifiers)
   *  <xh> hexadecimal numeric string
   *  <xq> quoted strings
+  *  <dolq> $foo$ quoted strings
   */
  
  %x xb
***************
*** 158,163 ****
--- 160,166 ----
  %x xd
  %x xh
  %x xq
+ %x dolq
  /* Additional exclusive states for psql only: lex backslash commands */
  %x xslashcmd
  %x xslasharg
***************
*** 241,246 ****
--- 244,260 ----
  xqoctesc              [\\][0-7]{1,3}
  xqcat                 {quote}{whitespace_with_newline}{quote}
  
+ /* $foo$ style quotes ("dollar quoting")
+  * The quoted string starts with $foo$ where "foo" is an optional string
+  * in the form of an identifier, except that it may not contain "$", 
+  * and extends to the first occurrence of an identical string.  
+  * There is *no* processing of the quoted text.
+  */
+ dolq_start            [A-Za-z\200-\377_]
+ dolq_cont             [A-Za-z\200-\377_0-9]
+ dolqdlm         \$({dolq_start}{dolq_cont}*)?\$
+ dolqins         [^$]+
+ 
  /* Double quote
   * Allows embedded spaces and other special characters into identifiers.
   */
***************
*** 428,433 ****
--- 442,477 ----
                                        ECHO;
                                }
  
+ {dolqdlm}       {
+                                       cur_state->dolqstart = pg_strdup(yytext);
+                                       BEGIN(dolq);
+                                       ECHO;
+                 }
+ <dolq>{dolqdlm} {
+                     if (strcmp(yytext, cur_state->dolqstart) == 0)
+                                       {
+                                               free(cur_state->dolqstart);
+                                               cur_state->dolqstart = NULL;
+                                               BEGIN(INITIAL);
+                                               ECHO;
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * When we fail to match $...$ to dolqstart, transfer
+                                                * the $... part to the output, but put back the final
+                                                * $ for rescanning.  Consider $delim$...$junk$delim$
+                                                */
+                                               emit(yytext, yyleng-1); 
+                                               yyless(yyleng-1); 
+                                       }
+                 }
+ <dolq>{dolqins} {
+                     ECHO;
+                 }
+ <dolq>.         {
+                       ECHO;
+                 }
  {xdstart}             {
                                        BEGIN(xd);
                                        ECHO;
***************
*** 1007,1012 ****
--- 1051,1060 ----
                                case xq:
                                        result = PSCAN_INCOMPLETE;
                                        *prompt = PROMPT_SINGLEQUOTE;
+                                       break;
+                               case dolq:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_DOLLARQUOTE;
                                        break;
                                default:
                                        /* can't get here */
---------------------------(end of broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]

Reply via email to