Re: [HACKERS] Review: UNNEST (and other functions) WITH ORDINALITY

Andrew Gierth Sun, 28 Jul 2013 22:43:44 -0700

I propose the following patch (which applies on top of the current
WITH ORDINALITY patch) to implement the suggested grammar changes.


I think this is the cleanest approach, and I've tested that it both
passes the regression tests and allows constructs like
WITH time AS (...) to work.

-- 
Andrew (irc:RhodiumToad)

*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
***************
*** 608,615 **** static Node *makeRecursiveViewSelect(char *relname, List 
*aliases, Node *query);
   * The grammar thinks these are keywords, but they are not in the kwlist.h
   * list and so can never be entered directly.  The filter in parser.c
   * creates these tokens when required.
   */
! %token                        NULLS_FIRST NULLS_LAST WITH_ORDINALITY WITH_TIME
  
  /* Precedence: lowest to highest */
  %nonassoc     SET                             /* see relation_expr_opt_alias 
*/
--- 608,645 ----
   * The grammar thinks these are keywords, but they are not in the kwlist.h
   * list and so can never be entered directly.  The filter in parser.c
   * creates these tokens when required.
+  *
+  * The rules for referencing WITH and these special lookahead keywords are
+  * as follows:
+  *
+  * If WITH is followed by a fixed token, such as WITH OIDS, or a non-keyword
+  * token such as '(', then use WITH directly, except as indicated below.
+  *
+  * If WITH could be followed by an object name, then use the with_keyword
+  * production instead. Also, if there are alternative branches in which some
+  * have a fixed keyword following WITH and some have an object name, then
+  * use with_keyword for all of them, overriding the above rule.
+  *
+  * (Similar rules would apply for NULLS_P, but currently there are no
+  * instances in the grammar where this is used other than as a special
+  * case or as an identifier.)
+  *
+  * The productions associated with these special cases are listed under
+  * "Special-case keyword sequences" near the end of the grammar. It is
+  * intended that these be the ONLY places that the special lookahead
+  * keywords appear, in order to avoid complicating the main body of the
+  * grammar.
+  *
+  * To add a new special case:
+  *   - add the special token names here in a %token decl
+  *   - add or extend the productions under "Special-case keyword sequences"
+  *   - add appropriate comparisons in:
+  *       base_yylex in src/backend/parser/parser.c
+  *       filtered_base_yylex in src/interfaces/ecpg/preproc/parser.c
   */
! 
! %token                        NULLS_BEFORE_FIRST NULLS_BEFORE_LAST
! %token          WITH_BEFORE_ORDINALITY WITH_BEFORE_TIME
  
  /* Precedence: lowest to highest */
  %nonassoc     SET                             /* see relation_expr_opt_alias 
*/
***************
*** 838,848 **** CreateRoleStmt:
                                }
                ;
  
- 
- opt_with:     WITH                                                            
        {}
-                       | /*EMPTY*/                                             
                {}
-               ;
- 
  /*
   * Options for CREATE ROLE and ALTER ROLE (also used by CREATE/ALTER USER
   * for backwards compatibility).  Note: the only option required by SQL99
--- 868,873 ----
***************
*** 3127,3138 **** ExclusionConstraintList:
                                                                                
                        { $$ = lappend($1, $3); }
                ;
  
! ExclusionConstraintElem: index_elem WITH any_operator
                        {
                                $$ = list_make2($1, $3);
                        }
                        /* allow OPERATOR() decoration for the benefit of 
ruleutils.c */
!                       | index_elem WITH OPERATOR '(' any_operator ')'
                        {
                                $$ = list_make2($1, $5);
                        }
--- 3152,3163 ----
                                                                                
                        { $$ = lappend($1, $3); }
                ;
  
! ExclusionConstraintElem: index_elem with_keyword any_operator
                        {
                                $$ = list_make2($1, $3);
                        }
                        /* allow OPERATOR() decoration for the benefit of 
ruleutils.c */
!                       | index_elem with_keyword OPERATOR '(' any_operator ')'
                        {
                                $$ = list_make2($1, $5);
                        }
***************
*** 6188,6195 **** opt_asc_desc: ASC                                            
        { $$ = SORTBY_ASC; }
                        | /*EMPTY*/                                             
{ $$ = SORTBY_DEFAULT; }
                ;
  
! opt_nulls_order: NULLS_FIRST                          { $$ = 
SORTBY_NULLS_FIRST; }
!                       | NULLS_LAST                                    { $$ = 
SORTBY_NULLS_LAST; }
                        | /*EMPTY*/                                             
{ $$ = SORTBY_NULLS_DEFAULT; }
                ;
  
--- 6213,6220 ----
                        | /*EMPTY*/                                             
{ $$ = SORTBY_DEFAULT; }
                ;
  
! opt_nulls_order: nulls_first                          { $$ = 
SORTBY_NULLS_FIRST; }
!                       | nulls_last                                    { $$ = 
SORTBY_NULLS_LAST; }
                        | /*EMPTY*/                                             
{ $$ = SORTBY_NULLS_DEFAULT; }
                ;
  
***************
*** 8348,8354 **** AlterTSDictionaryStmt:
                ;
  
  AlterTSConfigurationStmt:
!                       ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P 
MAPPING FOR name_list WITH any_name_list
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
--- 8373,8379 ----
                ;
  
  AlterTSConfigurationStmt:
!                       ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P 
MAPPING FOR name_list with_keyword any_name_list
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
***************
*** 8358,8364 **** AlterTSConfigurationStmt:
                                        n->replace = false;
                                        $$ = (Node*)n;
                                }
!                       | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER 
MAPPING FOR name_list WITH any_name_list
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
--- 8383,8389 ----
                                        n->replace = false;
                                        $$ = (Node*)n;
                                }
!                       | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER 
MAPPING FOR name_list with_keyword any_name_list
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
***************
*** 8368,8374 **** AlterTSConfigurationStmt:
                                        n->replace = false;
                                        $$ = (Node*)n;
                                }
!                       | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER 
MAPPING REPLACE any_name WITH any_name
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
--- 8393,8399 ----
                                        n->replace = false;
                                        $$ = (Node*)n;
                                }
!                       | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER 
MAPPING REPLACE any_name with_keyword any_name
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
***************
*** 8378,8384 **** AlterTSConfigurationStmt:
                                        n->replace = true;
                                        $$ = (Node*)n;
                                }
!                       | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER 
MAPPING FOR name_list REPLACE any_name WITH any_name
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
--- 8403,8409 ----
                                        n->replace = true;
                                        $$ = (Node*)n;
                                }
!                       | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER 
MAPPING FOR name_list REPLACE any_name with_keyword any_name
                                {
                                        AlterTSConfigurationStmt *n = 
makeNode(AlterTSConfigurationStmt);
                                        n->cfgname = $5;
***************
*** 9247,9260 **** simple_select:
   * We don't currently support the SEARCH or CYCLE clause.
   */
  with_clause:
!               WITH cte_list
                        {
                                $$ = makeNode(WithClause);
                                $$->ctes = $2;
                                $$->recursive = false;
                                $$->location = @1;
                        }
!               | WITH RECURSIVE cte_list
                        {
                                $$ = makeNode(WithClause);
                                $$->ctes = $3;
--- 9272,9285 ----
   * We don't currently support the SEARCH or CYCLE clause.
   */
  with_clause:
!               with_keyword cte_list
                        {
                                $$ = makeNode(WithClause);
                                $$->ctes = $2;
                                $$->recursive = false;
                                $$->location = @1;
                        }
!               | with_keyword RECURSIVE cte_list
                        {
                                $$ = makeNode(WithClause);
                                $$->ctes = $3;
***************
*** 9593,9599 **** table_ref:   relation_expr opt_alias_clause
                                        n->coldeflist = lsecond($2);
                                        $$ = (Node *) n;
                                }
!                       | func_table WITH_ORDINALITY func_alias_clause
                                {
                                        RangeFunction *n = 
makeNode(RangeFunction);
                                        n->lateral = false;
--- 9618,9624 ----
                                        n->coldeflist = lsecond($2);
                                        $$ = (Node *) n;
                                }
!                       | func_table with_ordinality func_alias_clause
                                {
                                        RangeFunction *n = 
makeNode(RangeFunction);
                                        n->lateral = false;
***************
*** 9613,9619 **** table_ref:   relation_expr opt_alias_clause
                                        n->coldeflist = lsecond($3);
                                        $$ = (Node *) n;
                                }
!                       | LATERAL_P func_table WITH_ORDINALITY func_alias_clause
                                {
                                        RangeFunction *n = 
makeNode(RangeFunction);
                                        n->lateral = true;
--- 9638,9644 ----
                                        n->coldeflist = lsecond($3);
                                        $$ = (Node *) n;
                                }
!                       | LATERAL_P func_table with_ordinality func_alias_clause
                                {
                                        RangeFunction *n = 
makeNode(RangeFunction);
                                        n->lateral = true;
***************
*** 10413,10419 **** ConstInterval:
                ;
  
  opt_timezone:
!                       WITH_TIME ZONE                                          
        { $$ = TRUE; }
                        | WITHOUT TIME ZONE                                     
        { $$ = FALSE; }
                        | /*EMPTY*/                                             
                { $$ = FALSE; }
                ;
--- 10438,10444 ----
                ;
  
  opt_timezone:
!                       with_time ZONE                                          
        { $$ = TRUE; }
                        | WITHOUT TIME ZONE                                     
        { $$ = FALSE; }
                        | /*EMPTY*/                                             
                { $$ = FALSE; }
                ;
***************
*** 12431,12436 **** ColLabel:  IDENT                                           
                        { $$ = $1; }
--- 12456,12499 ----
                        | reserved_keyword                                      
        { $$ = pstrdup($1); }
                ;
  
+ /*
+  * Special-case keyword sequences.
+  *
+  * To disambiguate WITH TIME, WITH ORDINALITY, NULLS FIRST, NULLS
+  * LAST, which otherwise cause conflicts, the lexer looks ahead one
+  * extra token and may replace the WITH or NULLS keyword by
+  * WITH_BEFORE_* or NULLS_BEFORE_* (the following keyword is not
+  * touched).
+  *
+  * These productions collect the special cases in one place; see the
+  * token declarations at the top of the file for the rules used.
+  */
+ 
+ opt_with: with_keyword
+           | /*EMPTY*/
+         ;
+ 
+ with_keyword: WITH
+               | WITH_BEFORE_TIME
+               | WITH_BEFORE_ORDINALITY
+             ;
+ 
+ with_ordinality: WITH_BEFORE_ORDINALITY ORDINALITY ;
+ 
+ with_time: WITH_BEFORE_TIME TIME ;
+ 
+ /*
+  * not needed since NULLS never occurs alone:
+  *
+  * nulls_keyword: NULLS_P
+  *               | NULLS_BEFORE_FIRST
+  *               | NULLS_BEFORE_LAST
+  *             ;
+  */
+ 
+ nulls_first: NULLS_BEFORE_FIRST FIRST_P ;
+ 
+ nulls_last: NULLS_BEFORE_LAST LAST_P ;
  
  /*
   * Keyword category lists.  Generally, every keyword present in
*** a/src/backend/parser/parser.c
--- b/src/backend/parser/parser.c
***************
*** 65,72 **** raw_parser(const char *str)
   * Intermediate filter between parser and core lexer (core_yylex in scan.l).
   *
   * The filter is needed because in some cases the standard SQL grammar
!  * requires more than one token lookahead.    We reduce these cases to 
one-token
!  * lookahead by combining tokens here, in order to keep the grammar LALR(1).
   *
   * Using a filter is simpler than trying to recognize multiword tokens
   * directly in scan.l, because we'd have to allow for comments between the
--- 65,80 ----
   * Intermediate filter between parser and core lexer (core_yylex in scan.l).
   *
   * The filter is needed because in some cases the standard SQL grammar
!  * requires more than one token lookahead. We reduce these cases to
!  * one-token lookahead by adding one extra token of lookahead here, and
!  * altering the keyword returned based on what follows, in order to keep
!  * the grammar LALR(1).
!  *
!  * We used to combine keywords, but no longer do. Now, keywords that would
!  * cause conflicts have special <keyword>_BEFORE_<lookahead> forms, which
!  * flag up in advance that the next token is <lookahead>.  This is more
!  * flexible when handling some of the edge cases such as
!  *   WITH ordinality AS (...)
   *
   * Using a filter is simpler than trying to recognize multiword tokens
   * directly in scan.l, because we'd have to allow for comments between the
***************
*** 98,167 **** base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t 
yyscanner)
        else
                cur_token = core_yylex(&(lvalp->core_yystype), llocp, 
yyscanner);
  
!       /* Do we need to look ahead for a possible multiword token? */
!       switch (cur_token)
!       {
!               case NULLS_P:
! 
!                       /*
!                        * NULLS FIRST and NULLS LAST must be reduced to one 
token
!                        */
!                       cur_yylval = lvalp->core_yystype;
!                       cur_yylloc = *llocp;
!                       next_token = core_yylex(&(lvalp->core_yystype), llocp, 
yyscanner);
!                       switch (next_token)
!                       {
!                               case FIRST_P:
!                                       cur_token = NULLS_FIRST;
!                                       break;
!                               case LAST_P:
!                                       cur_token = NULLS_LAST;
!                                       break;
!                               default:
!                                       /* save the lookahead token for next 
time */
!                                       yyextra->lookahead_token = next_token;
!                                       yyextra->lookahead_yylval = 
lvalp->core_yystype;
!                                       yyextra->lookahead_yylloc = *llocp;
!                                       yyextra->have_lookahead = true;
!                                       /* and back up the output info to 
cur_token */
!                                       lvalp->core_yystype = cur_yylval;
!                                       *llocp = cur_yylloc;
!                                       break;
!                       }
!                       break;
! 
!               case WITH:
! 
!                       /*
!                        * WITH TIME and WITH ORDINALITY must each be reduced 
to one token
!                        */
!                       cur_yylval = lvalp->core_yystype;
!                       cur_yylloc = *llocp;
!                       next_token = core_yylex(&(lvalp->core_yystype), llocp, 
yyscanner);
!                       switch (next_token)
!                       {
!                               case TIME:
!                                       cur_token = WITH_TIME;
!                                       break;
!                               case ORDINALITY:
!                                       cur_token = WITH_ORDINALITY;
!                                       break;
!                               default:
!                                       /* save the lookahead token for next 
time */
!                                       yyextra->lookahead_token = next_token;
!                                       yyextra->lookahead_yylval = 
lvalp->core_yystype;
!                                       yyextra->lookahead_yylloc = *llocp;
!                                       yyextra->have_lookahead = true;
!                                       /* and back up the output info to 
cur_token */
!                                       lvalp->core_yystype = cur_yylval;
!                                       *llocp = cur_yylloc;
!                                       break;
!                       }
!                       break;
! 
!               default:
!                       break;
!       }
  
        return cur_token;
  }
--- 106,151 ----
        else
                cur_token = core_yylex(&(lvalp->core_yystype), llocp, 
yyscanner);
  
!       /*
!        * Do we need to look ahead for a possible multiword token?
!        * If not, we're done here.
!        */
! 
!       if (cur_token != NULLS_P && cur_token != WITH)
!               return cur_token;
! 
!       /* Fetch the lookahead token */
! 
!       cur_yylval = lvalp->core_yystype;
!       cur_yylloc = *llocp;
!       next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
! 
!       /* save the lookahead token for next time */
! 
!       yyextra->lookahead_token = next_token;
!       yyextra->lookahead_yylval = lvalp->core_yystype;
!       yyextra->lookahead_yylloc = *llocp;
!       yyextra->have_lookahead = true;
! 
!       /* and back up the output info to cur_token */
! 
!       lvalp->core_yystype = cur_yylval;
!       *llocp = cur_yylloc;
! 
!       /*
!        * We don't merge the two tokens into one, but just modify the value of
!        * the leading token to reflect what follows.
!        */
! 
!       if (cur_token == NULLS_P && next_token == FIRST_P)
!               return NULLS_BEFORE_FIRST;
!       else if (cur_token == NULLS_P && next_token == LAST_P)
!               return NULLS_BEFORE_LAST;
! 
!       if (cur_token == WITH && next_token == TIME)
!               return WITH_BEFORE_TIME;
!       else if (cur_token == WITH && next_token == ORDINALITY)
!               return WITH_BEFORE_ORDINALITY;
  
        return cur_token;
  }
*** a/src/interfaces/ecpg/preproc/parser.c
--- b/src/interfaces/ecpg/preproc/parser.c
***************
*** 35,42 **** static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
   * Intermediate filter between parser and base lexer (base_yylex in scan.l).
   *
   * The filter is needed because in some cases the standard SQL grammar
!  * requires more than one token lookahead.    We reduce these cases to 
one-token
!  * lookahead by combining tokens here, in order to keep the grammar LALR(1).
   *
   * Using a filter is simpler than trying to recognize multiword tokens
   * directly in scan.l, because we'd have to allow for comments between the
--- 35,50 ----
   * Intermediate filter between parser and base lexer (base_yylex in scan.l).
   *
   * The filter is needed because in some cases the standard SQL grammar
!  * requires more than one token lookahead. We reduce these cases to
!  * one-token lookahead by adding one extra token of lookahead here, and
!  * altering the keyword returned based on what follows, in order to keep
!  * the grammar LALR(1).
!  *
!  * We used to combine keywords, but no longer do. Now, keywords that would
!  * cause conflicts have special <keyword>_BEFORE_<lookahead> forms, which
!  * flag up in advance that the next token is <lookahead>.  This is more
!  * flexible when handling some of the edge cases such as
!  *   WITH ordinality AS (...)
   *
   * Using a filter is simpler than trying to recognize multiword tokens
   * directly in scan.l, because we'd have to allow for comments between the
***************
*** 63,129 **** filtered_base_yylex(void)
        else
                cur_token = base_yylex();
  
!       /* Do we need to look ahead for a possible multiword token? */
!       switch (cur_token)
!       {
!               case NULLS_P:
! 
!                       /*
!                        * NULLS FIRST and NULLS LAST must be reduced to one 
token
!                        */
!                       cur_yylval = base_yylval;
!                       cur_yylloc = base_yylloc;
!                       next_token = base_yylex();
!                       switch (next_token)
!                       {
!                               case FIRST_P:
!                                       cur_token = NULLS_FIRST;
!                                       break;
!                               case LAST_P:
!                                       cur_token = NULLS_LAST;
!                                       break;
!                               default:
!                                       /* save the lookahead token for next 
time */
!                                       lookahead_token = next_token;
!                                       lookahead_yylval = base_yylval;
!                                       lookahead_yylloc = base_yylloc;
!                                       have_lookahead = true;
!                                       /* and back up the output info to 
cur_token */
!                                       base_yylval = cur_yylval;
!                                       base_yylloc = cur_yylloc;
!                                       break;
!                       }
!                       break;
! 
!               case WITH:
! 
!                       /*
!                        * WITH TIME must be reduced to one token
!                        */
!                       cur_yylval = base_yylval;
!                       cur_yylloc = base_yylloc;
!                       next_token = base_yylex();
!                       switch (next_token)
!                       {
!                               case TIME:
!                                       cur_token = WITH_TIME;
!                                       break;
!                               default:
!                                       /* save the lookahead token for next 
time */
!                                       lookahead_token = next_token;
!                                       lookahead_yylval = base_yylval;
!                                       lookahead_yylloc = base_yylloc;
!                                       have_lookahead = true;
!                                       /* and back up the output info to 
cur_token */
!                                       base_yylval = cur_yylval;
!                                       base_yylloc = cur_yylloc;
!                                       break;
!                       }
!                       break;
! 
!               default:
!                       break;
!       }
  
        return cur_token;
  }
--- 71,116 ----
        else
                cur_token = base_yylex();
  
!       /*
!        * Do we need to look ahead for a possible multiword token?
!        * If not, we're done here.
!        */
! 
!       if (cur_token != NULLS_P && cur_token != WITH)
!               return cur_token;
! 
!       /* fetch the lookahead token */
! 
!       cur_yylval = base_yylval;
!       cur_yylloc = base_yylloc;
!       next_token = base_yylex();
! 
!       /* save the lookahead token for next time */
! 
!       lookahead_token = next_token;
!       lookahead_yylval = base_yylval;
!       lookahead_yylloc = base_yylloc;
!       have_lookahead = true;
! 
!       /* and back up the output info to cur_token */
! 
!       base_yylval = cur_yylval;
!       base_yylloc = cur_yylloc;
! 
!       /*
!        * We don't merge the two tokens into one, but just modify the value of
!        * the leading token to reflect what follows.
!        */
! 
!       if (cur_token == NULLS_P && next_token == FIRST_P)
!               return NULLS_BEFORE_FIRST;
!       else if (cur_token == NULLS_P && next_token == LAST_P)
!               return NULLS_BEFORE_LAST;
! 
!       if (cur_token == WITH && next_token == TIME)
!               return WITH_BEFORE_TIME;
!       else if (cur_token == WITH && next_token == ORDINALITY)
!               return WITH_BEFORE_ORDINALITY;
  
        return cur_token;
  }

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Review: UNNEST (and other functions) WITH ORDINALITY

Reply via email to