Hi everyone,

After spending several days reading PostgreSQL source code (and another
couple of days coding), I've managed to come up with some alpha code
that attempts to implement non-recursive WITH common table expressions.
Having got this far, I feel that I need to ask for advice from some more
experienced PostgreSQL hackers and so I post the alpha version here.

The patch attempts to alter the parser grammar, and using code modified
from that used when processing subselects in the FROM clause, attempts
to treat the CTE as if it were presented the same as a FROM subselect.

My main issue at the moment is that the code in transformFromClauseItem
seems a terrible hack, mainly because the grammar returns each string
within the FROM clause as a RangeVar, and transformFromClauseItem
assumes that each RangeVar represents a physical relation. Of course,
this is not the case when referencing a CTE and so the code first checks
to see if an entry has already been created when processing the WITH
clause; if it does then we return NULL to indicate that
transformFromClause should do nothing. Messy, but I wanted to see what
other developers thought before jumping in and rewriting this part of
the code.

Another point to think about is what should a query return if the SELECT
doesn't refer to a CTE? For example:

- WITH foo(a, b) AS (SELECT * FROM pg_class) SELECT * FROM pg_class;

Since I've inserted the CTE as a range table entry of type RTE_SUBQUERY
then the current behaviour is to perform a cross-join between foo and
bar which I'm not sure is the correct behaviour since CTEs seem to be
more like views in this respect.

Finally, the current code fails when supplying an additional alias to a
CTE in the select statement and then trying to refer to it, e.g.

- with myrel(p1) as (select oid from pg_class) select myrel.p1 from
myrel AS foo, pg_class AS bar WHERE myrel.p1 = bar.oid; -- WORKS

- with myrel(p1) as (select oid from pg_class) select myrel.p1 from
myrel AS foo, pg_class AS bar WHERE foo.p1 = bar.oid; -- FAILS

ERROR:  missing FROM-clause entry for table "foo" at character 103

So in this case, should foo be accepted as a valid alias for myrel? My
feeling is that I will end up with an RTE_CTE range table entry kind
which borrows from the current subquery behaviour, but it would be very
useful to see where the similarity between the two range table entry
types ends.


TIA,

Mark.

Index: src/backend/parser/analyze.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/analyze.c,v
retrieving revision 1.351
diff -c -r1.351 analyze.c
*** src/backend/parser/analyze.c	18 Sep 2006 16:04:04 -0000	1.351
--- src/backend/parser/analyze.c	20 Sep 2006 19:02:44 -0000
***************
*** 2087,2092 ****
--- 2087,2095 ----
  	/* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */
  	pstate->p_locking_clause = stmt->lockingClause;
  
+ 	/* process the WITH clause */
+ 	transformWithClause(pstate, stmt->withClause);
+ 
  	/* process the FROM clause */
  	transformFromClause(pstate, stmt->fromClause);
  
Index: src/backend/parser/gram.y
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/gram.y,v
retrieving revision 2.565
diff -c -r2.565 gram.y
*** src/backend/parser/gram.y	3 Sep 2006 22:37:05 -0000	2.565
--- src/backend/parser/gram.y	20 Sep 2006 19:02:52 -0000
***************
*** 346,351 ****
--- 346,354 ----
  %type <str>		OptTableSpace OptConsTableSpace OptTableSpaceOwner
  %type <list>	opt_check_option
  
+ %type <list>	OptCommonTableExpressionList CommonTableExpressionList
+ %type <node>	CommonTableExpression
+ 
  
  /*
   * If you make any token changes, update the keyword table in
***************
*** 5594,5599 ****
--- 5597,5638 ----
  			| WITHOUT HOLD					{ $$ = FALSE; }
  		;
  
+ 
+ 
+ /*****************************************************************************
+  *
+  *		COMMON TABLE EXPRESSIONS
+  *				WITH STATEMENTS
+  *
+  *****************************************************************************/
+ 
+ 
+ OptCommonTableExpressionList:
+ 			WITH CommonTableExpressionList			{ $$ = $2; }
+ 			| /* EMPTY */					{ $$ = NIL; }
+ 		;
+ 
+ CommonTableExpressionList:
+ 			CommonTableExpression						{ $$ = list_make1($1); }
+ 			| CommonTableExpressionList ',' CommonTableExpression		{ $$ = lappend($1, $3); }
+ 		;
+ 
+ CommonTableExpression:
+ 			ColId '(' columnList ')' AS select_with_parens
+ 				{
+ 					Alias *a = makeNode(Alias);
+ 					a->aliasname = $1;
+ 					a->colnames = $3;
+ 
+ 					RangeSubselect *n = makeNode(RangeSubselect);
+ 					n->subquery = $6;
+ 					n->alias = a;
+ 					$$ = (Node *) n;
+ 				}
+ 		;
+ 
+ 
+ 
  /*****************************************************************************
   *
   *		QUERY:
***************
*** 5705,5723 ****
   * However, this is not checked by the grammar; parse analysis must check it.
   */
  simple_select:
! 			SELECT opt_distinct target_list
  			into_clause from_clause where_clause
  			group_clause having_clause
  				{
  					SelectStmt *n = makeNode(SelectStmt);
! 					n->distinctClause = $2;
! 					n->targetList = $3;
! 					n->into = $4;
  					n->intoColNames = NIL;
! 					n->fromClause = $5;
! 					n->whereClause = $6;
! 					n->groupClause = $7;
! 					n->havingClause = $8;
  					$$ = (Node *)n;
  				}
  			| values_clause							{ $$ = $1; }
--- 5746,5765 ----
   * However, this is not checked by the grammar; parse analysis must check it.
   */
  simple_select:
! 			OptCommonTableExpressionList SELECT opt_distinct target_list
  			into_clause from_clause where_clause
  			group_clause having_clause
  				{
  					SelectStmt *n = makeNode(SelectStmt);
! 					n->withClause = $1;
! 					n->distinctClause = $3;
! 					n->targetList = $4;
! 					n->into = $5;
  					n->intoColNames = NIL;
! 					n->fromClause = $6;
! 					n->whereClause = $7;
! 					n->groupClause = $8;
! 					n->havingClause = $9;
  					$$ = (Node *)n;
  				}
  			| values_clause							{ $$ = $1; }
***************
*** 8752,8758 ****
  			| VARYING
  			| VIEW
  			| VOLATILE
- 			| WITH
  			| WITHOUT
  			| WORK
  			| WRITE
--- 8794,8799 ----
***************
*** 8925,8930 ****
--- 8966,8972 ----
  			| USING
  			| WHEN
  			| WHERE
+ 			| WITH
  		;
  
  
Index: src/backend/parser/parse_clause.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/parse_clause.c,v
retrieving revision 1.157
diff -c -r1.157 parse_clause.c
*** src/backend/parser/parse_clause.c	14 Aug 2006 23:39:32 -0000	1.157
--- src/backend/parser/parse_clause.c	20 Sep 2006 19:02:54 -0000
***************
*** 56,61 ****
--- 56,65 ----
  						RangeSubselect *r);
  static RangeTblEntry *transformRangeFunction(ParseState *pstate,
  					   RangeFunction *r);
+ static Node *transformWithClauseItem(ParseState *pstate, Node *n,
+ 						RangeTblEntry **top_rte, int *top_rti,
+ 						List **relnamespace,
+ 						Relids *containedRels);
  static Node *transformFromClauseItem(ParseState *pstate, Node *n,
  						RangeTblEntry **top_rte, int *top_rti,
  						List **relnamespace,
***************
*** 67,72 ****
--- 71,115 ----
  
  
  /*
+  * transformWithClause -
+  *	  Process the WITH clause by adding the CTEs to the query's range 
+  *	  table.
+  */
+ void
+ transformWithClause(ParseState *pstate, List *withList)
+ {
+ 	
+ 	ListCell   *fl;
+ 
+ 	/*
+ 	 * The grammar will have produced a list of RangeSubselects. Transform
+ 	 * each one (possibly adding entries to the rtable), check for duplicate 
+ 	 * refnames, and then add it to the namespaces.
+ 	 */
+ 	foreach(fl, withList)
+ 	{
+ 		Node	   *n = lfirst(fl);
+ 		RangeTblEntry *rte;
+ 		int			rtindex;
+ 		List	   *relnamespace;
+ 		Relids		containedRels;
+ 
+ 		n = transformWithClauseItem(pstate, n,
+ 									&rte,
+ 									&rtindex,
+ 									&relnamespace,
+ 									&containedRels);
+ 		checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
+ 		pstate->p_joinlist = lappend(pstate->p_joinlist, n);
+ 		pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
+ 											 relnamespace);
+ 		pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
+ 		bms_free(containedRels);
+ 	}
+ }
+ 
+ 
+ /*
   * transformFromClause -
   *	  Process the FROM clause and add items to the query's range table,
   *	  joinlist, and namespaces.
***************
*** 105,116 ****
  									&rtindex,
  									&relnamespace,
  									&containedRels);
! 		checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
! 		pstate->p_joinlist = lappend(pstate->p_joinlist, n);
! 		pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
! 											 relnamespace);
! 		pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
! 		bms_free(containedRels);
  	}
  }
  
--- 148,166 ----
  									&rtindex,
  									&relnamespace,
  									&containedRels);
! 		/*
! 		 * If transformFromClauseItem returns NULL then it means we don't have to do
! 		 * anything since the RangeVar points to a CTE already in the range table
! 		 */
! 		if (n)
! 		{
! 			checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
! 			pstate->p_joinlist = lappend(pstate->p_joinlist, n);
! 			pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
! 												 relnamespace);
! 			pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
! 			bms_free(containedRels);
! 		}
  	}
  }
  
***************
*** 561,566 ****
--- 611,642 ----
  }
  
  
+ 
+ static Node *
+ transformWithClauseItem(ParseState *pstate, Node *n,
+ 						RangeTblEntry **top_rte, int *top_rti,
+ 						List **relnamespace,
+ 						Relids *containedRels)
+ {
+ 	/* sub-SELECT is like a plain relation */
+ 	RangeTblRef *rtr;
+ 	RangeTblEntry *rte;
+ 	int			rtindex;
+ 
+ 	rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
+ 	/* assume new rte is at end */
+ 	rtindex = list_length(pstate->p_rtable);
+ 	Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
+ 	*top_rte = rte;
+ 	*top_rti = rtindex;
+ 	*relnamespace = list_make1(rte);
+ 	*containedRels = bms_make_singleton(rtindex);
+ 	rtr = makeNode(RangeTblRef);
+ 	rtr->rtindex = rtindex;
+ 	return (Node *) rtr;
+ }
+ 
+ 
  /*
   * transformFromClauseItem -
   *	  Transform a FROM-clause item, adding any required entries to the
***************
*** 596,616 ****
  	if (IsA(n, RangeVar))
  	{
  		/* Plain relation reference */
  		RangeTblRef *rtr;
  		RangeTblEntry *rte;
  		int			rtindex;
  
! 		rte = transformTableEntry(pstate, (RangeVar *) n);
! 		/* assume new rte is at end */
! 		rtindex = list_length(pstate->p_rtable);
! 		Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
! 		*top_rte = rte;
! 		*top_rti = rtindex;
! 		*relnamespace = list_make1(rte);
! 		*containedRels = bms_make_singleton(rtindex);
! 		rtr = makeNode(RangeTblRef);
! 		rtr->rtindex = rtindex;
! 		return (Node *) rtr;
  	}
  	else if (IsA(n, RangeSubselect))
  	{
--- 672,715 ----
  	if (IsA(n, RangeVar))
  	{
  		/* Plain relation reference */
+ 		RangeVar *rv = (RangeVar *)n;
  		RangeTblRef *rtr;
  		RangeTblEntry *rte;
  		int			rtindex;
  
! 		/*
! 	 	 * When a FROM clause is parsed, it is impossible to determine at
! 		 * parse time whether the RangeVar represents a physical relation
! 		 * or a CTE. Since transformWithClause() has already created
! 		 * range table entries for each CTE, we first search the range table
! 	         * to see if have a match before moving on to check the physical
! 		 * relation.
! 	 	 */
! 		rte = refnameRangeTblEntry(pstate, rv->schemaname, rv->relname, NULL);
! 		if (!rte)
! 		{
! 			/* 
! 			 * Can't find an entry already in the range table, so assume
! 			 * we are looking at a physical relation
! 			 */
! 			rte = transformTableEntry(pstate, (RangeVar *) n);
! 			/* assume new rte is at end */
! 			rtindex = list_length(pstate->p_rtable);
! 			Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
! 
! 			*top_rte = rte;
! 			*top_rti = rtindex;
! 			*relnamespace = list_make1(rte);
! 			*containedRels = bms_make_singleton(rtindex);
! 			rtr = makeNode(RangeTblRef);
! 			rtr->rtindex = rtindex;
! 			return (Node *) rtr;
! 		}
! 		else
! 		{
! 			/* Indicate to the caller that the RangeVar references a CTE already in the range table */
! 			return NULL;
! 		}
  	}
  	else if (IsA(n, RangeSubselect))
  	{
Index: src/include/nodes/parsenodes.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/nodes/parsenodes.h,v
retrieving revision 1.330
diff -c -r1.330 parsenodes.h
*** src/include/nodes/parsenodes.h	5 Sep 2006 21:08:36 -0000	1.330
--- src/include/nodes/parsenodes.h	20 Sep 2006 19:02:59 -0000
***************
*** 732,737 ****
--- 732,738 ----
  	Node	   *whereClause;	/* WHERE qualification */
  	List	   *groupClause;	/* GROUP BY clauses */
  	Node	   *havingClause;	/* HAVING conditional-expression */
+ 	List       *withClause;		/* WITH common table expressions */
  
  	/*
  	 * In a "leaf" node representing a VALUES list, the above fields are all
Index: src/include/parser/parse_clause.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/parser/parse_clause.h,v
retrieving revision 1.46
diff -c -r1.46 parse_clause.h
*** src/include/parser/parse_clause.h	3 Jul 2006 22:45:40 -0000	1.46
--- src/include/parser/parse_clause.h	20 Sep 2006 19:02:59 -0000
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "parser/parse_node.h"
  
+ extern void transformWithClause(ParseState *pstate, List *withList);
  extern void transformFromClause(ParseState *pstate, List *frmList);
  extern int setTargetTable(ParseState *pstate, RangeVar *relation,
  			   bool inh, bool alsoSource, AclMode requiredPerms);
---------------------------(end of broadcast)---------------------------
TIP 3: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faq

Reply via email to