Repository: trafodion
Updated Branches:
  refs/heads/master 75c7b3954 -> 2be44ed1b


[TRAFODION-2969] Fix interaction of [first n] etc. with subqueries


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/2f2714d8
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/2f2714d8
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/2f2714d8

Branch: refs/heads/master
Commit: 2f2714d8fd719b92476e96642cd3fc4389523d92
Parents: 75c7b39
Author: Dave Birdsall <[email protected]>
Authored: Tue Feb 27 00:18:39 2018 +0000
Committer: Dave Birdsall <[email protected]>
Committed: Tue Feb 27 00:18:39 2018 +0000

----------------------------------------------------------------------
 core/sql/optimizer/BindRelExpr.cpp      | 102 ++++++++++++++-----
 core/sql/optimizer/RelExpr.cpp          |  14 ++-
 core/sql/optimizer/RelMisc.h            |   3 +-
 core/sql/regress/core/EXPECTED002.LINUX | 147 ++++++++++++++++++++++++---
 core/sql/regress/core/TEST002           |  56 ++++++++++
 5 files changed, 274 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f2714d8/core/sql/optimizer/BindRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/BindRelExpr.cpp 
b/core/sql/optimizer/BindRelExpr.cpp
index 7beaf9e..f78d6fb 100644
--- a/core/sql/optimizer/BindRelExpr.cpp
+++ b/core/sql/optimizer/BindRelExpr.cpp
@@ -6094,8 +6094,11 @@ RelExpr *RelRoot::bindNode(BindWA *bindWA)
   if (prevScope)
     inRowSubquery = prevScope->context()->inRowSubquery();
 
+  NABoolean GroupByAggNodeAdded = FALSE;
   if (inRowSubquery && (CmpCommon::getDefault(COMP_BOOL_137) == DF_OFF))
-      addOneRowAggregates(bindWA);
+    // force adding one row aggregates in the [last 0] case
+    GroupByAggNodeAdded = addOneRowAggregates(bindWA, 
+                            getFirstNRows() == -2 /* [last 0] case */);
 
   returnedRoot = 
     transformGroupByWithOrdinalPhase2(bindWA);
@@ -6665,36 +6668,79 @@ RelExpr *RelRoot::bindNode(BindWA *bindWA)
   if ((getFirstNRows() != -1) ||
        (getFirstNRowsParam()))
     {
-      // create a firstN node to retrieve firstN rows.
-      FirstN * firstn = new(bindWA->wHeap())
-        FirstN(child(0), getFirstNRows(), needFirstSortedRows(), 
getFirstNRowsParam());   
+      // [first/any/last N] processing
 
-      firstn->bindNode(bindWA);
-      if (bindWA->errStatus())
-        return NULL;
+      RelExpr * nodeToInsertUnder = this;
+      if (inRowSubquery)
+        {
+          // [first/any/last N] in a row subquery special case
+          //
+          // In this case, if N > 1 it is first/any N, and we can simply
+          // ignore that as row subqueries already enforce an at-most-one
+          // row semantic. For [first 1], [last 1], [last 0], we need to
+          // add the node below any one-row aggregate group by node created
+          // earlier in this method. (If it put it above that group by node,
+          // that is too late; the one-row aggregate will raise an 8401 error
+          // before our FirstN node has a chance to narrow the result down to
+          // zero or one rows.) There is an interesting nuance with [last 0]:
+          // We forced the addition of a one-row aggregate group by node
+          // in that case, because [last 0] returns no rows. We might have
+          // a scalar aggregate subquery which ordinarily would not require
+          // a one-row aggregate group, but when [last 0] is present we want
+          // to force the aggregates to become NULL. Adding a one-row 
+          // aggregate group on top of the scalar aggregate, with the FirstN
+          // node in between them does the trick.
+          if (GroupByAggNodeAdded &&
+               ( (getFirstNRows() == 1) ||   // [first 1] or [any 1]
+                 (getFirstNRows() == -2) ||  // [last 0]
+                 (getFirstNRows() == -3) ) ) // [last 1]
+            {
+              nodeToInsertUnder = child(0);
+              CMPASSERT(nodeToInsertUnder->getOperatorType() == REL_GROUPBY);  
           
+            }
+          else if (!GroupByAggNodeAdded && (getFirstNRows() == -2))  // [last 
0]
+            {
+              CMPASSERT(GroupByAggNodeAdded);  // a GroupByAgg should have 
been forced
+            }
+          else  // a case where we can throw the [first/any/last N] away
+            {
+              nodeToInsertUnder = NULL;
+            }
+        }
+          
+      if (nodeToInsertUnder)
+        {
+          // create a firstN node to retrieve firstN rows.
+          FirstN * firstn = new(bindWA->wHeap())
+            FirstN(nodeToInsertUnder->child(0), getFirstNRows(), 
needFirstSortedRows(), getFirstNRowsParam());   
 
-      // Note: For ORDER BY + [first n], we want to sort the rows before 
-      // picking just n of them. (We don't do this for [any n].) We might
-      // be tempted to copy the orderByTree into the FirstN node at this
-      // point, but this doesn't work. Instead, we copy the bound ValueIds
-      // at normalize time. We have to do this in case there are expressions
-      // involved in the ORDER BY and there is a DESC. The presence of the
-      // Inverse node at the top of the expression tree seems to cause the
-      // expressions underneath to be bound to different ValueIds, which 
-      // causes coverage tests in FirstN::createContextForAChild requirements
-      // generation to fail. An example of where this occurs is:
-      //
-      // prepare s1 from
-      //   select [first 2] y, x from
-      //    (select a,b + 26 from t1) as t(x,y)
-      //   order by y desc;
-      //
-      // If we copy the ORDER BY ItemExpr tree and rebind, we get a different
-      // ValueId for the expression b + 26 in the child characteristic outputs
-      // than what we get for the child of Inverse in Inverse(B + 26). The
-      // trick of copying the already-bound ORDER BY clause later avoids this.
+          firstn->bindNode(bindWA);
+          if (bindWA->errStatus())
+            return NULL;
 
-      setChild(0, firstn);
+          // Note: For ORDER BY + [first n], we want to sort the rows before 
+          // picking just n of them. (We don't do this for [any n].) We might
+          // be tempted to copy the orderByTree into the FirstN node at this
+          // point, but this doesn't work. Instead, we copy the bound ValueIds
+          // at normalize time. We have to do this in case there are 
expressions
+          // involved in the ORDER BY and there is a DESC. The presence of the
+          // Inverse node at the top of the expression tree seems to cause the
+          // expressions underneath to be bound to different ValueIds, which 
+          // causes coverage tests in FirstN::createContextForAChild 
requirements
+          // generation to fail. An example of where this occurs is:
+          //
+          // prepare s1 from
+          //   select [first 2] y, x from
+          //    (select a,b + 26 from t1) as t(x,y)
+          //   order by y desc;
+          //
+          // If we copy the ORDER BY ItemExpr tree and rebind, we get a 
different
+          // ValueId for the expression b + 26 in the child characteristic 
outputs
+          // than what we get for the child of Inverse in Inverse(B + 26). The
+          // trick of copying the already-bound ORDER BY clause later avoids 
this.
+
+          nodeToInsertUnder->setChild(0, firstn);
+        }
 
       // reset firstN indication in the root node.
       setFirstNRows(-1);

http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f2714d8/core/sql/optimizer/RelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExpr.cpp b/core/sql/optimizer/RelExpr.cpp
index 5c0af91..f263d7e 100644
--- a/core/sql/optimizer/RelExpr.cpp
+++ b/core/sql/optimizer/RelExpr.cpp
@@ -11388,8 +11388,9 @@ void RelRoot::setMvBindContext(MvBindContext * 
pMvBindContext)
   pMvBindContextForScope_ = pMvBindContext;
 }
 
-void RelRoot::addOneRowAggregates(BindWA* bindWA)
+NABoolean RelRoot::addOneRowAggregates(BindWA* bindWA, NABoolean 
forceGroupByAgg)
 {
+  NABoolean GroupByAggNodeAdded = FALSE;
   RelExpr * childOfRoot = child(0);
   GroupByAgg *aggNode = NULL;
   // If the One Row Subquery is already enforced by a scalar aggregate
@@ -11404,7 +11405,11 @@ void RelRoot::addOneRowAggregates(BindWA* bindWA)
   // way out and add a one row aggregate.
   // Also if the groupby is non scalar then we need to add a one row aggregate.
   // Also if we have select max(a) + select b from t1 from t2;
-  if (childOfRoot->getOperatorType() == REL_GROUPBY)
+  // Still another exception is if there is a [last 0] on top of this node. We
+  // need an extra GroupByAgg node with one row aggregates in this case so
+  // we can put the FirstN node underneath that.
+  if (!forceGroupByAgg &&
+      (childOfRoot->getOperatorType() == REL_GROUPBY))
     {
       aggNode = (GroupByAgg *)childOfRoot;
 
@@ -11421,7 +11426,7 @@ void RelRoot::addOneRowAggregates(BindWA* bindWA)
 
     }
   if (aggNode)
-    return ;
+    return GroupByAggNodeAdded;
 
   const RETDesc *oldTable = getRETDesc();
   RETDesc *resultTable = new(bindWA->wHeap()) RETDesc(bindWA);
@@ -11469,9 +11474,12 @@ void RelRoot::addOneRowAggregates(BindWA* bindWA)
 
   newGrby->bindNode(bindWA) ;
   child(0) = newGrby ;
+  GroupByAggNodeAdded = TRUE;
   // Set the return descriptor
   //
   setRETDesc(resultTable);
+
+  return GroupByAggNodeAdded;
 }
 // -----------------------------------------------------------------------
 // member functions for class PhysicalRelRoot

http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f2714d8/core/sql/optimizer/RelMisc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelMisc.h b/core/sql/optimizer/RelMisc.h
index e6920ca..9ea9782 100644
--- a/core/sql/optimizer/RelMisc.h
+++ b/core/sql/optimizer/RelMisc.h
@@ -521,7 +521,8 @@ public:
   // defined in generator/GenRelMisc.cpp
   TrafQuerySimilarityInfo * genSimilarityInfo(Generator *generator);
 
-  void addOneRowAggregates(BindWA * bindWA);
+  // returns TRUE if a GroupByAgg node was added
+  NABoolean addOneRowAggregates(BindWA * bindWA, NABoolean forceGroupByAgg);
 
   inline void setNumBMOs(unsigned short num) { numBMOs_ = num; }
   inline unsigned short getNumBMOs() { return numBMOs_; }

http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f2714d8/core/sql/regress/core/EXPECTED002.LINUX
----------------------------------------------------------------------
diff --git a/core/sql/regress/core/EXPECTED002.LINUX 
b/core/sql/regress/core/EXPECTED002.LINUX
index 6d2a190..fd9659c 100644
--- a/core/sql/regress/core/EXPECTED002.LINUX
+++ b/core/sql/regress/core/EXPECTED002.LINUX
@@ -21,6 +21,13 @@
 --- SQL operation complete.
 >>#ifMX
 >>
+>>create table t002main (a int not null, b int, primary key (a));
+
+--- SQL operation complete.
+>>create table t002sub  (x int not null, y int, primary key (x));
+
+--- SQL operation complete.
+>>
 >>?section dml
 >>-- INSERT queries
 >>insert into t002t1 values (10, 'abc', 20, 'xy');
@@ -48,6 +55,13 @@
 --- 2 row(s) inserted.
 >>#ifMX
 >>
+>>insert into t002main values (1,1);
+
+--- 1 row(s) inserted.
+>>insert into t002sub values (1,1), (2,1);
+
+--- 2 row(s) inserted.
+>>
 >>?section subqtests
 >>-- Expect 2 identical rows saying "2 2";
 >>-- thus certain queries following expect 2 identical rows
@@ -464,22 +478,16 @@ A            AMAX         B                   BMAX        
        C            C
 >>
 >>
 >>------------------------------------------------------------------------
->>?section Genesis_10_000222_6892_p1
->>SELECT 1 FROM T002T3 T1
-+>GROUP BY T1.A
-+>HAVING T1.A >ANY
-+>  ( SELECT 2 FROM T002T1 T2
-+>    WHERE T2.C >SOME
-+>      ( SELECT AVG (T1.A) FROM T002T1 T3 )
-+>  );
-
-(EXPR)
-------
-
-     1
-     1
-
---- 2 row(s) selected.
+>>-- This test disabled since it is non -deterministic.
+>>-- It will be enabled after further investigation.
+>>-- ?section Genesis_10_000222_6892_p1
+>>-- SELECT 1 FROM T002T3 T1
+>>-- GROUP BY T1.A
+>>-- HAVING T1.A >ANY
+>>--  ( SELECT 2 FROM T002T1 T2
+>>--    WHERE T2.C >SOME
+>>--      ( SELECT AVG (T1.A) FROM T002T1 T3 )
+>>--  );
 >>
 >>?section Genesis_10_000222_6892_p2
 >>SELECT 1 FROM T002T1 T1
@@ -1149,4 +1157,111 @@ A            AMAX         B                   BMAX      
          C            C
 >> -- cnt
 >>------------------------------------------------------------------------
 >>
+>>-- Tests of the interaction of [first n] etc. with subqueries
+>>
+>>-- Should return 1
+>>select 
++>(select [FIRST 1] y aa from t002sub b where b.x = a.b) as result_value
++>from t002main a;
+
+RESULT_VALUE
+------------
+
+           1
+
+--- 1 row(s) selected.
+>>
+>>-- Should return null
+>>select 
++>(select [last 0] y aa from t002sub b where b.x = a.b) as result_value
++>from t002main a;
+
+RESULT_VALUE
+------------
+
+           ?
+
+--- 1 row(s) selected.
+>>
+>>-- Should get a cardinality violation (error 8401)
+>>select 
++>(select [first 2] y aa from t002sub b where b.y = a.b) as result_value
++>from t002main a;
+
+*** ERROR[8401] A row subquery or SELECT...INTO statement cannot return more 
than one row.
+
+--- 0 row(s) selected.
+>>
+>>-- Should return 1
+>>select 
++>(select [first 1] y aa from t002sub b where b.y = a.b) as result_value
++>from t002main a;
+
+RESULT_VALUE
+------------
+
+           1
+
+--- 1 row(s) selected.
+>>
+>>-- Should return 1
+>>select 
++>(select [last 1] y aa from t002sub b where b.y = a.b) as result_value
++>from t002main a;
+
+RESULT_VALUE
+------------
+
+           1
+
+--- 1 row(s) selected.
+>>
+>>-- Should return null
+>>select 
++>(select [last 0] y aa from t002sub b where b.y = a.b) as result_value
++>from t002main a;
+
+RESULT_VALUE
+------------
+
+           ?
+
+--- 1 row(s) selected.
+>>
+>>-- Should return null
+>>select
++>(select [last 0] count(*) from t002sub) as result_value
++>from t002main;
+
+RESULT_VALUE        
+--------------------
+
+                   ?
+
+--- 1 row(s) selected.
+>>
+>>-- Should return 2
+>>select
++>(select [first 20] count(*) from t002sub) as result_value
++>from t002main;
+
+RESULT_VALUE        
+--------------------
+
+                   2
+
+--- 1 row(s) selected.
+>>
+>>-- Should return null
+>>select
++>(select [last 0] x from t002sub) as result_value
++>from t002main;
+
+RESULT_VALUE
+------------
+
+           ?
+
+--- 1 row(s) selected.
+>>
 >>log;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/2f2714d8/core/sql/regress/core/TEST002
----------------------------------------------------------------------
diff --git a/core/sql/regress/core/TEST002 b/core/sql/regress/core/TEST002
index 5d0889e..34529c7 100755
--- a/core/sql/regress/core/TEST002
+++ b/core/sql/regress/core/TEST002
@@ -50,6 +50,9 @@ create table t002ut1 (a int, b nchar(9), c int, d nchar(4));
 create table t002ut2 (a int not null, b nchar(9), c int, d nchar(4), primary 
key (a));
 #ifMX
 
+create table t002main (a int not null, b int, primary key (a));
+create table t002sub  (x int not null, y int, primary key (x));
+
 ?section dml
 -- INSERT queries
 insert into t002t1 values (10, 'abc', 20, 'xy');
@@ -63,6 +66,9 @@ insert into t002ut1(b,d,a,c) values (N'defg', N'wx', 10+10, 
30);
 insert into t002ut2 select * from t002ut1;
 #ifMX
 
+insert into t002main values (1,1);
+insert into t002sub values (1,1), (2,1);
+
 ?section subqtests
 -- Expect 2 identical rows saying "2 2";
 -- thus certain queries following expect 2 identical rows
@@ -533,6 +539,53 @@ select SUM  (O.X) from t002FU O having exists(select 
COUNT(I.X) from t002FUI I);
 select COUNT(O.X) from t002FU O having exists(select COUNT(I.X) from t002FUI 
I); -- cnt
 ------------------------------------------------------------------------
 
+-- Tests of the interaction of [first n] etc. with subqueries
+
+-- Should return 1
+select 
+(select [FIRST 1] y aa from t002sub b where b.x = a.b) as result_value
+from t002main a;
+
+-- Should return null
+select 
+(select [last 0] y aa from t002sub b where b.x = a.b) as result_value
+from t002main a;
+
+-- Should get a cardinality violation (error 8401)
+select 
+(select [first 2] y aa from t002sub b where b.y = a.b) as result_value
+from t002main a;
+
+-- Should return 1
+select 
+(select [first 1] y aa from t002sub b where b.y = a.b) as result_value
+from t002main a;
+
+-- Should return 1
+select 
+(select [last 1] y aa from t002sub b where b.y = a.b) as result_value
+from t002main a;
+
+-- Should return null
+select 
+(select [last 0] y aa from t002sub b where b.y = a.b) as result_value
+from t002main a;
+
+-- Should return null
+select
+(select [last 0] count(*) from t002sub) as result_value
+from t002main;
+
+-- Should return 2
+select
+(select [first 20] count(*) from t002sub) as result_value
+from t002main;
+
+-- Should return null
+select
+(select [last 0] x from t002sub) as result_value
+from t002main;
+
 log;
 obey TEST002(clnup);
 exit;
@@ -554,6 +607,9 @@ drop table t002ut1;
 drop table t002ut2;
 #ifMX
 
+drop table t002main;
+drop table t002sub;
+
 ?section clnup_end
 
 

Reply via email to