Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 3a7b00563 -> 08e1faa21


[TRAFODION-2021] Internal error on upsert with index

In this particular case, the extra nodes added by index maintenance
and the WHERE clause interacted in a way that caused some missing
characteristic inputs, leading to the generator error.  The fix is to
add the required inputs not directly to the nodes, but as "outer
references" to the BindScope, so that they get added automatically to
any nodes created in that BindScope. Since the predicates get created
by a method called at a higher level, I also needed to add a "remote
control" feature for the BindScope of a MergeUpdate node.

Also: [TRAFODION-2026] Upsert fails on table with nullable key column.

When creating the predicate that links the target clustering key with
the corresponding source values, use "special nulls" semantics for
nullable key columns, so a comparesion NULL = NULL is allowed and
yields TRUE.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/2459135a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/2459135a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/2459135a

Branch: refs/heads/master
Commit: 2459135a3ed27d68acc2350b0da4d64132f249a2
Parents: d199362
Author: Hans Zeller <[email protected]>
Authored: Thu Jun 2 21:28:44 2016 +0000
Committer: Hans Zeller <[email protected]>
Committed: Thu Jun 2 21:28:44 2016 +0000

----------------------------------------------------------------------
 core/sql/optimizer/BindRelExpr.cpp | 111 +++++++++++++++++++++-----------
 core/sql/optimizer/BindWA.h        |   1 +
 core/sql/optimizer/NormRelExpr.cpp |   4 +-
 core/sql/optimizer/RelExpr.cpp     |   2 +-
 core/sql/optimizer/RelUpdate.h     |   5 ++
 5 files changed, 84 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2459135a/core/sql/optimizer/BindRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/BindRelExpr.cpp 
b/core/sql/optimizer/BindRelExpr.cpp
index b81fdcf..38cf77c 100644
--- a/core/sql/optimizer/BindRelExpr.cpp
+++ b/core/sql/optimizer/BindRelExpr.cpp
@@ -10218,6 +10218,9 @@ NABoolean Insert::isUpsertThatNeedsMerge(NABoolean 
isAlignedRowFormat, NABoolean
      return FALSE;
 }
 
+// take an insert(src) node and transform it into
+// tsj_flow(src, merge_update(input_scan))
+// with a newly created input_scan
 RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA) 
 {
   NATable *naTable = bindWA->getNATable(getTableName());
@@ -10231,11 +10234,24 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
     return NULL;               
   }
 
+  // columns of the target table
   const ValueIdList &tableCols = updateToSelectMap().getTopValues();
   const ValueIdList &sourceVals = updateToSelectMap().getBottomValues();
 
   NABoolean isAlignedRowFormat = 
getTableDesc()->getNATable()->isSQLMXAlignedTable();
                    
+  // Create a new BindScope, to encompass the new nodes 
merge_update(input_scan)
+  // and any inlining nodes that will be created. Any values the merge_update
+  // and children will need from src will be marked as outer references in that
+  // new BindScope. We assume that "src" is already bound.
+  ValueIdSet currOuterRefs = bindWA->getCurrentScope()->getOuterRefs();
+
+  CMPASSERT(child(0)->nodeIsBound());
+  bindWA->initNewScope();
+
+  BindScope *mergeScope = bindWA->getCurrentScope();
+
+  // create a new scan of the target table, to be used in the merge
   Scan * inputScan =
     new (bindWA->wHeap())
     Scan(CorrName(getTableDesc()->getCorrNameObj(), bindWA->wHeap()));
@@ -10252,8 +10268,9 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
   ColReference * targetColRef;
   int predCount = 0;
   int setCount = 0;
-  ValueIdSet myOuterRefs;
+  ValueIdSet newOuterRefs;
 
+  // loop over the columns of the target table
   for (CollIndex i = 0; i<tableCols.entries(); i++)
   {
     baseCol = (BaseColumn *)(tableCols[i].getItemExpr()) ;
@@ -10265,10 +10282,15 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
               baseCol->getNAColumn()->getFullColRefName(), bindWA->wHeap()));
     if (baseCol->getNAColumn()->isClusteringKey())
     {
+      // create a join/key predicate between source and target table,
+      // on the clustering key columns of the target table, making
+      // ColReference nodes for the target table, so that we can bind
+      // those to the new scan
       keyPredPrev = keyPred;
       keyPred = new (bindWA->wHeap())
         BiRelat(ITM_EQUAL, targetColRef, 
-                sourceVals[i].getItemExpr());
+                sourceVals[i].getItemExpr(),
+                baseCol->getType().supportsSQLnull());
       predCount++;
       if (predCount > 1) 
       {
@@ -10278,8 +10300,13 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
       }
     }
     if (sourceVals[i].getItemExpr()->getOperatorType() != ITM_CONSTANT)
-      myOuterRefs += sourceVals[i];
+      {
+        newOuterRefs += sourceVals[i];
+        mergeScope->addOuterRef(sourceVals[i]);
+      }
 
+    // create the INSERT (WHEN NOT MATCHED) part of the merge for this column, 
again
+    // with a ColReference that we will then bind to the MergeUpdate target 
table
     insertValPrev = insertVal;
     insertColPrev = insertCol ;
     insertVal = sourceVals[i].getItemExpr();
@@ -10304,9 +10331,11 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
       else
       if (! col->isClusteringKey()) 
       {
-         // We need to bind in the new = old values
+         // Create the UPDATE (WHEN MATCHED) part of the new MergeUpdate for
+         // a non-key column. We need to bind in the new = old values
          // in GenericUpdate::bindNode. So skip the columns that are not user
-         // specified and 
+         // specified. Note that we had a discussion on whether such a 
transformed
+         // UPSERT shouldn't update all columns.
          //
          if (assignExpr->isUserSpecified())
              copySetAssign = TRUE;
@@ -10325,40 +10354,45 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
          }
      }
   }
-  RelExpr * re = NULL;
-
-  re = new (bindWA->wHeap())
+  MergeUpdate *mu = new (bindWA->wHeap())
     MergeUpdate(CorrName(getTableDesc()->getCorrNameObj(), bindWA->wHeap()),
                 NULL,
                 REL_UNARY_UPDATE,
-                inputScan,
-                setAssign,
-                insertCol, 
-                insertVal,
+                inputScan, // USING
+                setAssign, // WHEN MATCHED THEN UPDATE
+                insertCol, // WHEN NOT MATCHED THEN INSERT (cols) ...
+                insertVal, // ... VALUES()
                 bindWA->wHeap(),
                 NULL);
 
-  ((MergeUpdate *)re)->setXformedUpsert();
-  RelExpr * mu = re;
-    
-  re = new(bindWA->wHeap()) Join
-    (child(0), mu, REL_TSJ_FLOW, NULL);
-  ((Join*)re)->doNotTransformToTSJ();
-  ((Join*)re)->setTSJForMerge(TRUE);   
-  ((Join*)re)->setTSJForMergeWithInsert(TRUE);
-  ((Join*)re)->setTSJForMergeUpsert(TRUE);
-  ((Join*)re)->setTSJForWrite(TRUE);
-
-  // if Inputs of current insert are empty (i.e. we have no params/rowsets)
-  // then there will be no pull up of inputs during transform and the join will
-  // not see the inputs of the mergeUpdate due to intermediate nodes. So
-  // add inputs directly to join and use elimination to remove extra inputs
-  if (NOT getGroupAttr()->getCharacteristicInputs().isEmpty())
-    mu->getGroupAttr()->addCharacteristicInputs(myOuterRefs);
-  else
-    re->getGroupAttr()->addCharacteristicInputs(myOuterRefs);
-  
-  re = re->bindNode(bindWA);
+  mu->setXformedUpsert();
+  // Use mergeScope, the scope we created here, for the MergeUpdate. We are
+  // creating some expressions with outer references here in this method, so
+  // we need to control the scope from here.
+  mu->setNeedsBindScope(FALSE);
+
+  RelExpr *boundMU = mu->bindNode(bindWA);
+
+  // remove the BindScope created earlier in this method
+  bindWA->removeCurrentScope();
+
+  // Remove the outer refs from the parent scope, they are provided
+  // by the left child of the TSJ_FLOW, unless they were already outer refs
+  // when we started this method. The binder logic doesn't handle
+  // that well, since they come from a child scope, not the current one,
+  // so we help a little.
+  newOuterRefs -= currOuterRefs;
+  bindWA->getCurrentScope()->removeOuterRefs(newOuterRefs);
+
+  Join * jn = new(bindWA->wHeap()) Join(child(0), boundMU, REL_TSJ_FLOW, NULL);
+
+  jn->doNotTransformToTSJ();
+  jn->setTSJForMerge(TRUE);    
+  jn->setTSJForMergeWithInsert(TRUE);
+  jn->setTSJForMergeUpsert(TRUE);
+  jn->setTSJForWrite(TRUE);
+
+  RelExpr *result = jn->bindNode(bindWA);
   if (bindWA->errStatus())
     return NULL;
   // Copy the userSecified and canBeSkipped attribute to 
mergeUpdateInsertExprArray
@@ -10370,7 +10404,7 @@ RelExpr* Insert::xformUpsertToMerge(BindWA *bindWA)
       ((Assign 
*)mergeInsertExprArray[i].getItemExpr())->setUserSpecified(assignExpr->isUserSpecified());
   }
  
-  return re;
+  return result;
 }
 
 RelExpr *HBaseBulkLoadPrep::bindNode(BindWA *bindWA)
@@ -10668,8 +10702,9 @@ RelExpr *MergeUpdate::bindNode(BindWA *bindWA)
       bindWA->getCurrentScope()->setRETDesc(getRETDesc());
       return this;
     }
-  
-  bindWA->initNewScope();
+
+  if (needsBindScope_)
+    bindWA->initNewScope();
 
   // For an xformaed upsert any UDF or subquery is guaranteed to be 
   // in the using clause. Upsert will not generate a merge without using 
@@ -10826,7 +10861,9 @@ RelExpr *MergeUpdate::bindNode(BindWA *bindWA)
     getGroupAttr()->addCharacteristicInputs
       (bindWA->getCurrentScope()->getOuterRefs());
   }
-  bindWA->removeCurrentScope(xformedUpsert()); // keepLocalRefs for Upsert
+
+  if (needsBindScope_)
+    bindWA->removeCurrentScope();
 
   bindWA->setMergeStatement(TRUE);
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2459135a/core/sql/optimizer/BindWA.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/BindWA.h b/core/sql/optimizer/BindWA.h
index 61715c0..0094bc8 100644
--- a/core/sql/optimizer/BindWA.h
+++ b/core/sql/optimizer/BindWA.h
@@ -634,6 +634,7 @@ public:
   // --------------------------------------------------------------------
   const ValueIdSet &getOuterRefs() const       { return outerRefs_; }
   void addOuterRef(ValueId vid)                { outerRefs_.insert(vid); }
+  void removeOuterRefs(ValueIdSet vids)         { outerRefs_ -= vids; }
 
   // --------------------------------------------------------------------
   // mergeOuterRefs() is called by the parent BindScope to merge the

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2459135a/core/sql/optimizer/NormRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NormRelExpr.cpp 
b/core/sql/optimizer/NormRelExpr.cpp
index 88259c3..bd3d83f 100644
--- a/core/sql/optimizer/NormRelExpr.cpp
+++ b/core/sql/optimizer/NormRelExpr.cpp
@@ -1475,6 +1475,8 @@ void Join::transformNode(NormWA & normWARef,
       // Check to see if we need to turn this into a TSJ.
       ValueIdSet neededInputs;
       neededInputs = 
child(1).getPtr()->getGroupAttr()->getCharacteristicInputs();
+      // is this ok? Our set of char. inputs may not yet be minimal,
+      // and could contain char. outputs from the left child.
       neededInputs -= getGroupAttr()->getCharacteristicInputs();
 
       ValueIdSet crossReferences;
@@ -1646,7 +1648,7 @@ void Join::pullUpPreds()
   // If outer/semi join then predicates from the right child go to
   // joinPred otherwise they go to the selectionPred.
   // ---------------------------------------------------------------------
-  if (isInnerNonSemiJoin())
+  if (isInnerNonSemiJoin() || getOperatorType() == REL_TSJ_FLOW)
     {
        selectionPred() += child(1)->getSelectionPred();
     }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2459135a/core/sql/optimizer/RelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExpr.cpp b/core/sql/optimizer/RelExpr.cpp
index fafc600..b50d666 100644
--- a/core/sql/optimizer/RelExpr.cpp
+++ b/core/sql/optimizer/RelExpr.cpp
@@ -12797,7 +12797,7 @@ MergeUpdate::MergeUpdate(const CorrName &name,
                         ItemExpr *where)
      : Update(name,tabId,otype,child,setExpr,NULL,oHeap),
        insertCols_(insertCols), insertValues_(insertValues),
-       where_(where), xformedUpsert_(FALSE)
+       where_(where), xformedUpsert_(FALSE), needsBindScope_(TRUE)
 {
   setCacheableNode(CmpMain::BIND);
   

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2459135a/core/sql/optimizer/RelUpdate.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelUpdate.h b/core/sql/optimizer/RelUpdate.h
index d949781..0a7a20e 100644
--- a/core/sql/optimizer/RelUpdate.h
+++ b/core/sql/optimizer/RelUpdate.h
@@ -1446,11 +1446,16 @@ public:
 
   NABoolean xformedUpsert() {return xformedUpsert_;}
   void setXformedUpsert() {xformedUpsert_ = TRUE;}
+
+  NABoolean needsBindScope() const { return needsBindScope_; }
+  void setNeedsBindScope(NABoolean b) { needsBindScope_ = b; }
+
 private:
   ItemExpr *insertCols_;
   ItemExpr *insertValues_;
   ItemExpr *where_;
   NABoolean xformedUpsert_;
+  NABoolean needsBindScope_;
 };
 
 

Reply via email to