Github user zellerh commented on a diff in the pull request:
https://github.com/apache/incubator-trafodion/pull/255#discussion_r49412057
--- Diff: core/sql/generator/GenPreCode.cpp ---
@@ -11274,6 +11274,485 @@ short
HbaseAccess::extractHbaseFilterPreds(Generator * generator,
return 0;
}
+////////////////////////////////////////////////////////////////////////////
+// isHbaseFilterPredV2
+//
+// Decides whether one leaf predicate (no AND/OR) can be pushed down to
+// HBase as a "V2" filter and, if so, returns its column, value and
+// operator name.
+//
+// To push down, the predicate must have the following form:
+//   xp := <column> <op> <value-expr>
+//   xp := <column> is not null   (no support for hbase lookup)
+//   xp := <column> is null       (no support for hbase lookup)
+//   (xp := <column> like <value-expr> not yet implemented)
+//   xp := <xp> OR <xp>    (not evaluated in isHbaseFilterPredV2, but by
+//                          extractHbaseFilterPredsV2)
+//   xp := <xp> AND <xp>   (not evaluated in isHbaseFilterPredV2, but by
+//                          extractHbaseFilterPredsV2)
+//
+// and all of the following conditions must be met:
+//
+//   <column>:     a base table or index column which can be serialized
+//                 and belong to the table being scanned.
+//                   serialized: either the column doesn't need encoding,
+//                               like an unsigned integer, or the column
+//                               was declared with the SERIALIZED option.
+//                 it also must not be an added column with default
+//                 non null.
+//   <op>:         eq, ne, gt, ge, lt, le
+//   <value-expr>: an expression that only contains const or param values,
+//                 and <value-expr>'s datatype is not a superset of
+//                 <column>'s datatype.
+//
+// Parameters:
+//   generator  used for its heap (wHeap) and BindWA when new ConstValue /
+//              Cast / CompEncode nodes are created, bound and pre-code-
+//              generated.
+//   ie         the predicate to examine; a NULL pointer yields FALSE.
+//   colVID     (out) value id of the column side of the predicate. For an
+//              ITM_HBASE_COLUMN_LOOKUP operand it is the value id of a new
+//              ConstValue built from hcl->hbaseCol().
+//   valueVID   (out) NULL_VALUE_ID for IS [NOT] NULL; for comparisons, the
+//              value id of the Cast (possibly wrapped in CompEncode) built
+//              around the value operand.
+//   op         (out) operator name handed to the java side: EQUAL,
+//              NOT_EQUAL, LESS, LESS_OR_EQUAL, GREATER, GREATER_OR_EQUAL,
+//              NO_OP, IS_NULL or IS_NOT_NULL, with "_NULL" appended when a
+//              nullable type is involved.
+//
+// Returns TRUE if the predicate can be pushed down. colVID and valueVID may
+// already have been overwritten when FALSE is returned; op is only assigned
+// on success.
+/////////////////////////////////////////////////////////////////////////////
+NABoolean HbaseAccess::isHbaseFilterPredV2(Generator * generator, ItemExpr * ie,
+                                           ValueId &colVID, ValueId &valueVID,
+                                           NAString &op)
+{
+  NABoolean foundBinary = FALSE;
+  NABoolean foundUnary = FALSE;
+  NABoolean hbaseLookupPred = FALSE;
+  NABoolean flipOp = FALSE;  // set to TRUE when column is child(1)
+
+  if (ie &&
+      ((ie->getOperatorType() >= ITM_EQUAL) &&
+       (ie->getOperatorType() <= ITM_GREATER_EQ))) //binary operator case
+    {//begin expression
+      // Each column kind is tried on the left first, then on the right;
+      // the other operand must not reference any column.
+      if ((ie->child(0)->getOperatorType() == ITM_BASECOLUMN) &&
+          (NOT hasColReference(ie->child(1))))
+        {
+          foundBinary = TRUE;
+          colVID = ie->child(0)->getValueId();
+          valueVID = ie->child(1)->getValueId();
+        }
+      else if ((ie->child(1)->getOperatorType() == ITM_BASECOLUMN) &&
+               (NOT hasColReference(ie->child(0))))
+        {
+          foundBinary = TRUE;
+          flipOp = TRUE;
+          colVID = ie->child(1)->getValueId();
+          valueVID = ie->child(0)->getValueId();
+        }
+      else if ((ie->child(0)->getOperatorType() == ITM_INDEXCOLUMN) &&
+               (NOT hasColReference(ie->child(1))))
+        {
+          foundBinary = TRUE;
+          colVID = ie->child(0)->getValueId();
+          valueVID = ie->child(1)->getValueId();
+        }
+      else if ((ie->child(1)->getOperatorType() == ITM_INDEXCOLUMN) &&
+               (NOT hasColReference(ie->child(0))))
+        {
+          foundBinary = TRUE;
+          flipOp = TRUE;
+          colVID = ie->child(1)->getValueId();
+          valueVID = ie->child(0)->getValueId();
+        }
+      else if ((ie->child(0)->getOperatorType() == ITM_REFERENCE) &&
+               (NOT hasColReference(ie->child(1))))
+        {
+          foundBinary = TRUE;
+          colVID = ie->child(0)->getValueId();
+          valueVID = ie->child(1)->getValueId();
+        }
+      else if ((ie->child(1)->getOperatorType() == ITM_REFERENCE) &&
+               (NOT hasColReference(ie->child(0))))
+        {
+          foundBinary = TRUE;
+          flipOp = TRUE;
+          colVID = ie->child(1)->getValueId();
+          valueVID = ie->child(0)->getValueId();
+        }
+      else if ((ie->child(0)->getOperatorType() == ITM_HBASE_COLUMN_LOOKUP) &&
+               (NOT hasColReference(ie->child(1))))
+        {
+          HbaseColumnLookup * hcl =
+            (HbaseColumnLookup*)ie->child(0)->castToItemExpr();
+          // only character-typed lookups are pushed down
+          if (hcl->getValueId().getType().getTypeQualifier() == NA_CHARACTER_TYPE)
+            {
+              hbaseLookupPred = TRUE;
+
+              // the "column" handed back is a constant built from hbaseCol()
+              ItemExpr * newCV = new(generator->wHeap()) ConstValue(hcl->hbaseCol());
+              newCV = newCV->bindNode(generator->getBindWA());
+              newCV = newCV->preCodeGen(generator);
+
+              foundBinary = TRUE;
+              colVID = newCV->getValueId();
+              valueVID = ie->child(1)->getValueId();
+            }
+        }
+      else if ((ie->child(1)->getOperatorType() == ITM_HBASE_COLUMN_LOOKUP) &&
+               (NOT hasColReference(ie->child(0))))
+        {
+          HbaseColumnLookup * hcl =
+            (HbaseColumnLookup*)ie->child(1)->castToItemExpr();
+          if (hcl->getValueId().getType().getTypeQualifier() == NA_CHARACTER_TYPE)
+            {
+              hbaseLookupPred = TRUE;
+
+              ItemExpr * newCV = new(generator->wHeap()) ConstValue(hcl->hbaseCol());
+              newCV = newCV->bindNode(generator->getBindWA());
+              newCV = newCV->preCodeGen(generator);
+
+              foundBinary = TRUE;
+              flipOp = TRUE;
+              colVID = newCV->getValueId();
+              valueVID = ie->child(0)->getValueId();
+            }
+        }
+    }//end binary operators
+  else if (ie && ((ie->getOperatorType() == ITM_IS_NULL) ||
+                  (ie->getOperatorType() == ITM_IS_NOT_NULL)))
+    {//check for unary operators
+      if ((ie->child(0)->getOperatorType() == ITM_BASECOLUMN) ||
+          (ie->child(0)->getOperatorType() == ITM_INDEXCOLUMN) ||
+          (ie->child(0)->getOperatorType() == ITM_REFERENCE))
+        {
+          foundUnary = TRUE;
+          colVID = ie->child(0)->getValueId();
+          valueVID = NULL_VALUE_ID;
+        }
+    }//end unary operators
+
+  //check if found columns belong to table being scanned (so is not an
+  //input to the scan node)
+  if (foundBinary || foundUnary)
+    {
+      ValueId dummyValueId;
+      if (getGroupAttr()->getCharacteristicInputs().
+            referencesTheGivenValue(colVID, dummyValueId))
+        {
+          foundBinary = FALSE;
+          foundUnary = FALSE;
+        }
+    }
+
+  //check if not an added column with default non null
+  if ((foundBinary || foundUnary) && (NOT hbaseLookupPred))
+    {
+      // Must start out NULL: an ITM_REFERENCE column takes the default
+      // branch below, which assigns nothing, and nac is tested right after.
+      NAColumn * nac = NULL;
+      switch (colVID.getItemExpr()->getOperatorType())
+        {
+        case ITM_BASECOLUMN:
+          nac = ((BaseColumn*)colVID.getItemExpr())->getNAColumn();
+          break;
+        case ITM_INDEXCOLUMN:
+          nac = ((IndexColumn*)colVID.getItemExpr())->getNAColumn();
+          break;
+        default:
+          break;
+        }
+      if (nac && nac->isAddedColumn() && nac->getDefaultValue())
+        {
+          foundBinary = FALSE;
+          foundUnary = FALSE;
+        }
+    }
+
+  if (foundBinary)
+    {
+      // These references are bound before valueVID is replaced further down
+      // and keep describing the original operand types.
+      const NAType &colType = colVID.getType();
+      const NAType &valueType = valueVID.getType();
+
+      NABoolean generateNarrow = FALSE;
+      if (NOT hbaseLookupPred)
+        {
+          generateNarrow = valueType.errorsCanOccur(colType);
+          if ((generateNarrow) || // value not a superset of column
+              (NOT columnEnabledForSerialization(colVID.getItemExpr())))
+            foundBinary = FALSE;
+        }
+
+      if (foundBinary)
+        {
+          if (colType.getTypeQualifier() == NA_CHARACTER_TYPE)
+            {
+              const CharType &charColType = (CharType&)colType;
+              const CharType &charValType = (CharType&)valueType;
+
+              // case-insensitive or upshifted strings are not pushed down
+              if ((charColType.isCaseinsensitive() || charValType.isCaseinsensitive()) ||
+                  (charColType.isUpshifted() || charValType.isUpshifted()))
+                foundBinary = FALSE;
+            }
+          else if (colType.getTypeQualifier() == NA_NUMERIC_TYPE)
+            {
+              const NumericType &numType = (NumericType&)colType;
+              const NumericType &valType = (NumericType&)valueType;
+              // BigNum on either side is not pushed down
+              if (numType.isBigNum() || valType.isBigNum())
+                foundBinary = FALSE;
+            }
+        }
+
+      if (foundBinary)
+        {
+          if ((ie) && (((BiRelat*)ie)->addedForLikePred()) &&
+              (valueVID.getItemExpr()->getOperatorType() == ITM_CONSTANT))
+            {
+              // remove trailing '\0' characters since this is being pushed
+              // down to hbase.
+              ConstValue * cv = (ConstValue*)(valueVID.getItemExpr());
+              char * cvv = (char*)cv->getConstValue();
+              Lng32 len = cv->getStorageSize() - 1;
+              while ((len > 0) && (cvv[len] == '\0'))
+                len--;
+
+              NAString newCVV(cvv, len+1);
+
+              ItemExpr * newCV = new(generator->wHeap()) ConstValue(newCVV);
+              newCV = newCV->bindNode(generator->getBindWA());
+              newCV = newCV->preCodeGen(generator);
+              valueVID = newCV->getValueId();
+            }
+
+          // Cast the value to the column's type; for an hbase lookup the
+          // value keeps its own type.
+          ItemExpr * castValue = NULL;
+          if (NOT hbaseLookupPred)
+            castValue = new(generator->wHeap())
+              Cast(valueVID.getItemExpr(), &colType);
+          else
+            {
+              castValue = new(generator->wHeap())
+                Cast(valueVID.getItemExpr(), &valueVID.getType());
+            }
+
+          if ((NOT hbaseLookupPred) &&
+              (isEncodingNeededForSerialization(colVID.getItemExpr())))
+            {
+              castValue = new(generator->wHeap()) CompEncode
+                (castValue, FALSE, -1, CollationInfo::Sort, TRUE, FALSE);
+            }
+
+          castValue = castValue->bindNode(generator->getBindWA());
+          castValue = castValue->preCodeGen(generator);
+
+          valueVID = castValue->getValueId();
+
+          NAString nullType;
+
+          if ((colType.supportsSQLnull()) ||
+              (valueType.supportsSQLnull()))
+            {
+              nullType = "_NULL";
+            }
+          else
+            {
+              nullType = "";
+            }
+
+          // append _NULL to the operator to signify the java code generating
+          // pushdown filters to handle NULL semantic logic.
+          // When the column was child(1) (flipOp) the inequality is mirrored
+          // so that op always reads as "<column> op <value>".
+          if (ie->getOperatorType() == ITM_EQUAL)
+            op = "EQUAL"+nullType;
+          else if (ie->getOperatorType() == ITM_NOT_EQUAL)
+            op = "NOT_EQUAL"+nullType;
+          else if (ie->getOperatorType() == ITM_LESS)
+            {
+              if (flipOp)
+                op = "GREATER"+nullType;
+              else
+                op = "LESS"+nullType;
+            }
+          else if (ie->getOperatorType() == ITM_LESS_EQ)
+            {
+              if (flipOp)
+                op = "GREATER_OR_EQUAL"+nullType;
+              else
+                op = "LESS_OR_EQUAL"+nullType;
+            }
+          else if (ie->getOperatorType() == ITM_GREATER)
+            {
+              if (flipOp)
+                op = "LESS"+nullType;
+              else
+                op = "GREATER"+nullType;
+            }
+          else if (ie->getOperatorType() == ITM_GREATER_EQ)
+            {
+              if (flipOp)
+                op = "LESS_OR_EQUAL"+nullType;
+              else
+                op = "GREATER_OR_EQUAL"+nullType;
+            }
+          else
+            op = "NO_OP"+nullType;
+        }
+    }
+
+  if (foundUnary)
+    {
+      const NAType &colType = colVID.getType();
+      NAString nullType;
+
+      if (colType.supportsSQLnull())
+        {
+          nullType = "_NULL";
+        }
+      else
+        {
+          nullType = "";
+        }
+      if (ie->getOperatorType() == ITM_IS_NULL)
+        op = "IS_NULL"+nullType;
+      else if (ie->getOperatorType() == ITM_IS_NOT_NULL)
+        op = "IS_NOT_NULL"+nullType;
+    }
+
+  return foundBinary || foundUnary;
+}
+// Non-recursive front end for HBase filter predicate extraction. It picks
+// the implementation from the HBASE_FILTER_PREDS default and performs the
+// one-time setup that must stay out of the recursive
+// extractHbaseFilterPredsV2. Always returns 0 except when delegating to
+// the version 1 routine, whose result is returned unchanged.
+short HbaseAccess::extractHbaseFilterPredsVX(Generator * generator,
+                                             ValueIdSet &preds, ValueIdSet &newExePreds)
+{
+  switch (CmpCommon::getDefault(HBASE_FILTER_PREDS))
+    {
+    case DF_OFF:
+      // push down disabled
+      return 0;
+
+    case DF_MINIMUM:
+      // initial (version 1) implementation
+      return extractHbaseFilterPreds(generator, preds, newExePreds);
+
+    default:
+      // any other setting (DF_MEDIUM per the original note) uses V2
+      break;
+    }
+
+  // cannot push preds for aligned format row
+  if (getTableDesc()->getNATable()->isAlignedFormat(getIndexDesc()))
+    return 0;
+
+  // "V2" marker tells the java side that predicate pushdown V2 semantics
+  // apply to the operator list that follows
+  opList_.insert("V2");
+
+  // recursive walk; its boolean result is not used here
+  extractHbaseFilterPredsV2(generator, preds, newExePreds, FALSE, TRUE);
+  return 0;
+}
+
+// Returns TRUE if the node was successfully pushed down.
+NABoolean HbaseAccess::extractHbaseFilterPredsV2(Generator * generator,
+ ValueIdSet &preds, ValueIdSet
&newExePreds, NABoolean checkOnly, NABoolean isFirstAndLayer )
+{
+
+ // the isFirstAndLayer is used to allow detecting top level predicate
that can still be pushed to executor
+ int addedNode=0;
+ for (ValueId vid = preds.init();
+ (preds.next(vid));
+ preds.advance(vid))
+ {
+ ItemExpr * ie = vid.getItemExpr();
+
+ // if it is AND operation, recurse through left and right children
+ if (ie->getOperatorType() == ITM_AND)
+ {
+ ValueIdSet leftPreds;
+ ValueIdSet rightPreds;
+ leftPreds += ie->child(0)->castToItemExpr()->getValueId();
+ rightPreds += ie->child(1)->castToItemExpr()->getValueId();
+ if (isFirstAndLayer){
+ NABoolean leftOK = extractHbaseFilterPredsV2(generator,
leftPreds, newExePreds,TRUE, TRUE);
+ NABoolean rightOK = extractHbaseFilterPredsV2(generator,
rightPreds, newExePreds,TRUE, TRUE);
+ if (leftOK && rightOK){
+ if (!checkOnly){
+ extractHbaseFilterPredsV2(generator,
leftPreds, newExePreds,FALSE, TRUE);//generate tree
+ extractHbaseFilterPredsV2(generator,
rightPreds, newExePreds,FALSE, TRUE);//generate tree
+ opList_.insert("AND"); // insert an AND
node since both side are OK to push down
+ if (addedNode>0)opList_.insert("AND");
// if it is not the first node addd to the push down, AND it with the rest
+ addedNode++; // we just pushed it down,
so increase the node count pushed down.
+ }
+ if (preds.entries()==1)
+ return TRUE;
+ } else if (leftOK){ // if only left is OK to push down
+ if(!checkOnly){
+ extractHbaseFilterPredsV2(generator,
leftPreds, newExePreds,FALSE, TRUE);//generate left tree
+ newExePreds.insert(rightPreds); //make
sure we add the right child to predicates that needs executor evalvaluation
+ if (addedNode>0)opList_.insert("AND");
// if it is not the first node addd to the push down, AND it with the rest
+ addedNode++; // we pushed down left
side so mark it
+ }
+ if (preds.entries()==1)
+ return TRUE;
+ } else if (rightOK){// if only right is OK to push down
+ if(!checkOnly){
+ extractHbaseFilterPredsV2(generator,
rightPreds, newExePreds,FALSE, TRUE);//generate right tree
+ newExePreds.insert(leftPreds);//make
sure we add the left child to predicates that needs executor evalvaluation
+ if (addedNode>0)opList_.insert("AND");
// if it is not the first node addd to the push down, AND it with the rest
+ addedNode++;// we pushed down right
side so mark it
+ }
+ if (preds.entries()==1)
+ return TRUE;
+ } else{
+ if(!checkOnly){
+ newExePreds.insert(vid);// we pushed down
nothing, make sure the whole node is evaluated by Executor
+ }
+ if (preds.entries()==1)
+ return FALSE;
+
+ }
+ }
+ else{//if not first AND layer, both left and right must be
pushable to get anything pushed
+ if(extractHbaseFilterPredsV2(generator, leftPreds,
newExePreds, TRUE, FALSE)&&
--- End diff --
Do you think the recursive call is ok? In some cases we may have an OR
predicate with thousands of recursive calls. We have run into stack overflows
in the past. For the OR case, it may be worth converting the OR backbone into a
ValueIdSet or ValueIdList and iterating over that instead.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---