Github user eowhadi commented on a diff in the pull request:
https://github.com/apache/incubator-trafodion/pull/255#discussion_r49525333
--- Diff: core/sql/src/main/java/org/trafodion/sql/HTableClient.java ---
@@ -421,29 +839,140 @@ else if (versions > 0)
else
numColsInScan = 0;
if (colNamesToFilter != null) {
- FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
-
- for (int i = 0; i < colNamesToFilter.length; i++) {
- byte[] colName = (byte[])colNamesToFilter[i];
- byte[] coByte = (byte[])compareOpList[i];
- byte[] colVal = (byte[])colValuesToCompare[i];
-
- if ((coByte == null) || (colVal == null)) {
- return false;
- }
-
- String coStr = new String(coByte);
- CompareOp co = CompareOp.valueOf(coStr);
-
- SingleColumnValueFilter filter1 =
- new SingleColumnValueFilter(getFamily(colName),
getName(colName),
- co, colVal);
- list.addFilter(filter1);
- }
-
+ FilterList list;
+ boolean narrowDownResultColumns = false; //to check if we need
a narrow down column filter (V2 only feature)
+ if (compareOpList == null)return false;
+ if (new String((byte[])compareOpList[0]).equals("V2")){ // are
we dealing with predicate pushdown V2
+ list = new
FilterList(FilterList.Operator.MUST_PASS_ALL);
+ HashMap<String,Object> columnsToRemove = new
HashMap<String,Object>();
+ //if columnsToRemove not null, we are narrowing down
using the SingleColumnValue[Exclude]Filter method
+ //else we will use the explicit FamilyFilter and
QualifierFilter
+ //the simplified logic is that we can use the first
method if and only if each and every column in the
+ //pushed down predicate shows up only once.
+ for (int i = 0; i < colNamesToFilter.length; i++) {
+ byte[] colName = (byte[])colNamesToFilter[i];
+
+ // check if the filter column is already part of the
column list, if not add it if we are limiting columns (not *)
+ if(columns!=null && columns.length > 0){// if not *
+ boolean columnAlreadyIn = false; //assume column not
yet in the scan object
+ for (int k=0; k<columns.length;k++){
+ if (Arrays.equals(colName,
(byte[])columns[k])){
+ columnAlreadyIn = true;//found
already exist
+ break;//no need to look further
+ }
+ }
+ if (!columnAlreadyIn){// column was not already in,
so add it
+
scan.addColumn(getFamily(colName),getName(colName));
+ narrowDownResultColumns = true; //since we
added a column for predicate eval, we need to remove it later out of result set
+ String strColName = new String(colName);
+ if (columnsToRemove != null &&
columnsToRemove.containsKey(strColName)){// if we already added this column, it
means it shows up more than once
+ columnsToRemove = null; // therefore,
use the FamilyFilter/QualifierFilter method
+ }else if (columnsToRemove != null)// else
+ columnsToRemove.put(strColName,null);
// add it to the list of column that should be nuked with the Exclude version
of the SingleColumnValueFilter
+ }
+ }
+ }
+ if (columnsToRemove != null)
+ { //we are almost done checking if Exclude version of
SingleColumnnValueFilter can be used. Th elast check s about to know if there
is a IS_NULL_NULL
+ //operation that cannot be using the Exclude method, as
it is transformed in a filterList with OR, therefore we cannot guaranty that
the SingleColumnValueExcludeFilter
+ //performing the exclusion will be reached.
+ boolean is_null_nullFound = false;
+ for (Object o:compareOpList ){
+ if (new
String((byte[])o).equals("IS_NULL_NULL")){
+ is_null_nullFound = true;
+ break;
+ }
+ }
+ if (is_null_nullFound){
+ columnsToRemove = null; // disable Exclude
method version of SingleColumnnValueFilter
+ }else
+ narrowDownResultColumns = false; // we will use
the Exclude version of SingleColumnnValueFilter, so bypass the
Family/QualifierFilter method
+ }
+ Filter f
=constructV2Filter(colNamesToFilter,compareOpList,colValuesToCompare,
columnsToRemove);
+ if (f==null) return false; // error logging done inside
constructV2Filter
+ list.addFilter(f);
+ }//end V2
+ else{// deal with V1
+ list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+
+ for (int i = 0; i < colNamesToFilter.length; i++) {
+ byte[] colName = (byte[])colNamesToFilter[i];
+ byte[] coByte = (byte[])compareOpList[i];
+ byte[] colVal = (byte[])colValuesToCompare[i];
+
+ if ((coByte == null) || (colVal == null)) {
+ return false;
+ }
+ String coStr = new String(coByte);
+ CompareOp co = CompareOp.valueOf(coStr);
+
+ SingleColumnValueFilter filter1 =
+ new SingleColumnValueFilter(getFamily(colName),
getName(colName),
+ co, colVal);
+ list.addFilter(filter1);
+ }
+ }//end V1
+ // if we added a column for predicate eval, we need to filter down
result columns
+ FilterList resultColumnsOnlyFilter = null;
+ if (narrowDownResultColumns){
+ HashMap<String,ArrayList<byte[]>> hm = new
HashMap<String,ArrayList<byte[]>>(3);//use to deal with multiple family table
+ // initialize hm with list of columns requested for output
+ for (int i=0; i<columns.length; i++){ // if we are here
we know columns is not null
+ if (hm.containsKey(new
String(getFamily((byte[])columns[i])))){
+ hm.get(new
String(getFamily((byte[])columns[i]))).add((byte[])columns[i]);
+ }else{
+ ArrayList<byte[]> al = new
ArrayList<byte[]>();
+ al.add((byte[])columns[i]);
+ hm.put(new
String(getFamily((byte[])columns[i])), al);
+ }
+ }
+
+ if (hm.size()==1){//only one column family
+ resultColumnsOnlyFilter = new
FilterList(FilterList.Operator.MUST_PASS_ALL);
--- End diff --
in this explicit example, the code is optimized not to use qualifier and
family filter, instead it used the SingleColumnValueExcludeFilter, therefore
not adding the overhead. Now if instead you do select a,b from t where B>10 and
B<20 and c<5, then the optimization is not applicable and yes, I use the
qualifier/family filter to pull in a and b only. Note that if a is nullable,
and b is nullable, then I will also add one column from the key, avoiding
_SALT_ if possible to make sure "absent" nulls are correctly accounted for. The
column to be retrieved is computed in HbaseAccess::computeRetrievedCols() that you
may not see since it did not change. The trick is that by moving the predicate
pushdown computing before invoking computeRetrievedCols in V2, the
executorPred() (remaining predicate that cannot be pushed down if any) is
already containing nothing if predicates will be pushed down. So column
computation will not include columns that are needed for predicate evaluation
at hbase level. Now for the operator evaluation, I think you are right. Will have to
experiment with it and see where is the point of diminishing return. One thing
to note is that this is the last item on the filter list on purpose, so this is
not evaluated on filtered out data. Had same concern and tried to put all
mitigation possible... This whole thing about limiting data is intuitively
attractive, but may be counter productive (CPU intense vs memory/network IO
friendly). But given HBase implemented these filter out of the box, I assume
they must have seen a benefit...
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---