This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e90ecf  HIVE-22982 : TopN Key efficiency check might disable filter 
too soon (Attila Magyar via Ashutosh Chauhan)
7e90ecf is described below

commit 7e90ecf480673364eb5de182f17f2c02eb144315
Author: Attila Magyar <amag...@cloudera.com>
AuthorDate: Mon Mar 9 08:26:35 2020 -0700

    HIVE-22982 : TopN Key efficiency check might disable filter too soon 
(Attila Magyar via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  4 ++--
 .../hadoop/hive/ql/exec/TopNKeyOperator.java       |  3 ++-
 .../hive/ql/exec/vector/VectorTopNKeyOperator.java | 11 ++++-----
 .../vector/wrapper/VectorHashKeyWrapperBatch.java  |  4 ++--
 .../VectorHashKeyWrapperGeneralComparator.java     | 13 +++++------
 .../apache/hadoop/hive/ql/plan/TopNKeyDesc.java    | 26 ++++++++++++++++++++++
 .../hadoop/hive/ql/exec/TestTopNKeyFilter.java     |  2 +-
 7 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 12f4822..a18a6d7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2417,8 +2417,8 @@ public class HiveConf extends Configuration {
     HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", true, "Whether to enable 
top n key optimizer."),
     HIVE_MAX_TOPN_ALLOWED("hive.optimize.topnkey.max", 128, "Maximum topN 
value allowed by top n key optimizer.\n" +
       "If the LIMIT is greater than this value then top n key optimization 
won't be used."),
-    
HIVE_TOPN_EFFICIENCY_THRESHOLD("hive.optimize.topnkey.efficiency.threshold", 
0.6f, "Disable topN key filter if the ratio between forwarded and total rows 
reaches this limit."),
-    
HIVE_TOPN_EFFICIENCY_CHECK_BATCHES("hive.optimize.topnkey.efficiency.check.nbatches",
 8, "Check topN key filter efficiency after a specific number of batches."),
+    
HIVE_TOPN_EFFICIENCY_THRESHOLD("hive.optimize.topnkey.efficiency.threshold", 
0.8f, "Disable topN key filter if the ratio between forwarded and total rows 
reaches this limit."),
+    
HIVE_TOPN_EFFICIENCY_CHECK_BATCHES("hive.optimize.topnkey.efficiency.check.nbatches",
 10000, "Check topN key filter efficiency after a specific number of batches."),
     HIVE_TOPN_MAX_NUMBER_OF_PARTITIONS("hive.optimize.topnkey.partitions.max", 
64, "Limit the maximum number of partitions used by the top N key operator."),
 
     HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index f09867b..e95d779 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -139,7 +139,8 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> 
implements Serializab
     }
 
     if (runTimeNumRows % conf.getCheckEfficiencyNumRows() == 0) { // check the 
efficiency at every nth rows
-      checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, 
conf.getEfficiencyThreshold(), LOG);
+      checkTopNFilterEfficiency(
+        topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), 
LOG, conf.getCheckEfficiencyNumRows());
     }
   }
 
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
index 0f8eb17..10567c7 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -178,14 +178,15 @@ public class VectorTopNKeyOperator extends 
Operator<TopNKeyDesc> implements Vect
     batch.selectedInUse = selectedInUseBackup;
 
     if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
-      checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, 
conf.getEfficiencyThreshold(), LOG);
+      checkTopNFilterEfficiency(
+        topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), 
LOG, conf.getCheckEfficiencyNumRows());
     }
   }
 
   public static void checkTopNFilterEfficiency(Map<KeyWrapper, TopNKeyFilter> 
filters,
-                                                Set<KeyWrapper> 
disabledPartitions,
-                                                float efficiencyThreshold,
-                                                Logger log)
+                                               Set<KeyWrapper> 
disabledPartitions,
+                                               float efficiencyThreshold,
+                                               Logger log, long 
checkEfficiencyNumRows)
   {
     Iterator<Map.Entry<KeyWrapper, TopNKeyFilter>> iterator = 
filters.entrySet().iterator();
     while (iterator.hasNext()) {
@@ -193,7 +194,7 @@ public class VectorTopNKeyOperator extends 
Operator<TopNKeyDesc> implements Vect
       KeyWrapper partitionKey = each.getKey();
       TopNKeyFilter filter = each.getValue();
       log.debug("Checking TopN Filter efficiency {}, threshold: {}", filter, 
efficiencyThreshold);
-      if (filter.forwardingRatio() >= efficiencyThreshold) {
+      if (filter.getTotal() >= checkEfficiencyNumRows && 
filter.forwardingRatio() >= efficiencyThreshold) {
         log.info("Disabling TopN Filter {}", filter);
         disabledPartitions.add(partitionKey);
       }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
index b487480..d020756 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java
@@ -1084,8 +1084,8 @@ public class VectorHashKeyWrapperBatch extends 
VectorColumnSetInfo {
       comparator.addColumnComparator(
               i, columnTypeSpecificIndex, columnVectorType, 
columnSortOrder.charAt(i), nullOrder.charAt(i));
     }
-    if (comparator.getComparators().size() == 1) { // don't use the composite 
comparator for n=1
-      return comparator.getComparators().get(0);
+    if (comparator.getComparators().length == 1) { // don't use the composite 
comparator for n=1
+      return comparator.getComparators()[0];
     }
     return comparator;
   }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java
index 06ac661..d08910b 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java
@@ -18,9 +18,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.wrapper;
 
 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.Comparator;
-import java.util.List;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
@@ -35,7 +33,7 @@ public class VectorHashKeyWrapperGeneralComparator
   /**
    * Compare {@link VectorHashKeyWrapperBase} instances only by one column.
    */
-  private static class VectorHashKeyWrapperBaseComparator
+  static class VectorHashKeyWrapperBaseComparator
           implements Comparator<VectorHashKeyWrapperBase>, Serializable {
 
     private final int keyIndex;
@@ -72,10 +70,10 @@ public class VectorHashKeyWrapperGeneralComparator
     }
   }
 
-  private final List<VectorHashKeyWrapperBaseComparator> comparators;
+  private final VectorHashKeyWrapperBaseComparator[] comparators;
 
   public VectorHashKeyWrapperGeneralComparator(int numberOfColumns) {
-    this.comparators = new ArrayList<>(numberOfColumns);
+    this.comparators = new VectorHashKeyWrapperBaseComparator[numberOfColumns];
   }
 
   public void addColumnComparator(int keyIndex, int columnTypeSpecificIndex, 
ColumnVector.Type columnVectorType,
@@ -115,8 +113,7 @@ public class VectorHashKeyWrapperGeneralComparator
     default:
       throw new RuntimeException("Unexpected column vector columnVectorType " 
+ columnVectorType);
     }
-
-    comparators.add(
+    comparators[keyIndex] = (
             new VectorHashKeyWrapperBaseComparator(
                     keyIndex,
                     sortOrder == '-' ? comparator.reversed() : comparator,
@@ -134,7 +131,7 @@ public class VectorHashKeyWrapperGeneralComparator
     return 0;
   }
 
-  public List<VectorHashKeyWrapperBaseComparator> getComparators() {
+  public VectorHashKeyWrapperBaseComparator[] getComparators() {
     return comparators;
   }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
index ddd657e..5d6d749 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
@@ -181,6 +181,11 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
     if (getClass().getName().equals(other.getClass().getName())) {
       TopNKeyDesc otherDesc = (TopNKeyDesc) other;
       return getTopN() == otherDesc.getTopN() &&
+          getEfficiencyThreshold() == otherDesc.getEfficiencyThreshold() &&
+          getCheckEfficiencyNumRows() == otherDesc.getCheckEfficiencyNumRows() 
&&
+          getCheckEfficiencyNumBatches() == 
otherDesc.getCheckEfficiencyNumBatches() &&
+          getMaxNumberOfPartitions() == otherDesc.getMaxNumberOfPartitions() &&
+          ExprNodeDescUtils.isSame(partitionKeyColumns, 
otherDesc.partitionKeyColumns) &&
           Objects.equals(columnSortOrder, otherDesc.columnSortOrder) &&
           Objects.equals(nullOrder, otherDesc.nullOrder) &&
           ExprNodeDescUtils.isSame(keyColumns, otherDesc.keyColumns);
@@ -195,9 +200,30 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
     ret.setColumnSortOrder(columnSortOrder);
     ret.setNullOrder(nullOrder);
     ret.setKeyColumns(getKeyColumns() == null ? null : new 
ArrayList<>(getKeyColumns()));
+    ret.setPartitionKeyColumns(getPartitionKeyColumns() == null ? null : new 
ArrayList<>(getPartitionKeyColumns()));
+    ret.setCheckEfficiencyNumRows(checkEfficiencyNumRows);
+    ret.setCheckEfficiencyNumBatches(checkEfficiencyNumBatches);
+    ret.setEfficiencyThreshold(efficiencyThreshold);
+    ret.setMaxNumberOfPartitions(maxNumberOfPartitions);
     return ret;
   }
 
+  public void setEfficiencyThreshold(float efficiencyThreshold) {
+    this.efficiencyThreshold = efficiencyThreshold;
+  }
+
+  public void setCheckEfficiencyNumBatches(long checkEfficiencyNumBatches) {
+    this.checkEfficiencyNumBatches = checkEfficiencyNumBatches;
+  }
+
+  public void setCheckEfficiencyNumRows(long checkEfficiencyNumRows) {
+    this.checkEfficiencyNumRows = checkEfficiencyNumRows;
+  }
+
+  public void setMaxNumberOfPartitions(int maxNumberOfPartitions) {
+    this.maxNumberOfPartitions = maxNumberOfPartitions;
+  }
+
   public class TopNKeyDescExplainVectorization extends 
OperatorExplainVectorization {
     private final TopNKeyDesc topNKeyDesc;
     private final VectorTopNKeyDesc vectorTopNKeyDesc;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
index a91bc73..0ee5c8d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
@@ -118,7 +118,7 @@ public class TestTopNKeyFilter {
     }};
 
     Set<KeyWrapper> disabled = new HashSet<>();
-    checkTopNFilterEfficiency(filters, disabled, 0.6f, LOG);
+    checkTopNFilterEfficiency(filters, disabled, 0.6f, LOG, 1);
     assertThat(disabled, hasSize(1));
     assertThat(disabled, hasItem(new TestKeyWrapper(200)));
   }

Reply via email to