This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new b296567a3 IMPALA-7942 (part 1): Add query hints for table cardinalities
b296567a3 is described below

commit b296567a32c8f678549fe7e40ea87d7669f81a9e
Author: skyyws <[email protected]>
AuthorDate: Wed Aug 10 15:45:53 2022 +0800

    IMPALA-7942 (part 1): Add query hints for table cardinalities
    
    Currently, we run 'COMPUTE STATS' command to compute table stats
    which is very useful for query planning. Without these stats, a
    query plan may not be optimal. However, these stats may not be
    available, up to date, or valid. To workaround this problem,
    this patch adds a new query hint: 'TABLE_NUM_ROWS', We can use
    this new hint after a hdfs or kudu table in query like this:
    
      * select col from t /* +TABLE_NUM_ROWS(1000) */;
    
    If set, Impala will use this value as table scanned rows when
    table no stats or has corrput stats. This hint value will not
    valid if table stats is normal.
    
    Testing:
    - Added new fe test in 'PlannerTest'
    - Added new fe test in 'AnalyzeStmtsTest' for negative cases
    
    Change-Id: I9f0c773f4e67782a1428db64062f68afbd257af7
    Reviewed-on: http://gerrit.cloudera.org:8080/18829
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/analysis/TableRef.java  | 45 +++++++++++++++++++++-
 .../org/apache/impala/planner/HdfsScanNode.java    | 10 ++++-
 .../org/apache/impala/planner/KuduScanNode.java    |  9 +++--
 .../java/org/apache/impala/planner/ScanNode.java   |  3 ++
 .../apache/impala/planner/SingleNodePlanner.java   |  2 +-
 .../apache/impala/analysis/AnalyzeStmtsTest.java   | 35 ++++++++++++++++-
 .../org/apache/impala/planner/PlannerTest.java     |  9 +++++
 .../PlannerTest/table-cardinality-hint.test        | 43 +++++++++++++++++++++
 8 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java 
b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index ad6df20c0..64c1f7050 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -33,6 +33,7 @@ import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeIcebergTable;
+import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.planner.JoinNode.DistributionMode;
@@ -170,6 +171,13 @@ public class TableRef extends StmtNode {
   // rewrite.
   private boolean isHidden_ = false;
 
+  // Value of query hint 'TABLE_NUM_ROWS' on this table. Used in constructing 
ScanNode if
+  // the table does not have stats, or has corrupt stats. -1 indicates no 
hint. Currently,
+  // this hint is valid for hdfs and kudu table.
+  private long tableNumRowsHint_ = -1;
+  // Table row hint name used in sql.
+  private static final String TABLE_ROW_HINT = "TABLE_NUM_ROWS";
+
   /**
    * Returns a new, resolved, and analyzed table ref.
    */
@@ -309,6 +317,10 @@ public class TableRef extends StmtNode {
     return convertLimitToSampleHintPercent_;
   }
 
+  public long getTableNumRowsHint() {
+    return tableNumRowsHint_;
+  }
+
   /**
    * Returns true if this table ref has a resolved path that is rooted at a 
registered
    * tuple descriptor, false otherwise.
@@ -499,9 +511,10 @@ public class TableRef extends StmtNode {
     }
     // BaseTableRef will always have their path resolved at this point.
     Preconditions.checkState(getResolvedPath() != null);
+    boolean isTableHintSupported = true;
     if (getResolvedPath().destTable() != null &&
-        !(getResolvedPath().destTable() instanceof FeFsTable)) {
-      analyzer.addWarning("Table hints only supported for Hdfs tables");
+        !supportTableHint(getResolvedPath().destTable(), analyzer)) {
+      isTableHintSupported = false;
     }
     for (PlanHint hint: tableHints_) {
       if (hint.is("SCHEDULE_CACHE_LOCAL")) {
@@ -534,12 +547,40 @@ public class TableRef extends StmtNode {
             addHintWarning(hint, analyzer);
           }
         }
+      } else if (hint.is(TABLE_ROW_HINT)) {
+        List<String> args = hint.getArgs();
+        if (args == null || args.size() != 1 || !isTableHintSupported) {
+          addHintWarning(hint, analyzer);
+          return;
+        }
+        analyzer.setHasPlanHints();
+        tableNumRowsHint_ = Long.parseLong(args.get(0));
       } else {
         addHintWarning(hint, analyzer);
       }
     }
   }
 
+  /**
+   * Returns whether the table supports hint. Currently, hdfs table support 
table hint
+   * usage, kudu table only support {@link this#TABLE_ROW_HINT} hint.
+   *
+   * TODO: Add other table hints for kudu table.
+   */
+  private boolean supportTableHint(FeTable table, Analyzer analyzer) {
+    if (table instanceof FeKuduTable) {
+      if (!(tableHints_.size() == 1 && tableHints_.get(0).is(TABLE_ROW_HINT))) 
{
+        analyzer.addWarning(String.format("Kudu table only support '%s' hint.",
+            TABLE_ROW_HINT));
+        return false;
+      }
+    } else if (!(table instanceof FeFsTable)) {
+      analyzer.addWarning("Table hints only supported for Hdfs/Kudu tables.");
+      return false;
+    }
+    return true;
+  }
+
   private void addHintWarning(PlanHint hint, Analyzer analyzer) {
     Preconditions.checkState(getAliases() != null && getAliases().length > 0);
     analyzer.addWarning("Table hint not recognized for table " + 
getUniqueAlias() +
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java 
b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index adbc54aae..51347d507 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -354,6 +354,7 @@ public class HdfsScanNode extends ScanNode {
     sampleParams_ = hdfsTblRef.getSampleParams();
     replicaPreference_ = hdfsTblRef.getReplicaPreference();
     randomReplica_ = hdfsTblRef.getRandomReplica();
+    tableNumRowsHint_ = hdfsTblRef.getTableNumRowsHint();
     FeFsTable hdfsTable = (FeFsTable)hdfsTblRef.getTable();
     Preconditions.checkState(tbl_ == hdfsTable);
     isFullAcidTable_ =
@@ -1574,6 +1575,10 @@ public class HdfsScanNode extends ScanNode {
    *
    * Sets these members:
    * numPartitionsWithNumRows_, partitionNumRows_, hasCorruptTableStats_.
+   *
+   * Currently, we provide a table hint 'TABLE_NUM_ROWS' to set table rows 
manually if
+   * table has no stats or has corrupt stats. If the table has valid stats, 
this hint
+   * will be ignored.
    */
   private long getStatsNumRows(TQueryOptions queryOptions) {
     numPartitionsWithNumRows_ = 0;
@@ -1611,7 +1616,7 @@ public class HdfsScanNode extends ScanNode {
     // size of each column of scalar type, and then generate the estimate using
     // sumValues(totalBytesPerFs_), the size of the hdfs table.
     if (!queryOptions.disable_hdfs_num_rows_estimate
-        && (numRows == -1L || hasCorruptTableStats_)) {
+        && (numRows == -1L || hasCorruptTableStats_) && tableNumRowsHint_ == 
-1L) {
       // Compute the estimated table size from those partitions with missing 
or corrupt
       // row count, when taking compression into consideration
       long estimatedTableSize =
@@ -1648,7 +1653,8 @@ public class HdfsScanNode extends ScanNode {
       hasCorruptTableStats_ = true;
     }
 
-    return numRows;
+    // Use hint value if table no stats or stats is corrupt
+    return numRows >= 0 && !hasCorruptTableStats_ ? numRows :tableNumRowsHint_;
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java 
b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
index 65f8c5a05..b65388ac6 100644
--- a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
@@ -39,6 +39,7 @@ import org.apache.impala.analysis.NumericLiteral;
 import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.SlotRef;
 import org.apache.impala.analysis.StringLiteral;
+import org.apache.impala.analysis.TableRef;
 import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.KuduColumn;
@@ -128,11 +129,12 @@ public class KuduScanNode extends ScanNode {
   boolean isPointLookupQuery_ = false;
 
   public KuduScanNode(PlanNodeId id, TupleDescriptor desc, List<Expr> 
conjuncts,
-      MultiAggregateInfo aggInfo) {
+      MultiAggregateInfo aggInfo, TableRef kuduTblRef) {
     super(id, desc, "SCAN KUDU");
     kuduTable_ = (FeKuduTable) desc_.getTable();
     conjuncts_ = conjuncts;
     aggInfo_ = aggInfo;
+    tableNumRowsHint_ = kuduTblRef.getTableNumRowsHint();
   }
 
   @Override
@@ -374,8 +376,9 @@ public class KuduScanNode extends ScanNode {
     super.computeStats(analyzer);
     computeNumNodes(analyzer);
 
-    // Update the cardinality
-    inputCardinality_ = cardinality_ = kuduTable_.getNumRows();
+    // Update the cardinality, hint value will be used when table has no stats.
+    inputCardinality_ = cardinality_ =
+        kuduTable_.getNumRows() == -1 ? tableNumRowsHint_ : 
kuduTable_.getNumRows();
     if (isPointLookupQuery_) {
       // Adjust input and output cardinality for point lookup.
       // Planner don't create KuduScanNode for query with closure "limit 0" so
diff --git a/fe/src/main/java/org/apache/impala/planner/ScanNode.java 
b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
index 30f7fee62..d250eb8db 100644
--- a/fe/src/main/java/org/apache/impala/planner/ScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
@@ -78,6 +78,9 @@ abstract public class ScanNode extends PlanNode {
   // propagated outside the query block.
   protected ExprSubstitutionMap optimizedAggSmap_;
 
+  // Refer to the comment of 'TableRef.tableNumRowsHint_'
+  protected long tableNumRowsHint_ = -1;
+
   public ScanNode(PlanNodeId id, TupleDescriptor desc, String displayName) {
     super(id, desc.getId().asList(), displayName);
     desc_ = desc;
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java 
b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 65dcb518a..7939e72c2 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -1889,7 +1889,7 @@ public class SingleNodePlanner {
       return scanNode;
     } else if (tblRef.getTable() instanceof FeKuduTable) {
       scanNode = new KuduScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), 
conjuncts,
-          aggInfo);
+          aggInfo, tblRef);
       scanNode.init(analyzer);
       return scanNode;
     } else {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 15cc8a77b..3f3f799ed 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -2001,7 +2001,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         // Table hints not supported for HBase tables
         AnalyzesOk(String.format("select * from functional_hbase.alltypes %s " 
+
               "%sschedule_random_replica%s", alias, prefix, suffix),
-            "Table hints only supported for Hdfs tables");
+            "Table hints only supported for Hdfs/Kudu tables");
         // Table hints not supported for catalog views
         AnalyzesOk(String.format("select * from functional.alltypes_view %s " +
               "%sschedule_random_replica%s", alias, prefix, suffix),
@@ -4689,7 +4689,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
     testNumberOfMembers(ValuesStmt.class, 0);
 
     // Also check TableRefs.
-    testNumberOfMembers(TableRef.class, 28);
+    testNumberOfMembers(TableRef.class, 30);
     testNumberOfMembers(BaseTableRef.class, 0);
     testNumberOfMembers(InlineViewRef.class, 10);
   }
@@ -5113,4 +5113,35 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
             "l_shipdate <= (select '1998-09-02')",
         "Predicate hint not recognized: ILLEGAL_HINT_TEST3");
   }
+
+  @Test
+  public void testTableCardinalityHintNegative() {
+    // Cannot set cardinality hint with non long type parameter
+    AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(aa) 
*/",
+        "For input string: \"aa\"");
+    AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(-1) 
*/",
+        "Syntax error in line 1");
+    AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(1.0) 
*/",
+        "Syntax error in line 1");
+    // Cannot set cardinality hint with multiple parameters
+    AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(10, 
20) */",
+        "Syntax error in line 1");
+  }
+
+  @Test
+  public void testTableCardinalityHintPositive() {
+    // Cannot set cardinality hint without parameter
+    AnalyzesOk("select * from functional.alltypes /* +TABLE_NUM_ROWS */",
+        "Table hint not recognized for table functional.alltypes: 
TABLE_NUM_ROWS");
+    // 'TABLE_NUM_ROWS' is only valid for hdfs and kudu table now
+    AnalyzesOk("select * from functional.alltypes /* +TABLE_NUM_ROWS(100) */");
+    AnalyzesOk("select * from functional_kudu.alltypes /* +TABLE_NUM_ROWS(100) 
*/");
+    // Kudu table only support 'TABLE_NUM_ROWS' hint
+    AnalyzesOk("select * from functional_kudu.alltypes /* 
+SCHEDULE_CACHE_LOCAL */",
+        "Kudu table only support 'TABLE_NUM_ROWS' hint.");
+    // Only hdfs and kudu tables can use this hint
+    AnalyzesOk("select * from functional_hbase.alltypes /* 
+TABLE_NUM_ROWS(100) */",
+        "Table hint not recognized for table " +
+            "functional_hbase.alltypes: TABLE_NUM_ROWS(100)");
+  }
 }
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index d040a32d3..1b8bd210e 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -1340,4 +1340,13 @@ public class PlannerTest extends PlannerTestBase {
   public void testOrcStatsAgg() {
     runPlannerTestFile("orc-stats-agg");
   }
+
+  /**
+   * Test new hint of 'TABLE_NUM_ROWS'
+   */
+  @Test
+  public void testTableCardinalityHint() {
+    runPlannerTestFile("table-cardinality-hint",
+        ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY));
+  }
 }
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/table-cardinality-hint.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/table-cardinality-hint.test
new file mode 100644
index 000000000..5a2321e99
--- /dev/null
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/table-cardinality-hint.test
@@ -0,0 +1,43 @@
+# Without table stats, set 'TABLE_NUM_ROWS' hint value and the hint will be 
taken
+SELECT * FROM functional_parquet.alltypes /* +TABLE_NUM_ROWS(10000) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   HDFS partitions=24/24 files=24 size=188.17KB
+   row-size=80B cardinality=10.00K
+====
+# Without table stats, set 'TABLE_NUM_ROWS' hint value and the hint will be 
taken
+SELECT * FROM functional_parquet.alltypes /* +TABLE_NUM_ROWS(0) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   HDFS partitions=24/24 files=24 size=188.17KB
+   row-size=80B cardinality=0
+====
+# With stats for hdfs table, the hint value will be ignored
+SELECT * FROM functional.alltypes /* +TABLE_NUM_ROWS(10000) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=188.37KB
+   row-size=89B cardinality=7.30K
+====
+# With stats for kudu table, the hint value will be ignored
+SELECT * FROM functional_kudu.alltypes
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN KUDU [functional_kudu.alltypes]
+   row-size=97B cardinality=7.30K
+====
+# With stats for kudu table, the hint value will be ignored
+SELECT * FROM functional_kudu.alltypes /* +TABLE_NUM_ROWS(10000) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN KUDU [functional_kudu.alltypes]
+   row-size=97B cardinality=7.30K
+====
\ No newline at end of file

Reply via email to