This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new b296567a3 IMPALA-7942 (part 1): Add query hints for table cardinalities
b296567a3 is described below
commit b296567a32c8f678549fe7e40ea87d7669f81a9e
Author: skyyws <[email protected]>
AuthorDate: Wed Aug 10 15:45:53 2022 +0800
IMPALA-7942 (part 1): Add query hints for table cardinalities
Currently, we run 'COMPUTE STATS' command to compute table stats
which is very useful for query planning. Without these stats, a
query plan may not be optimal. However, these stats may not be
available, up to date, or valid. To workaround this problem,
this patch adds a new query hint: 'TABLE_NUM_ROWS', We can use
this new hint after a hdfs or kudu table in query like this:
* select col from t /* +TABLE_NUM_ROWS(1000) */;
If set, Impala will use this value as table scanned rows when
table no stats or has corrput stats. This hint value will not
valid if table stats is normal.
Testing:
- Added new fe test in 'PlannerTest'
- Added new fe test in 'AnalyzeStmtsTest' for negative cases
Change-Id: I9f0c773f4e67782a1428db64062f68afbd257af7
Reviewed-on: http://gerrit.cloudera.org:8080/18829
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../java/org/apache/impala/analysis/TableRef.java | 45 +++++++++++++++++++++-
.../org/apache/impala/planner/HdfsScanNode.java | 10 ++++-
.../org/apache/impala/planner/KuduScanNode.java | 9 +++--
.../java/org/apache/impala/planner/ScanNode.java | 3 ++
.../apache/impala/planner/SingleNodePlanner.java | 2 +-
.../apache/impala/analysis/AnalyzeStmtsTest.java | 35 ++++++++++++++++-
.../org/apache/impala/planner/PlannerTest.java | 9 +++++
.../PlannerTest/table-cardinality-hint.test | 43 +++++++++++++++++++++
8 files changed, 146 insertions(+), 10 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index ad6df20c0..64c1f7050 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -33,6 +33,7 @@ import org.apache.impala.authorization.Privilege;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.FeFsTable;
import org.apache.impala.catalog.FeIcebergTable;
+import org.apache.impala.catalog.FeKuduTable;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.planner.JoinNode.DistributionMode;
@@ -170,6 +171,13 @@ public class TableRef extends StmtNode {
// rewrite.
private boolean isHidden_ = false;
+ // Value of query hint 'TABLE_NUM_ROWS' on this table. Used in constructing
ScanNode if
+ // the table does not have stats, or has corrupt stats. -1 indicates no
hint. Currently,
+ // this hint is valid for hdfs and kudu table.
+ private long tableNumRowsHint_ = -1;
+ // Table row hint name used in sql.
+ private static final String TABLE_ROW_HINT = "TABLE_NUM_ROWS";
+
/**
* Returns a new, resolved, and analyzed table ref.
*/
@@ -309,6 +317,10 @@ public class TableRef extends StmtNode {
return convertLimitToSampleHintPercent_;
}
+ public long getTableNumRowsHint() {
+ return tableNumRowsHint_;
+ }
+
/**
* Returns true if this table ref has a resolved path that is rooted at a
registered
* tuple descriptor, false otherwise.
@@ -499,9 +511,10 @@ public class TableRef extends StmtNode {
}
// BaseTableRef will always have their path resolved at this point.
Preconditions.checkState(getResolvedPath() != null);
+ boolean isTableHintSupported = true;
if (getResolvedPath().destTable() != null &&
- !(getResolvedPath().destTable() instanceof FeFsTable)) {
- analyzer.addWarning("Table hints only supported for Hdfs tables");
+ !supportTableHint(getResolvedPath().destTable(), analyzer)) {
+ isTableHintSupported = false;
}
for (PlanHint hint: tableHints_) {
if (hint.is("SCHEDULE_CACHE_LOCAL")) {
@@ -534,12 +547,40 @@ public class TableRef extends StmtNode {
addHintWarning(hint, analyzer);
}
}
+ } else if (hint.is(TABLE_ROW_HINT)) {
+ List<String> args = hint.getArgs();
+ if (args == null || args.size() != 1 || !isTableHintSupported) {
+ addHintWarning(hint, analyzer);
+ return;
+ }
+ analyzer.setHasPlanHints();
+ tableNumRowsHint_ = Long.parseLong(args.get(0));
} else {
addHintWarning(hint, analyzer);
}
}
}
+ /**
+ * Returns whether the table supports hint. Currently, hdfs table support
table hint
+ * usage, kudu table only support {@link this#TABLE_ROW_HINT} hint.
+ *
+ * TODO: Add other table hints for kudu table.
+ */
+ private boolean supportTableHint(FeTable table, Analyzer analyzer) {
+ if (table instanceof FeKuduTable) {
+ if (!(tableHints_.size() == 1 && tableHints_.get(0).is(TABLE_ROW_HINT)))
{
+ analyzer.addWarning(String.format("Kudu table only support '%s' hint.",
+ TABLE_ROW_HINT));
+ return false;
+ }
+ } else if (!(table instanceof FeFsTable)) {
+ analyzer.addWarning("Table hints only supported for Hdfs/Kudu tables.");
+ return false;
+ }
+ return true;
+ }
+
private void addHintWarning(PlanHint hint, Analyzer analyzer) {
Preconditions.checkState(getAliases() != null && getAliases().length > 0);
analyzer.addWarning("Table hint not recognized for table " +
getUniqueAlias() +
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index adbc54aae..51347d507 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -354,6 +354,7 @@ public class HdfsScanNode extends ScanNode {
sampleParams_ = hdfsTblRef.getSampleParams();
replicaPreference_ = hdfsTblRef.getReplicaPreference();
randomReplica_ = hdfsTblRef.getRandomReplica();
+ tableNumRowsHint_ = hdfsTblRef.getTableNumRowsHint();
FeFsTable hdfsTable = (FeFsTable)hdfsTblRef.getTable();
Preconditions.checkState(tbl_ == hdfsTable);
isFullAcidTable_ =
@@ -1574,6 +1575,10 @@ public class HdfsScanNode extends ScanNode {
*
* Sets these members:
* numPartitionsWithNumRows_, partitionNumRows_, hasCorruptTableStats_.
+ *
+ * Currently, we provide a table hint 'TABLE_NUM_ROWS' to set table rows
manually if
+ * table has no stats or has corrupt stats. If the table has valid stats,
this hint
+ * will be ignored.
*/
private long getStatsNumRows(TQueryOptions queryOptions) {
numPartitionsWithNumRows_ = 0;
@@ -1611,7 +1616,7 @@ public class HdfsScanNode extends ScanNode {
// size of each column of scalar type, and then generate the estimate using
// sumValues(totalBytesPerFs_), the size of the hdfs table.
if (!queryOptions.disable_hdfs_num_rows_estimate
- && (numRows == -1L || hasCorruptTableStats_)) {
+ && (numRows == -1L || hasCorruptTableStats_) && tableNumRowsHint_ ==
-1L) {
// Compute the estimated table size from those partitions with missing
or corrupt
// row count, when taking compression into consideration
long estimatedTableSize =
@@ -1648,7 +1653,8 @@ public class HdfsScanNode extends ScanNode {
hasCorruptTableStats_ = true;
}
- return numRows;
+ // Use hint value if table no stats or stats is corrupt
+ return numRows >= 0 && !hasCorruptTableStats_ ? numRows :tableNumRowsHint_;
}
/**
diff --git a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
index 65f8c5a05..b65388ac6 100644
--- a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
@@ -39,6 +39,7 @@ import org.apache.impala.analysis.NumericLiteral;
import org.apache.impala.analysis.SlotDescriptor;
import org.apache.impala.analysis.SlotRef;
import org.apache.impala.analysis.StringLiteral;
+import org.apache.impala.analysis.TableRef;
import org.apache.impala.analysis.TupleDescriptor;
import org.apache.impala.catalog.FeKuduTable;
import org.apache.impala.catalog.KuduColumn;
@@ -128,11 +129,12 @@ public class KuduScanNode extends ScanNode {
boolean isPointLookupQuery_ = false;
public KuduScanNode(PlanNodeId id, TupleDescriptor desc, List<Expr>
conjuncts,
- MultiAggregateInfo aggInfo) {
+ MultiAggregateInfo aggInfo, TableRef kuduTblRef) {
super(id, desc, "SCAN KUDU");
kuduTable_ = (FeKuduTable) desc_.getTable();
conjuncts_ = conjuncts;
aggInfo_ = aggInfo;
+ tableNumRowsHint_ = kuduTblRef.getTableNumRowsHint();
}
@Override
@@ -374,8 +376,9 @@ public class KuduScanNode extends ScanNode {
super.computeStats(analyzer);
computeNumNodes(analyzer);
- // Update the cardinality
- inputCardinality_ = cardinality_ = kuduTable_.getNumRows();
+ // Update the cardinality, hint value will be used when table has no stats.
+ inputCardinality_ = cardinality_ =
+ kuduTable_.getNumRows() == -1 ? tableNumRowsHint_ :
kuduTable_.getNumRows();
if (isPointLookupQuery_) {
// Adjust input and output cardinality for point lookup.
// Planner don't create KuduScanNode for query with closure "limit 0" so
diff --git a/fe/src/main/java/org/apache/impala/planner/ScanNode.java
b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
index 30f7fee62..d250eb8db 100644
--- a/fe/src/main/java/org/apache/impala/planner/ScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
@@ -78,6 +78,9 @@ abstract public class ScanNode extends PlanNode {
// propagated outside the query block.
protected ExprSubstitutionMap optimizedAggSmap_;
+ // Refer to the comment of 'TableRef.tableNumRowsHint_'
+ protected long tableNumRowsHint_ = -1;
+
public ScanNode(PlanNodeId id, TupleDescriptor desc, String displayName) {
super(id, desc.getId().asList(), displayName);
desc_ = desc;
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 65dcb518a..7939e72c2 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -1889,7 +1889,7 @@ public class SingleNodePlanner {
return scanNode;
} else if (tblRef.getTable() instanceof FeKuduTable) {
scanNode = new KuduScanNode(ctx_.getNextNodeId(), tblRef.getDesc(),
conjuncts,
- aggInfo);
+ aggInfo, tblRef);
scanNode.init(analyzer);
return scanNode;
} else {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 15cc8a77b..3f3f799ed 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -2001,7 +2001,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
// Table hints not supported for HBase tables
AnalyzesOk(String.format("select * from functional_hbase.alltypes %s "
+
"%sschedule_random_replica%s", alias, prefix, suffix),
- "Table hints only supported for Hdfs tables");
+ "Table hints only supported for Hdfs/Kudu tables");
// Table hints not supported for catalog views
AnalyzesOk(String.format("select * from functional.alltypes_view %s " +
"%sschedule_random_replica%s", alias, prefix, suffix),
@@ -4689,7 +4689,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
testNumberOfMembers(ValuesStmt.class, 0);
// Also check TableRefs.
- testNumberOfMembers(TableRef.class, 28);
+ testNumberOfMembers(TableRef.class, 30);
testNumberOfMembers(BaseTableRef.class, 0);
testNumberOfMembers(InlineViewRef.class, 10);
}
@@ -5113,4 +5113,35 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
"l_shipdate <= (select '1998-09-02')",
"Predicate hint not recognized: ILLEGAL_HINT_TEST3");
}
+
+ @Test
+ public void testTableCardinalityHintNegative() {
+ // Cannot set cardinality hint with non long type parameter
+ AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(aa)
*/",
+ "For input string: \"aa\"");
+ AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(-1)
*/",
+ "Syntax error in line 1");
+ AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(1.0)
*/",
+ "Syntax error in line 1");
+ // Cannot set cardinality hint with multiple parameters
+ AnalysisError("select * from functional.alltypes /* +TABLE_NUM_ROWS(10,
20) */",
+ "Syntax error in line 1");
+ }
+
+ @Test
+ public void testTableCardinalityHintPositive() {
+ // Cannot set cardinality hint without parameter
+ AnalyzesOk("select * from functional.alltypes /* +TABLE_NUM_ROWS */",
+ "Table hint not recognized for table functional.alltypes:
TABLE_NUM_ROWS");
+ // 'TABLE_NUM_ROWS' is only valid for hdfs and kudu table now
+ AnalyzesOk("select * from functional.alltypes /* +TABLE_NUM_ROWS(100) */");
+ AnalyzesOk("select * from functional_kudu.alltypes /* +TABLE_NUM_ROWS(100)
*/");
+ // Kudu table only support 'TABLE_NUM_ROWS' hint
+ AnalyzesOk("select * from functional_kudu.alltypes /*
+SCHEDULE_CACHE_LOCAL */",
+ "Kudu table only support 'TABLE_NUM_ROWS' hint.");
+ // Only hdfs and kudu tables can use this hint
+ AnalyzesOk("select * from functional_hbase.alltypes /*
+TABLE_NUM_ROWS(100) */",
+ "Table hint not recognized for table " +
+ "functional_hbase.alltypes: TABLE_NUM_ROWS(100)");
+ }
}
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index d040a32d3..1b8bd210e 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -1340,4 +1340,13 @@ public class PlannerTest extends PlannerTestBase {
public void testOrcStatsAgg() {
runPlannerTestFile("orc-stats-agg");
}
+
+ /**
+ * Test new hint of 'TABLE_NUM_ROWS'
+ */
+ @Test
+ public void testTableCardinalityHint() {
+ runPlannerTestFile("table-cardinality-hint",
+ ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY));
+ }
}
diff --git
a/testdata/workloads/functional-planner/queries/PlannerTest/table-cardinality-hint.test
b/testdata/workloads/functional-planner/queries/PlannerTest/table-cardinality-hint.test
new file mode 100644
index 000000000..5a2321e99
--- /dev/null
+++
b/testdata/workloads/functional-planner/queries/PlannerTest/table-cardinality-hint.test
@@ -0,0 +1,43 @@
+# Without table stats, set 'TABLE_NUM_ROWS' hint value and the hint will be
taken
+SELECT * FROM functional_parquet.alltypes /* +TABLE_NUM_ROWS(10000) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+ HDFS partitions=24/24 files=24 size=188.17KB
+ row-size=80B cardinality=10.00K
+====
+# Without table stats, set 'TABLE_NUM_ROWS' hint value and the hint will be
taken
+SELECT * FROM functional_parquet.alltypes /* +TABLE_NUM_ROWS(0) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+ HDFS partitions=24/24 files=24 size=188.17KB
+ row-size=80B cardinality=0
+====
+# With stats for hdfs table, the hint value will be ignored
+SELECT * FROM functional.alltypes /* +TABLE_NUM_ROWS(10000) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes]
+ HDFS partitions=24/24 files=24 size=188.37KB
+ row-size=89B cardinality=7.30K
+====
+# With stats for kudu table, the hint value will be ignored
+SELECT * FROM functional_kudu.alltypes
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN KUDU [functional_kudu.alltypes]
+ row-size=97B cardinality=7.30K
+====
+# With stats for kudu table, the hint value will be ignored
+SELECT * FROM functional_kudu.alltypes /* +TABLE_NUM_ROWS(10000) */
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN KUDU [functional_kudu.alltypes]
+ row-size=97B cardinality=7.30K
+====
\ No newline at end of file