This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 966561a6ed [improvement and fix](statistics)Load the cache for
external table row count while init table (#23170)
966561a6ed is described below
commit 966561a6ed83433bfadd6b0eadb856506cab25d8
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Aug 24 23:34:16 2023 +0800
[improvement and fix](statistics)Load the cache for external table row
count while init table (#23170)
1. Load the external table row count into the cache when the table is initialized,
so that row count stats are available the very first time a SQL statement is run.
2. Show the cardinality of an external scan node when explaining the SQL.
3. Fix bugs introduced by https://github.com/apache/doris/pull/22963
---
.../apache/doris/analysis/ShowTableStatsStmt.java | 15 +++++++++
.../doris/catalog/external/ExternalTable.java | 2 ++
.../doris/catalog/external/HMSExternalTable.java | 14 ++++++---
.../glue/translator/PhysicalPlanTranslator.java | 4 ++-
.../java/org/apache/doris/qe/ShowExecutor.java | 11 ++++++-
.../apache/doris/statistics/AnalysisManager.java | 16 +++++-----
.../apache/doris/statistics/HMSAnalysisTask.java | 6 +++-
.../hive/test_hive_statistic.groovy | 4 +--
.../hive/test_hive_statistic_cache.groovy | 36 +++++++++++++++++++++-
9 files changed, 91 insertions(+), 17 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index af14be37b5..c5e9e211e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -150,6 +150,21 @@ public class ShowTableStatsStmt extends ShowStmt {
return new ShowResultSet(getMetaData(), result);
}
+ public ShowResultSet constructResultSet(long rowCount) {
+ List<List<String>> result = Lists.newArrayList();
+ List<String> row = Lists.newArrayList();
+ row.add("");
+ row.add("");
+ row.add(String.valueOf(rowCount));
+ row.add("");
+ row.add("");
+ row.add("");
+ row.add("");
+ row.add("");
+ result.add(row);
+ return new ShowResultSet(getMetaData(), result);
+ }
+
public boolean isCached() {
return cached;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
index f4c76cda7a..0f9ec3f564 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
@@ -72,6 +72,7 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
@SerializedName(value = "lastUpdateTime")
protected long lastUpdateTime;
+ protected long dbId;
protected boolean objectCreated;
protected ExternalCatalog catalog;
protected ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true);
@@ -113,6 +114,7 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
try {
// getDbOrAnalysisException will call makeSureInitialized in
ExternalCatalog.
ExternalDatabase db = catalog.getDbOrAnalysisException(dbName);
+ dbId = db.getId();
db.makeSureInitialized();
} catch (AnalysisException e) {
Util.logAndThrowRuntimeException(LOG, String.format("Exception to
get db %s", dbName), e);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index e75b9859b8..a4c19ecc45 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -149,6 +149,7 @@ public class HMSExternalTable extends ExternalTable {
}
}
objectCreated = true;
+ estimatedRowCount = getRowCountFromExternalSource();
}
}
@@ -272,6 +273,15 @@ public class HMSExternalTable extends ExternalTable {
@Override
public long getRowCount() {
makeSureInitialized();
+ long rowCount = getRowCountFromExternalSource();
+ if (rowCount == -1) {
+ LOG.debug("Will estimate row count from file list.");
+ rowCount = StatisticsUtil.getRowCountFromFileList(this);
+ }
+ return rowCount;
+ }
+
+ private long getRowCountFromExternalSource() {
long rowCount;
switch (dlaType) {
case HIVE:
@@ -284,10 +294,6 @@ public class HMSExternalTable extends ExternalTable {
LOG.warn("getRowCount for dlaType {} is not supported.",
dlaType);
rowCount = -1;
}
- if (rowCount == -1) {
- LOG.debug("Will estimate row count from file list.");
- rowCount = StatisticsUtil.getRowCountFromFileList(this);
- }
return rowCount;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 6613fd71a9..fb57e611d0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -437,7 +437,9 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
TableRef ref = new TableRef(tableName, null, null);
BaseTableRef tableRef = new BaseTableRef(ref, table, tableName);
tupleDescriptor.setRef(tableRef);
-
+ if (fileScan.getStats() != null) {
+ scanNode.setCardinality((long) fileScan.getStats().getRowCount());
+ }
Utils.execWithUncheckedException(scanNode::init);
context.addScanNode(scanNode);
ScanNode finalScanNode = scanNode;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 69c3fb69cf..f505089cd7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2451,7 +2451,16 @@ public class ShowExecutor {
ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
TableIf tableIf = showTableStatsStmt.getTable();
TableStats tableStats =
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
- resultSet = showTableStatsStmt.constructResultSet(tableStats);
+ /*
+ HMSExternalTable table will fetch row count from HMS
+ or estimate with file size and schema if it's not analyzed.
+ tableStats == null means it's not analyzed, in this case show the
estimated row count.
+ */
+ if (tableStats == null && tableIf instanceof HMSExternalTable) {
+ resultSet =
showTableStatsStmt.constructResultSet(tableIf.estimatedRowCount());
+ } else {
+ resultSet = showTableStatsStmt.constructResultSet(tableStats);
+ }
}
private void handleShowColumnStats() throws AnalysisException {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 62d27a17c9..5fbfb56829 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -32,6 +32,7 @@ import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
+import org.apache.doris.catalog.external.ExternalTable;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
@@ -285,9 +286,7 @@ public class AnalysisManager extends Daemon implements
Writable {
// columnNames null means to add all visitable columns.
// Will get all the visible columns in analyzeTblStmt.check()
AnalyzeTblStmt analyzeTblStmt = new
AnalyzeTblStmt(analyzeProperties, tableName,
- table.getBaseSchema().stream().filter(c ->
!StatisticsUtil.isUnsupportedType(c.getType())).map(
- Column::getName).collect(
- Collectors.toList()), db.getId(), table);
+ null, db.getId(), table);
try {
analyzeTblStmt.check();
} catch (AnalysisException analysisException) {
@@ -334,11 +333,10 @@ public class AnalysisManager extends Daemon implements
Writable {
boolean isSync = stmt.isSync();
Map<Long, BaseAnalysisTask> analysisTaskInfos = new HashMap<>();
+ createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync);
if (stmt.isAllColumns()
&& StatisticsUtil.isExternalTable(jobInfo.catalogName,
jobInfo.dbName, jobInfo.tblName)) {
- createTaskForExternalTable(jobInfo, analysisTaskInfos, isSync);
- } else {
- createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync);
+ createTableLevelTaskForExternalTable(jobInfo, analysisTaskInfos,
isSync);
}
if (isSync) {
syncExecute(analysisTaskInfos.values());
@@ -583,7 +581,7 @@ public class AnalysisManager extends Daemon implements
Writable {
}
@VisibleForTesting
- public void createTaskForExternalTable(AnalysisInfo jobInfo,
+ public void createTableLevelTaskForExternalTable(AnalysisInfo jobInfo,
Map<Long, BaseAnalysisTask> analysisTasks,
boolean isSync) throws DdlException {
@@ -616,6 +614,10 @@ public class AnalysisManager extends Daemon implements
Writable {
public void updateTableStats(AnalysisInfo jobInfo) {
TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogName,
jobInfo.dbName, jobInfo.tblName);
+ // External Table update table stats after table level task finished.
+ if (tbl instanceof ExternalTable) {
+ return;
+ }
// TODO: set updatedRows to 0, when loadedRows of transaction info is
ready.
updateTableStatsStatus(new TableStats(tbl.getId(),
tbl.estimatedRowCount(), jobInfo));
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index e741ee1d2c..512aa9982f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -123,7 +123,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
String rowCount = columnResult.get(0).get(0);
Env.getCurrentEnv().getAnalysisManager()
.updateTableStatsStatus(
- new TableStats(table.getId(),
Long.parseLong(rowCount), null));
+ new TableStats(table.getId(),
Long.parseLong(rowCount), info));
}
/**
@@ -269,6 +269,10 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
@Override
protected void afterExecution() {
+ // Table level task doesn't need to sync any value to sync stats, it
stores the value in metadata.
+ if (isTableLevelTask) {
+ return;
+ }
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(),
-1, col.getName());
}
}
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
index 0d783c13ad..2366267a27 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
+++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
@@ -234,11 +234,11 @@ suite("test_hive_statistic",
"p2,external,hive,external_remote,external_remote_h
sql """analyze database `statistics` with sync"""
result = sql """show table stats statistics"""
assertTrue(result.size() == 1)
- assertTrue(result[0][0] == "100")
+ assertTrue(result[0][2] == "100")
result = sql """show table cached stats statistics"""
assertTrue(result.size() == 1)
- assertTrue(result[0][0] == "100")
+ assertTrue(result[0][2] == "100")
sql """drop stats statistics"""
result = sql """show column cached stats statistics"""
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
index 02d46d4ece..f98962b9f2 100644
---
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
+++
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
@@ -29,6 +29,40 @@ suite("test_hive_statistic_cache",
"p2,external,hive,external_remote,external_re
'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
+ sql """use ${catalog_name}.tpch_1000_parquet"""
+ sql """desc customer""";
+ sql """desc lineitem""";
+ sql """desc region""";
+ sql """desc nation""";
+ sql """desc orders""";
+ sql """desc part""";
+ sql """desc partsupp""";
+ sql """desc supplier""";
+ Thread.sleep(1000);
+ def result = sql """show table cached stats customer"""
+ assertTrue(result[0][2] == "150000000")
+
+ result = sql """show table cached stats lineitem"""
+ assertTrue(result[0][2] == "5999989709")
+
+ result = sql """show table cached stats region"""
+ assertTrue(result[0][2] == "5")
+
+ result = sql """show table cached stats nation"""
+ assertTrue(result[0][2] == "25")
+
+ result = sql """show table cached stats orders"""
+ assertTrue(result[0][2] == "1500000000")
+
+ result = sql """show table cached stats part"""
+ assertTrue(result[0][2] == "200000000")
+
+ result = sql """show table cached stats partsupp"""
+ assertTrue(result[0][2] == "800000000")
+
+ result = sql """show table cached stats supplier"""
+ assertTrue(result[0][2] == "10000000")
+
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
@@ -37,7 +71,7 @@ suite("test_hive_statistic_cache",
"p2,external,hive,external_remote,external_re
sql """analyze table `stats` with sync;"""
sql """select count(*) from stats"""
Thread.sleep(5000);
- def result = sql """show column cached stats `stats` (lo_orderkey)"""
+ result = sql """show column cached stats `stats` (lo_orderkey)"""
assertTrue(result[0][0] == "lo_orderkey")
assertTrue(result[0][1] == "100.0")
assertTrue(result[0][2] == "26.0")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]