This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8cd31f60427f57ca7e95f8767f256acb4947cb2b Author: Jibing-Li <[email protected]> AuthorDate: Mon Jul 10 11:33:06 2023 +0800 [improvement](statistics)Support external table show table stats, modify column stats and drop stats (#21624) Support external table show table stats, modify column stats and drop stats. --- .../doris/analysis/AlterColumnStatsStmt.java | 26 +++------ .../org/apache/doris/analysis/DropStatsStmt.java | 17 ++++++ .../apache/doris/analysis/ShowTableStatsStmt.java | 3 -- .../apache/doris/statistics/AnalysisManager.java | 3 ++ .../apache/doris/statistics/HMSAnalysisTask.java | 29 ++++++++-- .../doris/statistics/StatisticsRepository.java | 12 +++++ .../hive/test_hive_statistic.out | 57 ++++++++++++++++++++ .../hive/test_hive_statistic.groovy | 62 ++++++++++++++++++++++ 8 files changed, 183 insertions(+), 26 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index 93edefcab1..3e16a380e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -17,10 +17,9 @@ package org.apache.doris.analysis; -import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; @@ -28,7 +27,7 @@ import org.apache.doris.common.ErrorReport; import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.UserException; import org.apache.doris.common.util.PrintableMap; -import org.apache.doris.common.util.Util; +import org.apache.doris.datasource.CatalogIf; import 
org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.ColumnStatistic; @@ -109,9 +108,6 @@ public class AlterColumnStatsStmt extends DdlStmt { // check table name tableName.analyze(analyzer); - // disallow external catalog - Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName()); - // check partition & column checkColumnNames(); @@ -138,19 +134,11 @@ public class AlterColumnStatsStmt extends DdlStmt { }); } - /** - * TODO(wzt): Support for external tables - */ private void checkColumnNames() throws AnalysisException { - Database db = analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb()); - Table table = db.getTableOrAnalysisException(tableName.getTbl()); - - if (table.getType() != Table.TableType.OLAP) { - throw new AnalysisException("Only OLAP table statistics are supported"); - } - - OlapTable olapTable = (OlapTable) table; - if (olapTable.getColumn(columnName) == null) { + CatalogIf catalog = analyzer.getEnv().getCatalogMgr().getCatalog(tableName.getCtl()); + DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb()); + TableIf table = db.getTableOrAnalysisException(tableName.getTbl()); + if (table.getColumn(columnName) == null) { ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, columnName, FeNameFormat.getColumnNameRegex()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java index e821fecaed..aa80f664dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java @@ -27,6 +27,7 @@ import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.UserException; import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.datasource.InternalCatalog; 
import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; @@ -51,6 +52,8 @@ public class DropStatsStmt extends DdlStmt { private final TableName tableName; private Set<String> columnNames; + // Flag to drop external table row count in table_statistics. + private boolean dropTableRowCount; private long tblId; @@ -58,6 +61,7 @@ public class DropStatsStmt extends DdlStmt { this.dropExpired = dropExpired; this.tableName = null; this.columnNames = null; + this.dropTableRowCount = false; } public DropStatsStmt(TableName tableName, @@ -65,6 +69,11 @@ public class DropStatsStmt extends DdlStmt { this.tableName = tableName; if (columnNames != null) { this.columnNames = new HashSet<>(columnNames); + this.dropTableRowCount = false; + } else { + // columnNames == null means drop all columns, in this case, + // external table need to drop the table row count as well. + dropTableRowCount = true; } dropExpired = false; } @@ -81,6 +90,10 @@ public class DropStatsStmt extends DdlStmt { } tableName.analyze(analyzer); String catalogName = tableName.getCtl(); + if (InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalogName)) { + // Internal table doesn't need to drop table row count. 
+ dropTableRowCount = false; + } String dbName = tableName.getDb(); String tblName = tableName.getTbl(); CatalogIf catalog = analyzer.getEnv().getCatalogMgr() @@ -115,6 +128,10 @@ public class DropStatsStmt extends DdlStmt { return columnNames; } + public boolean dropTableRowCount() { + return dropTableRowCount; + } + @Override public String toSql() { StringBuilder sb = new StringBuilder(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 845111d036..e462c8585c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -27,7 +27,6 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.UserException; -import org.apache.doris.common.util.Util; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; @@ -76,8 +75,6 @@ public class ShowTableStatsStmt extends ShowStmt { throw new AnalysisException("Only one partition name could be specified"); } } - // disallow external catalog - Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName()); CatalogIf<DatabaseIf> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl()); if (catalog == null) { ErrorReport.reportAnalysisException("Catalog: {} not exists", tableName.getCtl()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 1b8f2e7e0d..fde1407c11 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -712,6 +712,9 @@ public 
class AnalysisManager extends Daemon implements Writable { for (String col : cols) { Env.getCurrentEnv().getStatisticsCache().invalidate(tblId, -1L, col); } + if (dropStatsStmt.dropTableRowCount()) { + StatisticsRepository.dropExternalTableStatistics(tblId); + } } public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index a7b45c13cb..4483f738a7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -49,6 +49,25 @@ public class HMSAnalysisTask extends BaseAnalysisTask { public static final String TIMESTAMP = "transient_lastDdlTime"; private static final String ANALYZE_SQL_TABLE_TEMPLATE = "INSERT INTO " + + "${internalDB}.${columnStatTbl}" + + " SELECT " + + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, " + + "${catalogId} AS catalog_id, " + + "${dbId} AS db_id, " + + "${tblId} AS tbl_id, " + + "${idxId} AS idx_id, " + + "'${colId}' AS col_id, " + + "NULL AS part_id, " + + "COUNT(1) AS row_count, " + + "NDV(`${colName}`) AS ndv, " + + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS null_count, " + + "MIN(`${colName}`) AS min, " + + "MAX(`${colName}`) AS max, " + + "${dataSizeFunction} AS data_size, " + + "NOW() " + + "FROM `${catalogName}`.`${dbName}`.`${tblName}`"; + + private static final String ANALYZE_SQL_PARTITION_TEMPLATE = "INSERT INTO " + "${internalDB}.${columnStatTbl}" + " SELECT " + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, " @@ -104,7 +123,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask { if (isPartitionOnly) { for (String partId : partitionNames) { StringBuilder sb = new StringBuilder(); - sb.append(ANALYZE_TABLE_COUNT_TEMPLATE); + sb.append(ANALYZE_SQL_PARTITION_TEMPLATE); sb.append(" where 
"); String[] splits = partId.split("/"); for (int i = 0; i < splits.length; i++) { @@ -122,7 +141,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask { StatisticsRepository.persistTableStats(params); } } else { - Map<String, String> params = buildTableStatsParams("NULL"); + Map<String, String> params = buildTableStatsParams(null); List<InternalQueryResult.ResultRow> columnResult = StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); @@ -226,8 +245,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask { private Map<String, String> buildTableStatsParams(String partId) { Map<String, String> commonParams = new HashMap<>(); String id = StatisticsUtil.constructId(tbl.getId(), -1); - if (!partId.equals("NULL")) { + if (partId == null) { + commonParams.put("partId", "NULL"); + } else { id = StatisticsUtil.constructId(id, partId); + commonParams.put("partId", "\'" + partId + "\'"); } commonParams.put("id", id); commonParams.put("catalogId", String.valueOf(catalog.getId())); @@ -235,7 +257,6 @@ public class HMSAnalysisTask extends BaseAnalysisTask { commonParams.put("tblId", String.valueOf(tbl.getId())); commonParams.put("indexId", "-1"); commonParams.put("idxId", "-1"); - commonParams.put("partId", "\'" + partId + "\'"); commonParams.put("catalogName", catalog.getName()); commonParams.put("dbName", db.getFullName()); commonParams.put("tblName", tbl.getName()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index ced8e1e6a6..70c495f29b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -202,6 +202,18 @@ public class StatisticsRepository { dropStatisticsByColName(tblId, colNames, StatisticConstants.HISTOGRAM_TBL_NAME); } + public static void 
dropExternalTableStatistics(long tblId) throws DdlException { + Map<String, String> params = new HashMap<>(); + String inPredicate = String.format("tbl_id = %s", tblId); + params.put("tblName", StatisticConstants.ANALYSIS_TBL_NAME); + params.put("condition", inPredicate); + try { + StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(DROP_TABLE_STATISTICS_TEMPLATE)); + } catch (Exception e) { + throw new DdlException(e.getMessage(), e); + } + } + public static void dropStatisticsByColName(long tblId, Set<String> colNames, String statsTblName) throws DdlException { Map<String, String> params = new HashMap<>(); diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out b/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out new file mode 100644 index 0000000000..ad8b494c00 --- /dev/null +++ b/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out @@ -0,0 +1,57 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +lo_quantity 100.0 46.0 0.0 404.0 4.0 1 50 + +-- !2 -- +lo_orderkey 100.0 26.0 0.0 404.0 4.0 1 98 + +-- !3 -- +lo_linenumber 100.0 7.0 0.0 404.0 4.0 1 7 + +-- !4 -- +lo_custkey 100.0 26.0 0.0 404.0 4.0 67423 2735521 + +-- !5 -- +lo_partkey 100.0 100.0 0.0 404.0 4.0 2250 989601 + +-- !6 -- +lo_suppkey 100.0 100.0 0.0 404.0 4.0 4167 195845 + +-- !7 -- +lo_orderdate 100.0 26.0 0.0 404.0 4.0 19920221 19980721 + +-- !8 -- +lo_orderpriority 100.0 5.0 0.0 888.8000000000001 8.8 '1-URGENT' '5-LOW' + +-- !9 -- +lo_shippriority 100.0 1.0 0.0 404.0 4.0 0 0 + +-- !10 -- +lo_extendedprice 100.0 100.0 0.0 404.0 4.0 104300 9066094 + +-- !11 -- +lo_ordtotalprice 100.0 26.0 0.0 404.0 4.0 3428256 36771805 + +-- !12 -- +lo_discount 100.0 11.0 0.0 404.0 4.0 0 10 + +-- !13 -- +lo_revenue 100.0 100.0 0.0 404.0 4.0 101171 8703450 + +-- !14 -- +lo_supplycost 100.0 100.0 0.0 404.0 4.0 58023 121374 + +-- !15 -- +lo_tax 100.0 9.0 0.0 404.0 4.0 0 8 + +-- !16 -- 
+lo_commitdate 100.0 95.0 0.0 404.0 4.0 19920515 19981016 + +-- !17 -- +lo_shipmode 100.0 7.0 0.0 425.21 4.21 'AIR' 'TRUCK' + +-- !18 -- +lo_shipmode 6001215.0 0.0 0.0 0.0 0.0 'NULL' 'NULL' + +-- !19 -- + diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy new file mode 100644 index 0000000000..ff184e4506 --- /dev/null +++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_statistic", "p2") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_hive_statistic" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use statistics;""" + sql """analyze table `statistics` with sync""" + qt_1 "show column stats `statistics` (lo_quantity)" + qt_2 "show column stats `statistics` (lo_orderkey)" + qt_3 "show column stats `statistics` (lo_linenumber)" + qt_4 "show column stats `statistics` (lo_custkey)" + qt_5 "show column stats `statistics` (lo_partkey)" + qt_6 "show column stats `statistics` (lo_suppkey)" + qt_7 "show column stats `statistics` (lo_orderdate)" + qt_8 "show column stats `statistics` (lo_orderpriority)" + qt_9 "show column stats `statistics` (lo_shippriority)" + qt_10 "show column stats `statistics` (lo_extendedprice)" + qt_11 "show column stats `statistics` (lo_ordtotalprice)" + qt_12 "show column stats `statistics` (lo_discount)" + qt_13 "show column stats `statistics` (lo_revenue)" + qt_14 "show column stats `statistics` (lo_supplycost)" + qt_15 "show column stats `statistics` (lo_tax)" + qt_16 "show column stats `statistics` (lo_commitdate)" + qt_17 "show column stats `statistics` (lo_shipmode)" + + sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')""" + qt_18 "show column stats `statistics` (lo_shipmode)" + + sql """drop stats statistics""" + qt_19 "show 
column stats statistics" + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
