This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new db9c74c38f8 [improvement](statistics)Drop expired external stats only
when the catalog is dropped. (#42244) (#42410)
db9c74c38f8 is described below
commit db9c74c38f883362bc64715a1eb606b7d10c25be
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Oct 24 23:47:53 2024 +0800
[improvement](statistics)Drop expired external stats only when the catalog
is dropped. (#42244) (#42410)
backport: https://github.com/apache/doris/pull/42244
---
.../apache/doris/statistics/StatisticsCleaner.java | 16 +++--
.../statistics/test_drop_expired_stats.groovy | 76 ++++++++++++++++++++++
2 files changed, 88 insertions(+), 4 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
index 2dbdca39b58..a61d08f7d8f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
@@ -183,10 +183,8 @@ public class StatisticsCleaner extends MasterDaemon {
private Map<Long, DatabaseIf> constructDbMap() {
Map<Long, DatabaseIf> idToDb = Maps.newHashMap();
- for (CatalogIf<? extends DatabaseIf> ctl : idToCatalog.values()) {
- for (DatabaseIf db : ctl.getAllDbs()) {
- idToDb.put(db.getId(), db);
- }
+ for (DatabaseIf db : Env.getCurrentEnv().getInternalCatalog().getAllDbs()) {
+ idToDb.put(db.getId(), db);
}
return idToDb;
}
@@ -272,6 +270,16 @@ public class StatisticsCleaner extends MasterDaemon {
expiredStats.expiredCatalog.add(catalogId);
continue;
}
+ // Skip check external DBs and tables to avoid fetch too much metadata.
+ // Remove expired external table stats only when the external catalog is dropped.
+ // TODO: Need to check external database and table exist or not. But for now, we only check catalog.
+ // Because column_statistics table only keep table id and db id.
+ // But meta data doesn't always cache all external tables' ids.
+ // So we may fail to find the external table only by id. Need to use db name and table name instead.
+ // Have to store db name and table name in column_statistics in the future.
+ if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID) {
+ continue;
+ }
long dbId = statsId.dbId;
if (!idToDb.containsKey(dbId)) {
expiredStats.expiredDatabase.add(dbId);
diff --git a/regression-test/suites/statistics/test_drop_expired_stats.groovy b/regression-test/suites/statistics/test_drop_expired_stats.groovy
new file mode 100644
index 00000000000..23067f670b5
--- /dev/null
+++ b/regression-test/suites/statistics/test_drop_expired_stats.groovy
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_drop_expired_stats") {
+
+ sql """drop database if exists test_drop_expired_stats"""
+ sql """create database test_drop_expired_stats"""
+ sql """use test_drop_expired_stats"""
+ sql """set global enable_auto_analyze=false"""
+
+ sql """CREATE TABLE table1 (
+ key1 bigint NOT NULL,
+ key2 bigint NOT NULL,
+ value1 int NOT NULL,
+ value2 int NOT NULL,
+ value3 int NOT NULL
+ )ENGINE=OLAP
+ DUPLICATE KEY(`key1`, `key2`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`key1`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+
+ sql """CREATE TABLE table2 (
+ key1 bigint NOT NULL,
+ key2 bigint NOT NULL,
+ value1 int NOT NULL
+ )ENGINE=OLAP
+ DUPLICATE KEY(`key1`, `key2`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`key1`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+
+ def id1 = getTableId("test_drop_expired_stats", "table1")
+ def id2 = getTableId("test_drop_expired_stats", "table2")
+
+ sql """analyze table table1 with sync"""
+ sql """analyze table table2 with sync"""
+ def result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}"""
+ assertEquals(5, result.size())
+ result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}"""
+ assertEquals(3, result.size())
+ sql """drop table table1"""
+ sql """drop expired stats"""
+ result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}"""
+ assertEquals(0, result.size())
+ result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}"""
+ assertEquals(3, result.size())
+
+ sql """drop database if exists test_drop_expired_stats"""
+ sql """drop expired stats"""
+ result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}"""
+ assertEquals(0, result.size())
+ result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}"""
+ assertEquals(0, result.size())
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]