This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5ccc875824 [fix](recycle) refactor the logic of erase meta with same
name (#14551)
5ccc875824 is described below
commit 5ccc87582461c2e97c81727cf507c7199052b2f9
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Nov 25 09:47:24 2022 +0800
[fix](recycle) refactor the logic of erase meta with same name (#14551)
In #14482, we implemented the feature to keep a specific number of meta entries
with the same name in the catalog recycle bin.
But it will cause meta replay bug.
Because every time we drop a db/table/partition, it will try to erase a
certain number of meta entries with the same name.
And when replaying the "drop" edit log, it will do the same thing. But the
number of meta entries to erase is based on the current config value, which is
not persisted in the edit log, so it causes inconsistency between "drop" and
"replay drop".
In this PR, I move the "erase meta with same name" logic to the daemon
thread of catalog recycle bin.
---
docs/en/docs/admin-manual/config/fe-config.md | 2 +-
docs/zh-CN/docs/admin-manual/config/fe-config.md | 2 +-
.../apache/doris/catalog/CatalogRecycleBin.java | 156 +++++++++++++--------
.../main/java/org/apache/doris/common/Config.java | 2 +
4 files changed, 105 insertions(+), 57 deletions(-)
diff --git a/docs/en/docs/admin-manual/config/fe-config.md
b/docs/en/docs/admin-manual/config/fe-config.md
index 2b0d8f8510..ab7215a561 100644
--- a/docs/en/docs/admin-manual/config/fe-config.md
+++ b/docs/en/docs/admin-manual/config/fe-config.md
@@ -2338,7 +2338,7 @@ Is it a configuration item unique to the Master FE node:
false
### `max_same_name_catalog_trash_num`
-It is used to set the maximum number of meta information with the same name in
the catalog recycle bin. When the maximum value is exceeded, the earliest
deleted meta trash will be completely deleted and cannot be recovered.
+It is used to set the maximum number of meta information with the same name in
the catalog recycle bin. When the maximum value is exceeded, the earliest
deleted meta trash will be completely deleted and cannot be recovered. 0 means
not to keep objects of the same name. < 0 means no limit.
Note: The judgment of metadata with the same name will be limited to a certain
range. For example, the judgment of the database with the same name will be
limited to the same cluster, the judgment of the table with the same name will
be limited to the same database (with the same database id), the judgment of
the partition with the same name will be limited to the same database (with the
same database id) and the same table (with the same table id).
diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md
b/docs/zh-CN/docs/admin-manual/config/fe-config.md
index d12d0f2ac8..05fd33e1d1 100644
--- a/docs/zh-CN/docs/admin-manual/config/fe-config.md
+++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md
@@ -2394,7 +2394,7 @@ hive partition 的最大缓存数量。
### `max_same_name_catalog_trash_num`
-用于设置回收站中同名元数据的最大个数,超过最大值时,最早删除的元数据将被彻底删除,不能再恢复。
+用于设置回收站中同名元数据的最大个数,超过最大值时,最早删除的元数据将被彻底删除,不能再恢复。0 表示不保留同名对象。< 0 表示不做限制。
注意:同名元数据的判断会局限在一定的范围内。比如同名database的判断会限定在相同cluster下,同名table的判断会限定在相同database(指相同database
id)下,同名partition的判断会限定在相同database(指相同database id)并且相同table(指相同table id)下。
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
index 842de6103f..011df7eb15 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
@@ -32,10 +32,12 @@ import org.apache.doris.thrift.TStorageMedium;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
+import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import com.google.common.collect.Sets;
+import com.google.common.collect.Table.Cell;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -131,11 +133,6 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
// db should be empty. all tables are recycled before
Preconditions.checkState(db.getTables().isEmpty());
- // erase db with same name
- if (Config.max_same_name_catalog_trash_num > 0) {
- eraseDatabaseWithSameName(db.getFullName(), isReplay,
Config.max_same_name_catalog_trash_num);
- }
-
// recycle db
RecycleDatabaseInfo databaseInfo = new RecycleDatabaseInfo(db,
tableNames, tableIds);
idToDatabase.put(db.getId(), databaseInfo);
@@ -156,11 +153,6 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
return false;
}
- // erase table with same name
- if (Config.max_same_name_catalog_trash_num > 0) {
- eraseTableWithSameName(dbId, table.getName(), isReplay,
Config.max_same_name_catalog_trash_num);
- }
-
// recycle table
RecycleTableInfo tableInfo = new RecycleTableInfo(dbId, table);
if (!isReplay || replayRecycleTime == 0) {
@@ -183,11 +175,6 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
return false;
}
- // erase partition with same name
- if (Config.max_same_name_catalog_trash_num > 0) {
- erasePartitionWithSameName(dbId, tableId, partition.getName(),
Config.max_same_name_catalog_trash_num);
- }
-
// recycle partition
RecyclePartitionInfo partitionInfo = new RecyclePartitionInfo(dbId,
tableId, partition,
range, listPartitionItem, dataProperty, replicaAlloc,
isInMemory);
@@ -210,7 +197,8 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
return latency > minEraseLatency && latency >
Config.catalog_trash_expire_second * 1000L;
}
- private synchronized void eraseDatabase(long currentTimeMs) {
+ private synchronized void eraseDatabase(long currentTimeMs, int keepNum) {
+ // 1. erase expired database
Iterator<Map.Entry<Long, RecycleDatabaseInfo>> dbIter =
idToDatabase.entrySet().iterator();
while (dbIter.hasNext()) {
Map.Entry<Long, RecycleDatabaseInfo> entry = dbIter.next();
@@ -224,6 +212,15 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
LOG.info("erase db[{}]", db.getId());
}
}
+ // 2. erase exceed number
+ if (keepNum < 0) {
+ return;
+ }
+ Set<String> dbNames = idToDatabase.values().stream().map(d ->
d.getDb().getFullName())
+ .collect(Collectors.toSet());
+ for (String dbName : dbNames) {
+ eraseDatabaseWithSameName(dbName, currentTimeMs, keepNum);
+ }
}
private synchronized List<Long> getSameNameDbIdListToErase(String dbName,
int maxSameNameTrashNum) {
@@ -242,34 +239,39 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
}
}
List<Long> dbIdToErase = Lists.newArrayList();
- if (dbRecycleTimeLists.size() < maxSameNameTrashNum) {
+ if (dbRecycleTimeLists.size() <= maxSameNameTrashNum) {
return dbIdToErase;
}
// order by recycle time desc
- dbRecycleTimeLists.sort((x, y) -> {
- return (x.get(1).longValue() < y.get(1).longValue()) ? 1 :
((x.get(1).equals(y.get(1))) ? 0 : -1);
- });
+ dbRecycleTimeLists.sort((x, y) ->
+ (x.get(1).longValue() < y.get(1).longValue()) ? 1 :
((x.get(1).equals(y.get(1))) ? 0 : -1));
- for (int i = maxSameNameTrashNum - 1; i < dbRecycleTimeLists.size();
i++) {
+ for (int i = maxSameNameTrashNum; i < dbRecycleTimeLists.size(); i++) {
dbIdToErase.add(dbRecycleTimeLists.get(i).get(0));
}
return dbIdToErase;
}
- private synchronized void eraseDatabaseWithSameName(String dbName, boolean
isReplay, int maxSameNameTrashNum) {
+ private synchronized void eraseDatabaseWithSameName(String dbName, long
currentTimeMs, int maxSameNameTrashNum) {
List<Long> dbIdToErase = getSameNameDbIdListToErase(dbName,
maxSameNameTrashNum);
for (Long dbId : dbIdToErase) {
RecycleDatabaseInfo dbInfo = idToDatabase.get(dbId);
- eraseAllTables(dbInfo, isReplay);
+ if (!isExpireMinLatency(dbId, currentTimeMs)) {
+ continue;
+ }
+ eraseAllTables(dbInfo);
idToDatabase.remove(dbId);
idToRecycleTime.remove(dbId);
- Env.getCurrentEnv().eraseDatabase(dbId, false);
-
+ Env.getCurrentEnv().eraseDatabase(dbId, true);
LOG.info("erase database[{}] name: {}", dbId, dbName);
}
}
- private void eraseAllTables(RecycleDatabaseInfo dbInfo, boolean isReplay) {
+ private synchronized boolean isExpireMinLatency(long id, long
currentTimeMs) {
+ return (currentTimeMs - idToRecycleTime.get(id)) > minEraseLatency;
+ }
+
+ private void eraseAllTables(RecycleDatabaseInfo dbInfo) {
Database db = dbInfo.getDb();
Set<String> tableNames = Sets.newHashSet(dbInfo.getTableNames());
Set<Long> tableIds = Sets.newHashSet(dbInfo.getTableIds());
@@ -285,12 +287,13 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
Table table = tableInfo.getTable();
if (table.getType() == TableType.OLAP) {
- Env.getCurrentEnv().onEraseOlapTable((OlapTable) table,
isReplay);
+ Env.getCurrentEnv().onEraseOlapTable((OlapTable) table, false);
}
- LOG.info("erase db[{}] with table[{}]: {}", dbId, table.getId(),
table.getName());
iterator.remove();
idToRecycleTime.remove(table.getId());
tableNames.remove(table.getName());
+ Env.getCurrentEnv().getEditLog().logEraseTable(table.getId());
+ LOG.info("erase db[{}] with table[{}]: {}", dbId, table.getId(),
table.getName());
}
}
@@ -301,7 +304,8 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
LOG.info("replay erase db[{}]", dbId);
}
- private synchronized void eraseTable(long currentTimeMs) {
+ private synchronized void eraseTable(long currentTimeMs, int keepNum) {
+ // 1. erase expired tables
Iterator<Map.Entry<Long, RecycleTableInfo>> tableIter =
idToTable.entrySet().iterator();
while (tableIter.hasNext()) {
Map.Entry<Long, RecycleTableInfo> entry = tableIter.next();
@@ -323,6 +327,25 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
LOG.info("erase table[{}]", tableId);
}
} // end for tables
+
+ // 2. erase exceed num
+ if (keepNum < 0) {
+ return;
+ }
+ Map<Long, Set<String>> dbId2TableNames = Maps.newHashMap();
+ for (RecycleTableInfo tableInfo : idToTable.values()) {
+ Set<String> tblNames = dbId2TableNames.get(tableInfo.dbId);
+ if (tblNames == null) {
+ tblNames = Sets.newHashSet();
+ dbId2TableNames.put(tableInfo.dbId, tblNames);
+ }
+ tblNames.add(tableInfo.getTable().getName());
+ }
+ for (Map.Entry<Long, Set<String>> entry : dbId2TableNames.entrySet()) {
+ for (String tblName : entry.getValue()) {
+ eraseTableWithSameName(entry.getKey(), tblName, currentTimeMs,
keepNum);
+ }
+ }
}
private synchronized List<Long> getSameNameTableIdListToErase(long dbId,
String tableName,
@@ -346,32 +369,35 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
}
}
List<Long> tableIdToErase = Lists.newArrayList();
- if (tableRecycleTimeLists.size() < maxSameNameTrashNum) {
+ if (tableRecycleTimeLists.size() <= maxSameNameTrashNum) {
return tableIdToErase;
}
// order by recycle time desc
- tableRecycleTimeLists.sort((x, y) -> {
- return (x.get(1).longValue() < y.get(1).longValue()) ? 1 :
((x.get(1).equals(y.get(1))) ? 0 : -1);
- });
+ tableRecycleTimeLists.sort((x, y) ->
+ (x.get(1).longValue() < y.get(1).longValue()) ? 1 :
((x.get(1).equals(y.get(1))) ? 0 : -1));
- for (int i = maxSameNameTrashNum - 1; i <
tableRecycleTimeLists.size(); i++) {
+ for (int i = maxSameNameTrashNum; i < tableRecycleTimeLists.size();
i++) {
tableIdToErase.add(tableRecycleTimeLists.get(i).get(0));
}
return tableIdToErase;
}
- private synchronized void eraseTableWithSameName(long dbId, String
tableName, boolean isReplay,
- int maxSameNameTrashNum) {
+ private synchronized void eraseTableWithSameName(long dbId, String
tableName, long currentTimeMs,
+ int maxSameNameTrashNum) {
List<Long> tableIdToErase = getSameNameTableIdListToErase(dbId,
tableName, maxSameNameTrashNum);
for (Long tableId : tableIdToErase) {
RecycleTableInfo tableInfo = idToTable.get(tableId);
+ if (!isExpireMinLatency(tableId, currentTimeMs)) {
+ continue;
+ }
Table table = tableInfo.getTable();
if (table.getType() == TableType.OLAP) {
- Env.getCurrentEnv().onEraseOlapTable((OlapTable) table,
isReplay);
+ Env.getCurrentEnv().onEraseOlapTable((OlapTable) table, false);
}
idToTable.remove(tableId);
idToRecycleTime.remove(tableId);
+ Env.getCurrentEnv().getEditLog().logEraseTable(tableId);
LOG.info("erase table[{}] name: {} from db[{}]", tableId,
tableName, dbId);
}
}
@@ -387,7 +413,8 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
LOG.info("replay erase table[{}]", tableId);
}
- private synchronized void erasePartition(long currentTimeMs) {
+ private synchronized void erasePartition(long currentTimeMs, int keepNum) {
+ // 1. erase expired partitions
Iterator<Map.Entry<Long, RecyclePartitionInfo>> iterator =
idToPartition.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<Long, RecyclePartitionInfo> entry = iterator.next();
@@ -400,12 +427,31 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
// erase partition
iterator.remove();
idToRecycleTime.remove(partitionId);
-
// log
Env.getCurrentEnv().getEditLog().logErasePartition(partitionId);
- LOG.info("erase partition[{}]", partitionId);
+ LOG.info("erase partition[{}]. reason: expired", partitionId);
}
} // end for partitions
+
+ // 2. erase exceed number
+ if (keepNum < 0) {
+ return;
+ }
+ com.google.common.collect.Table<Long, Long, Set<String>>
dbTblId2PartitionNames = HashBasedTable.create();
+ for (RecyclePartitionInfo partitionInfo : idToPartition.values()) {
+ Set<String> partitionNames =
dbTblId2PartitionNames.get(partitionInfo.dbId, partitionInfo.tableId);
+ if (partitionNames == null) {
+ partitionNames = Sets.newHashSet();
+ dbTblId2PartitionNames.put(partitionInfo.dbId,
partitionInfo.tableId, partitionNames);
+ }
+ partitionNames.add(partitionInfo.getPartition().getName());
+ }
+ for (Cell<Long, Long, Set<String>> cell :
dbTblId2PartitionNames.cellSet()) {
+ for (String partitionName : cell.getValue()) {
+ erasePartitionWithSameName(cell.getRowKey(),
cell.getColumnKey(), partitionName, currentTimeMs,
+ keepNum);
+ }
+ }
}
private synchronized List<Long> getSameNamePartitionIdListToErase(long
dbId, long tableId, String partitionName,
@@ -429,32 +475,34 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
}
}
List<Long> partitionIdToErase = Lists.newArrayList();
- if (partitionRecycleTimeLists.size() < maxSameNameTrashNum) {
+ if (partitionRecycleTimeLists.size() <= maxSameNameTrashNum) {
return partitionIdToErase;
}
// order by recycle time desc
- partitionRecycleTimeLists.sort((x, y) -> {
- return (x.get(1).longValue() < y.get(1).longValue()) ? 1 :
((x.get(1).equals(y.get(1))) ? 0 : -1);
- });
+ partitionRecycleTimeLists.sort((x, y) ->
+ (x.get(1).longValue() < y.get(1).longValue()) ? 1 :
((x.get(1).equals(y.get(1))) ? 0 : -1));
- for (int i = maxSameNameTrashNum - 1; i <
partitionRecycleTimeLists.size(); i++) {
+ for (int i = maxSameNameTrashNum; i <
partitionRecycleTimeLists.size(); i++) {
partitionIdToErase.add(partitionRecycleTimeLists.get(i).get(0));
}
return partitionIdToErase;
}
private synchronized void erasePartitionWithSameName(long dbId, long
tableId, String partitionName,
- int
maxSameNameTrashNum) {
+ long currentTimeMs, int maxSameNameTrashNum) {
List<Long> partitionIdToErase =
getSameNamePartitionIdListToErase(dbId, tableId, partitionName,
- maxSameNameTrashNum);
+ maxSameNameTrashNum);
for (Long partitionId : partitionIdToErase) {
RecyclePartitionInfo partitionInfo =
idToPartition.get(partitionId);
+ if (!isExpireMinLatency(partitionId, currentTimeMs)) {
+ continue;
+ }
Partition partition = partitionInfo.getPartition();
Env.getCurrentEnv().onErasePartition(partition);
idToPartition.remove(partitionId);
idToRecycleTime.remove(partitionId);
-
+ Env.getCurrentEnv().getEditLog().logErasePartition(partitionId);
LOG.info("erase partition[{}] name: {} from table[{}] from
db[{}]", partitionId, partitionName, tableId,
dbId);
}
@@ -861,13 +909,13 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
long currentTimeMs = System.currentTimeMillis();
// should follow the partition/table/db order
// in case of partition(table) is still in recycle bin but table(db)
is missing
- erasePartition(currentTimeMs);
- eraseTable(currentTimeMs);
- eraseDatabase(currentTimeMs);
+ int keepNum = Config.max_same_name_catalog_trash_num;
+ erasePartition(currentTimeMs, keepNum);
+ eraseTable(currentTimeMs, keepNum);
+ eraseDatabase(currentTimeMs, keepNum);
}
public List<List<String>> getInfo() {
- List<List<String>> infos = Lists.newArrayList();
List<List<String>> dbInfos = Lists.newArrayList();
for (Map.Entry<Long, RecycleDatabaseInfo> entry :
idToDatabase.entrySet()) {
List<String> info = Lists.newArrayList();
@@ -943,9 +991,7 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
}
});
- infos = Stream.of(dbInfos, tableInfos,
partitionInfos).flatMap(Collection::stream).collect(Collectors.toList());
-
- return infos;
+ return Stream.of(dbInfos, tableInfos,
partitionInfos).flatMap(Collection::stream).collect(Collectors.toList());
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 3a44f7275b..c741194544 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1920,6 +1920,8 @@ public class Config extends ConfigBase {
/**
* Max num of same-name meta information in catalog recycle bin.
* Default is 3.
+ * 0 means do not keep any meta obj with same name.
+ * < 0 means no limit
*/
@ConfField(mutable = true, masterOnly = true)
public static int max_same_name_catalog_trash_num = 3;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]