This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 47ea3ea3909 [feat](fe) Show db total size and recycle bin size with
`ShowDataStmt` (#30663)
47ea3ea3909 is described below
commit 47ea3ea39093711a2677232eeca0c16d678754f7
Author: Lei Zhang <[email protected]>
AuthorDate: Thu Feb 8 12:06:01 2024 +0800
[feat](fe) Show db total size and recycle bin size with `ShowDataStmt`
(#30663)
---
.../sql-reference/Show-Statements/SHOW-DATA.md | 28 ++++++-
.../sql-reference/Show-Statements/SHOW-DATA.md | 28 ++++++-
.../org/apache/doris/analysis/ShowDataStmt.java | 88 ++++++++++++++++++++++
.../apache/doris/catalog/CatalogRecycleBin.java | 39 ++++++++++
.../java/org/apache/doris/catalog/Database.java | 20 +++--
.../suites/show_p0/test_show_data.groovy | 31 ++++++++
6 files changed, 223 insertions(+), 11 deletions(-)
diff --git a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
index d0999576ffa..f720d9da4ad 100644
--- a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
+++ b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
@@ -56,9 +56,31 @@ illustrate:
### Example
-1. Display the data volume, replica number, aggregate data volume and
aggregate replica number of each table in the default db.
+1. Display the data size and RecycleBin size of each database by default.
+
+ ```
+ SHOW DATA;
+ ```
+
+ ```
+
+-------+-----------------------------------+--------+------------+-------------+-------------------+
+ | DbId | DbName | Size | RemoteSize |
RecycleSize | RecycleRemoteSize |
+
+-------+-----------------------------------+--------+------------+-------------+-------------------+
+ | 21009 | db1 | 0 | 0 | 0
| 0 |
+ | 22011 | regression_test_inverted_index_p0 | 72764 | 0 | 0
| 0 |
+ | 0 | information_schema | 0 | 0 | 0
| 0 |
+ | 22010 | regression_test | 0 | 0 | 0
| 0 |
+ | 1 | mysql | 0 | 0 | 0
| 0 |
+ | 22017 | regression_test_show_p0 | 0 | 0 | 0
| 0 |
+ | 10002 | __internal_schema | 46182 | 0 | 0
| 0 |
+ | Total | NULL | 118946 | 0 | 0
| 0 |
+
+-------+-----------------------------------+--------+------------+-------------+-------------------+
+ ```
+
+2. Display the data volume, replica number, aggregate data volume and
aggregate replica number of each table in a database.
```sql
+ USE db1;
SHOW DATA;
````
@@ -74,7 +96,7 @@ illustrate:
+-----------+-------------+--------------+
````
-2. Display the subdivided data volume, the number of replicas and the number
of statistical rows of the specified table under the specified db
+3. Display the subdivided data volume, the number of replicas and the number
of statistical rows of the specified table under the specified db
```sql
SHOW DATA FROM example_db.test;
@@ -91,7 +113,7 @@ illustrate:
+-----------+-----------+-----------+--------------+----------+
````
-3. It can be combined and sorted according to the amount of data, the number
of copies, the number of statistical rows, etc.
+4. It can be combined and sorted according to the amount of data, the number
of copies, the number of statistical rows, etc.
```sql
SHOW DATA ORDER BY ReplicaCount desc,Size asc;
diff --git
a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
index 2acce978774..ce851227bb8 100644
--- a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md
@@ -56,9 +56,31 @@ SHOW DATA [FROM [db_name.]table_name] [ORDER BY ...];
### Example
-1. 展示默认 db 的各个 table 的数据量,副本数量,汇总数据量和汇总副本数量。
+1. 默认展示各个db的汇总数据量,RecycleBin中的数据量
+
+ ```
+ SHOW DATA;
+ ```
+
+ ```
+
+-------+-----------------------------------+--------+------------+-------------+-------------------+
+ | DbId | DbName | Size | RemoteSize |
RecycleSize | RecycleRemoteSize |
+
+-------+-----------------------------------+--------+------------+-------------+-------------------+
+ | 21009 | db1 | 0 | 0 | 0
| 0 |
+ | 22011 | regression_test_inverted_index_p0 | 72764 | 0 | 0
| 0 |
+ | 0 | information_schema | 0 | 0 | 0
| 0 |
+ | 22010 | regression_test | 0 | 0 | 0
| 0 |
+ | 1 | mysql | 0 | 0 | 0
| 0 |
+ | 22017 | regression_test_show_p0 | 0 | 0 | 0
| 0 |
+ | 10002 | __internal_schema | 46182 | 0 | 0
| 0 |
+ | Total | NULL | 118946 | 0 | 0
| 0 |
+
+-------+-----------------------------------+--------+------------+-------------+-------------------+
+ ```
+
+2. 展示特定 db 的各个 table 的数据量,副本数量,汇总数据量和汇总副本数量。
```sql
+ USE db1;
SHOW DATA;
```
@@ -74,7 +96,7 @@ SHOW DATA [FROM [db_name.]table_name] [ORDER BY ...];
+-----------+-------------+--------------+
```
-2. 展示指定 db 的下指定表的细分数据量、副本数量和统计行数
+3. 展示指定 db 的下指定表的细分数据量、副本数量和统计行数
```sql
SHOW DATA FROM example_db.test;
@@ -91,7 +113,7 @@ SHOW DATA [FROM [db_name.]table_name] [ORDER BY ...];
+-----------+-----------+-----------+--------------+----------+
```
-3. 可以按照数据量、副本数量、统计行数等进行组合排序
+4. 可以按照数据量、副本数量、统计行数等进行组合排序
```sql
SHOW DATA ORDER BY ReplicaCount desc,Size asc;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java
index ddaf27fa539..f84784e5826 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java
@@ -19,6 +19,7 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.OlapTable;
@@ -38,6 +39,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSetMetaData;
+import com.google.common.base.Strings;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@@ -54,6 +56,16 @@ import java.util.TreeMap;
import java.util.TreeSet;
public class ShowDataStmt extends ShowStmt {
+ private static final ShowResultSetMetaData SHOW_DATABASE_DATA_META_DATA =
+ ShowResultSetMetaData.builder()
+ .addColumn(new Column("DbId",
ScalarType.createVarchar(20)))
+ .addColumn(new Column("DbName",
ScalarType.createVarchar(20)))
+ .addColumn(new Column("Size",
ScalarType.createVarchar(30)))
+ .addColumn(new Column("RemoteSize",
ScalarType.createVarchar(30)))
+ .addColumn(new Column("RecycleSize",
ScalarType.createVarchar(30)))
+ .addColumn(new Column("RecycleRemoteSize",
ScalarType.createVarchar(30)))
+ .build();
+
private static final ShowResultSetMetaData SHOW_TABLE_DATA_META_DATA =
ShowResultSetMetaData.builder()
.addColumn(new Column("TableName",
ScalarType.createVarchar(20)))
@@ -71,6 +83,11 @@ public class ShowDataStmt extends ShowStmt {
.addColumn(new Column("RowCount",
ScalarType.createVarchar(20)))
.addColumn(new Column("RemoteSize",
ScalarType.createVarchar(30)))
.build();
+
+ public static final ImmutableList<String>
SHOW_DATABASE_DATA_META_DATA_ORIGIN =
+ new
ImmutableList.Builder<String>().add("DbId").add("DbName").add("Size")
+
.add("RemoteSize").add("RecycleSize").add("RecycleRemoteSize").build();
+
public static final ImmutableList<String> SHOW_TABLE_DATA_META_DATA_ORIGIN
=
new
ImmutableList.Builder<String>().add("TableName").add("Size").add("ReplicaCount")
.add("RemoteSize").build();
@@ -97,6 +114,10 @@ public class ShowDataStmt extends ShowStmt {
public void analyze(Analyzer analyzer) throws UserException {
super.analyze(analyzer);
dbName = analyzer.getDefaultDb();
+ if (Strings.isNullOrEmpty(dbName)) {
+ getAllDbStats();
+ return;
+ }
if (tableName != null) {
tableName.analyze(analyzer);
// disallow external catalog
@@ -374,6 +395,9 @@ public class ShowDataStmt extends ShowStmt {
@Override
public ShowResultSetMetaData getMetaData() {
+ if (Strings.isNullOrEmpty(dbName)) {
+ return SHOW_DATABASE_DATA_META_DATA;
+ }
if (tableName != null) {
return SHOW_INDEX_DATA_META_DATA;
} else {
@@ -408,4 +432,68 @@ public class ShowDataStmt extends ShowStmt {
public String toString() {
return toSql();
}
+
+ private void getAllDbStats() {
+ List<String> dbNames = Env.getCurrentInternalCatalog().getDbNames();
+ if (dbNames == null || dbNames.isEmpty()) {
+ return;
+ }
+ long totalSize = 0;
+ long totalRemoteSize = 0;
+ long totalRecycleSize = 0;
+ long totalRecycleRemoteSize = 0;
+ Map<Long, Pair<Long, Long>> dbToRecycleSize =
Env.getCurrentRecycleBin().getDbToRecycleSize();
+ // show all database datasize
+ for (String dbName : dbNames) {
+ DatabaseIf db =
Env.getCurrentInternalCatalog().getDbNullable(dbName);
+ if (db == null) {
+ continue;
+ }
+ List<String> dbInfo = new ArrayList<>();
+ db.readLock();
+ try {
+ dbInfo.add(String.valueOf(db.getId()));
+ dbInfo.add(dbName);
+ Pair<Long, Long> usedSize = ((Database) db).getUsedDataSize();
+ dbInfo.add(String.valueOf(usedSize.first));
+ dbInfo.add(String.valueOf(usedSize.second));
+ totalSize += usedSize.first;
+ totalRemoteSize += usedSize.second;
+ } finally {
+ db.readUnlock();
+ }
+
+ Pair<Long, Long> recycleSize =
dbToRecycleSize.getOrDefault(db.getId(), Pair.of(0L, 0L));
+ dbInfo.add(String.valueOf(recycleSize.first));
+ dbInfo.add(String.valueOf(recycleSize.second));
+ totalRecycleSize += recycleSize.first;
+ totalRecycleRemoteSize += recycleSize.second;
+ dbToRecycleSize.remove(db.getId());
+ totalRows.add(dbInfo);
+ }
+
+ // Append left database in recycle bin
+ for (Map.Entry<Long, Pair<Long, Long>> entry :
dbToRecycleSize.entrySet()) {
+ List<String> dbInfo = new ArrayList<>();
+ dbInfo.add(String.valueOf(entry.getKey()));
+ dbInfo.add("NULL");
+ dbInfo.add("0");
+ dbInfo.add("0");
+ dbInfo.add(String.valueOf(entry.getValue().first));
+ dbInfo.add(String.valueOf(entry.getValue().second));
+ totalRecycleSize += entry.getValue().first;
+ totalRecycleRemoteSize += entry.getValue().second;
+ totalRows.add(dbInfo);
+ }
+
+ // calc total size
+ List<String> dbInfo = new ArrayList<>();
+ dbInfo.add("Total");
+ dbInfo.add("NULL");
+ dbInfo.add(String.valueOf(totalSize));
+ dbInfo.add(String.valueOf(totalRemoteSize));
+ dbInfo.add(String.valueOf(totalRecycleSize));
+ dbInfo.add(String.valueOf(totalRecycleRemoteSize));
+ totalRows.add(dbInfo);
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
index 50cc2679251..d803cde648a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
@@ -1100,6 +1100,45 @@ public class CatalogRecycleBin extends MasterDaemon
implements Writable {
return Stream.of(dbInfos, tableInfos,
partitionInfos).flatMap(Collection::stream).collect(Collectors.toList());
}
+ public synchronized Map<Long, Pair<Long, Long>> getDbToRecycleSize() {
+ Map<Long, Pair<Long, Long>> dbToRecycleSize = new HashMap<>();
+ for (Map.Entry<Long, RecycleTableInfo> entry : idToTable.entrySet()) {
+ RecycleTableInfo tableInfo = entry.getValue();
+ Table table = tableInfo.getTable();
+ if (!(table instanceof OlapTable)) {
+ continue;
+ }
+ long dataSize = table.getDataSize(false);
+ long remoteDataSize = ((OlapTable) table).getRemoteDataSize();
+ dbToRecycleSize.compute(tableInfo.getDbId(), (k, v) -> {
+ if (v == null) {
+ return Pair.of(dataSize, remoteDataSize);
+ } else {
+ v.first += dataSize;
+ v.second += remoteDataSize;
+ return v;
+ }
+ });
+ }
+
+ for (Map.Entry<Long, RecyclePartitionInfo> entry :
idToPartition.entrySet()) {
+ RecyclePartitionInfo partitionInfo = entry.getValue();
+ Partition partition = partitionInfo.getPartition();
+ long dataSize = partition.getDataSize(false);
+ long remoteDataSize = partition.getRemoteDataSize();
+ dbToRecycleSize.compute(partitionInfo.getDbId(), (k, v) -> {
+ if (v == null) {
+ return Pair.of(dataSize, remoteDataSize);
+ } else {
+ v.first += dataSize;
+ v.second += remoteDataSize;
+ return v;
+ }
+ });
+ }
+ return dbToRecycleSize;
+ }
+
// Need to add "synchronized", because when calling /dump api to dump
image,
// this class is not protected by any lock, will throw
ConcurrentModificationException.
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
index 3975fe8b388..05ba8fac115 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
@@ -272,10 +272,19 @@ public class Database extends MetaObject implements
Writable, DatabaseIf<Table>
}
public long getUsedDataQuotaWithLock() {
- long usedDataQuota = 0;
+ return getUsedDataSize().first;
+ }
+
+ public Pair<Long, Long> getUsedDataSize() {
+ long usedDataSize = 0;
+ long usedRemoteDataSize = 0;
+ List<Table> tables = new ArrayList<>();
readLock();
- List<Table> tables = new ArrayList<>(this.idToTable.values());
- readUnlock();
+ try {
+ tables.addAll(this.idToTable.values());
+ } finally {
+ readUnlock();
+ }
for (Table table : tables) {
if (table.getType() != TableType.OLAP) {
@@ -285,12 +294,13 @@ public class Database extends MetaObject implements
Writable, DatabaseIf<Table>
OlapTable olapTable = (OlapTable) table;
olapTable.readLock();
try {
- usedDataQuota = usedDataQuota + olapTable.getDataSize();
+ usedDataSize = usedDataSize + olapTable.getDataSize();
+ usedRemoteDataSize = usedRemoteDataSize +
olapTable.getRemoteDataSize();
} finally {
olapTable.readUnlock();
}
}
- return usedDataQuota;
+ return Pair.of(usedDataSize, usedRemoteDataSize);
}
public long getReplicaCountWithLock() {
diff --git a/regression-test/suites/show_p0/test_show_data.groovy
b/regression-test/suites/show_p0/test_show_data.groovy
new file mode 100644
index 00000000000..10714a542d4
--- /dev/null
+++ b/regression-test/suites/show_p0/test_show_data.groovy
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_show_data") {
+ def result = sql """show data;"""
+ logger.info("show data result:${result}");
+ assertTrue(result.size() > 0);
+
+ def jdbcUrlWithoutDbStr =
(context.config.jdbcUrl).split(context.config.defaultDb)
+ logger.info("jdbcUrlWithoutDbStr:${jdbcUrlWithoutDbStr}");
+ def result2 = connect(context.config.jdbcUser, password =
context.config.jdbcPassword, url = jdbcUrlWithoutDbStr[0]) {
+ sql """show data;"""
+ }
+
+ assertTrue(result2.size() > 0);
+ assertTrue(result2[0].size() == 6);
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]