This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 127ac8d93ec [Enhancement](ExternalTable)Optimize the performance of getCachedRowCount when reading ExternalTable (#41659)
127ac8d93ec is described below
commit 127ac8d93ece5310afe20eb5521f6ec883d7282a
Author: daidai <[email protected]>
AuthorDate: Sun Oct 13 20:21:02 2024 +0800
[Enhancement](ExternalTable)Optimize the performance of getCachedRowCount when reading ExternalTable (#41659)
## Proposed changes

`getCachedRowCount()` on an ExternalTable used to initialize a not-yet-initialized table first, which is unnecessary and time-consuming. For uninitialized tables, we now return -1 directly.

This speeds up queries against `information_schema.tables`.
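For clarity, here is the resulting method in condensed form; it mirrors the `ExternalTable.java` hunk below and adds nothing beyond what the diff shows:

```java
@Override
public long getCachedRowCount() {
    // Skip makeSureInitialized(): if the table object has not been created yet,
    // report an unknown row count (-1) instead of paying the initialization cost.
    if (!isObjectCreated()) {
        return -1;
    }
    // Non-blocking cache lookup; a miss triggers an asynchronous load and returns -1.
    return Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache()
            .getCachedRowCount(catalog.getId(), dbId, id);
}
```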
---
.../org/apache/doris/datasource/ExternalTable.java | 13 +++----
.../doris/datasource/hive/HMSExternalCatalog.java | 10 ++++++
.../iceberg/IcebergHMSExternalCatalog.java | 13 +------
.../datasource/iceberg/IcebergMetadataCache.java | 41 ++--------------------
.../doris/datasource/iceberg/IcebergUtils.java | 17 ++++++++-
.../iceberg/test_iceberg_table_stats.groovy | 1 +
6 files changed, 38 insertions(+), 57 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index eedbe4e20da..baa25c991fc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -202,14 +202,15 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable {
@Override
public long getCachedRowCount() {
- // Return -1 if makeSureInitialized throw exception.
- // For example, init hive table may throw NotSupportedException.
- try {
- makeSureInitialized();
- } catch (Exception e) {
- LOG.warn("Failed to initialize table {}.{}.{}", catalog.getName(), dbName, name, e);
+ // Return -1 if uninitialized.
+ // Previously, for uninitialized tables we would call makeSureInitialized(), just like
+ // ExternalTable.getRowCount() does, but that is not very meaningful here and is time-consuming.
+ // getCachedRowCount() is only used by `show table` and when querying `information_schema.tables`.
+ if (!isObjectCreated()) {
return -1;
}
+ // getExtMetaCacheMgr().getRowCountCache().getCachedRowCount() is an asynchronous non-blocking operation.
+ // For tables that are not in the cache, it will load asynchronously and return -1.
return Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(), dbId, id);
}
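As background on the comment above: the sketch below is a minimal model of an asynchronous, non-blocking row-count cache of this kind, assuming a Caffeine `AsyncLoadingCache`. It is illustrative only and is not the actual Doris `ExternalRowCountCache` implementation; the class and method names are hypothetical.

```java
import java.util.concurrent.CompletableFuture;

import com.github.benmanes.caffeine.cache.AsyncLoadingCache;
import com.github.benmanes.caffeine.cache.Caffeine;

// Illustrative sketch only; the real cache lives behind Doris' ExtMetaCacheMgr.
class RowCountCacheSketch {
    private final AsyncLoadingCache<Long, Long> cache = Caffeine.newBuilder()
            .maximumSize(100_000)
            .buildAsync(tableId -> loadRowCount(tableId)); // load runs on a background executor

    /** Non-blocking: returns the cached value if ready, otherwise starts a load and returns -1. */
    long getCachedRowCount(long tableId) {
        CompletableFuture<Long> future = cache.get(tableId); // on a miss this schedules the load
        if (future.isDone() && !future.isCompletedExceptionally()) {
            return future.join();
        }
        return -1; // unknown for now; a later call may observe the loaded value
    }

    private long loadRowCount(long tableId) {
        // Placeholder for the expensive metadata/statistics fetch (hypothetical).
        return 0L;
    }
}
```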
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
index 99fd96f65db..20b9482041d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
@@ -34,6 +34,7 @@ import org.apache.doris.datasource.ExternalDatabase;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.InitCatalogLog;
import org.apache.doris.datasource.SessionContext;
+import org.apache.doris.datasource.iceberg.IcebergUtils;
import org.apache.doris.datasource.jdbc.client.JdbcClientConfig;
import org.apache.doris.datasource.operations.ExternalMetadataOperations;
import org.apache.doris.datasource.property.PropertyConverter;
@@ -53,6 +54,7 @@ import com.google.common.collect.Maps;
import lombok.Getter;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.iceberg.hive.HiveCatalog;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -85,6 +87,8 @@ public class HMSExternalCatalog extends ExternalCatalog {
private int hmsEventsBatchSizePerRpc = -1;
private boolean enableHmsEventsIncrementalSync = false;
+ // For "type" = "hms" catalogs that may contain Iceberg tables.
+ private HiveCatalog icebergHiveCatalog;
@VisibleForTesting
public HMSExternalCatalog() {
@@ -195,6 +199,8 @@ public class HMSExternalCatalog extends ExternalCatalog {
transactionManager = TransactionManagerFactory.createHiveTransactionManager(hiveOps, fileSystemProvider, fileSystemExecutor);
metadataOps = hiveOps;
+
+ icebergHiveCatalog = IcebergUtils.createIcebergHiveCatalog(this, getName());
}
@Override
@@ -331,6 +337,10 @@ public class HMSExternalCatalog extends ExternalCatalog {
return enableHmsEventsIncrementalSync;
}
+ public HiveCatalog getIcebergHiveCatalog() {
+ return icebergHiveCatalog;
+ }
+
/**
* Enum for meta tables in hive catalog.
* eg: tbl$partitions
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
index c1475064934..51d39357b81 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
@@ -19,10 +19,6 @@ package org.apache.doris.datasource.iceberg;
import org.apache.doris.datasource.CatalogProperty;
import org.apache.doris.datasource.property.PropertyConverter;
-import org.apache.doris.datasource.property.constants.HMSProperties;
-
-import org.apache.iceberg.CatalogProperties;
-import org.apache.iceberg.hive.HiveCatalog;
import java.util.Map;
@@ -38,14 +34,7 @@ public class IcebergHMSExternalCatalog extends IcebergExternalCatalog {
@Override
protected void initCatalog() {
icebergCatalogType = ICEBERG_HMS;
- HiveCatalog hiveCatalog = new org.apache.iceberg.hive.HiveCatalog();
- hiveCatalog.setConf(getConfiguration());
- // initialize hive catalog
- Map<String, String> catalogProperties = catalogProperty.getProperties();
- String metastoreUris = catalogProperty.getOrDefault(HMSProperties.HIVE_METASTORE_URIS, "");
- catalogProperties.put(CatalogProperties.URI, metastoreUris);
- hiveCatalog.initialize(getName(), catalogProperties);
- catalog = hiveCatalog;
+ catalog = IcebergUtils.createIcebergHiveCatalog(this, getName());
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
index a35c73dc296..c1ac2a79754 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
@@ -22,25 +22,22 @@ import org.apache.doris.common.CacheFactory;
import org.apache.doris.common.Config;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.ExternalMetaCacheMgr;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
-import org.apache.doris.datasource.property.constants.HMSProperties;
-import org.apache.doris.fs.remote.dfs.DFSFileSystem;
import org.apache.doris.thrift.TIcebergMetadataParams;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.SerializableTable;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.hive.HiveCatalog;
import org.jetbrains.annotations.NotNull;
import java.util.HashMap;
@@ -97,11 +94,6 @@ public class IcebergMetadataCache {
return restTable;
}
- public Table getRemoteTable(CatalogIf catalog, String dbName, String tbName) {
- IcebergMetadataCacheKey key = IcebergMetadataCacheKey.of(catalog, dbName, tbName);
- return loadTable(key);
- }
-
@NotNull
private List<Snapshot> loadSnapshots(IcebergMetadataCacheKey key) {
Table icebergTable = getIcebergTable(key.catalog, key.dbName, key.tableName);
@@ -114,17 +106,13 @@ public class IcebergMetadataCache {
private Table loadTable(IcebergMetadataCacheKey key) {
Catalog icebergCatalog;
if (key.catalog instanceof HMSExternalCatalog) {
- HMSExternalCatalog ctg = (HMSExternalCatalog) key.catalog;
- icebergCatalog = createIcebergHiveCatalog(
- ctg.getHiveMetastoreUris(),
- ctg.getCatalogProperty().getHadoopProperties(),
- ctg.getProperties());
+ icebergCatalog = ((HMSExternalCatalog) key.catalog).getIcebergHiveCatalog();
} else if (key.catalog instanceof IcebergExternalCatalog) {
icebergCatalog = ((IcebergExternalCatalog) key.catalog).getCatalog();
} else {
throw new RuntimeException("Only support 'hms' and 'iceberg' type for iceberg table");
}
- Table icebergTable = HiveMetaStoreClientHelper.ugiDoAs(key.catalog.getId(),
+ Table icebergTable = HiveMetaStoreClientHelper.ugiDoAs(((ExternalCatalog) key.catalog).getConfiguration(),
() -> icebergCatalog.loadTable(TableIdentifier.of(key.dbName, key.tableName)));
initIcebergTableFileIO(icebergTable, key.catalog.getProperties());
return icebergTable;
@@ -177,29 +165,6 @@ public class IcebergMetadataCache {
});
}
- private Catalog createIcebergHiveCatalog(String uri, Map<String, String> hdfsConf, Map<String, String> props) {
- // set hdfs configure
- Configuration conf = DFSFileSystem.getHdfsConf(
- hdfsConf.getOrDefault(DFSFileSystem.PROP_ALLOW_FALLBACK_TO_SIMPLE_AUTH, "").isEmpty());
- for (Map.Entry<String, String> entry : hdfsConf.entrySet()) {
- conf.set(entry.getKey(), entry.getValue());
- }
- HiveCatalog hiveCatalog = new HiveCatalog();
- hiveCatalog.setConf(conf);
-
- if (props.containsKey(HMSExternalCatalog.BIND_BROKER_NAME)) {
- props.put(HMSProperties.HIVE_METASTORE_URIS, uri);
- props.put("uri", uri);
- hiveCatalog.initialize("hive", props);
- } else {
- Map<String, String> catalogProperties = new HashMap<>();
- catalogProperties.put(HMSProperties.HIVE_METASTORE_URIS, uri);
- catalogProperties.put("uri", uri);
- hiveCatalog.initialize("hive", catalogProperties);
- }
- return hiveCatalog;
- }
-
private static void initIcebergTableFileIO(Table table, Map<String, String> props) {
Map<String, String> ioConf = new HashMap<>();
table.properties().forEach((key, value) -> {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index f7280f5721f..893ee7bc93b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -47,10 +47,12 @@ import org.apache.doris.common.info.SimpleTableInfo;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
+import org.apache.doris.datasource.property.constants.HMSProperties;
import org.apache.doris.nereids.exceptions.NotSupportedException;
import org.apache.doris.thrift.TExprOpcode;
import com.google.common.collect.Lists;
+import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
@@ -63,6 +65,7 @@ import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Not;
import org.apache.iceberg.expressions.Or;
import org.apache.iceberg.expressions.Unbound;
+import org.apache.iceberg.hive.HiveCatalog;
import org.apache.iceberg.types.Type.TypeID;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.LocationUtil;
@@ -553,7 +556,7 @@ public class IcebergUtils {
return Env.getCurrentEnv()
.getExtMetaCacheMgr()
.getIcebergMetadataCache()
- .getRemoteTable(catalog, tableInfo.getDbName(), tableInfo.getTbName());
+ .getIcebergTable(catalog, tableInfo.getDbName(), tableInfo.getTbName());
}
private static org.apache.iceberg.Table getIcebergTableInternal(ExternalCatalog catalog, String dbName,
@@ -665,4 +668,16 @@ public class IcebergUtils {
}
return dataLocation;
}
+
+ public static HiveCatalog createIcebergHiveCatalog(ExternalCatalog externalCatalog, String name) {
+ HiveCatalog hiveCatalog = new org.apache.iceberg.hive.HiveCatalog();
+ hiveCatalog.setConf(externalCatalog.getConfiguration());
+
+ Map<String, String> catalogProperties = externalCatalog.getProperties();
+ String metastoreUris = catalogProperties.getOrDefault(HMSProperties.HIVE_METASTORE_URIS, "");
+ catalogProperties.put(CatalogProperties.URI, metastoreUris);
+
+ hiveCatalog.initialize(name, catalogProperties);
+ return hiveCatalog;
+ }
}
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
index a950f93909e..710a0bc953f 100644
--- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
+++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
@@ -40,6 +40,7 @@ suite("test_iceberg_table_stats", "p0,external,doris,external_docker,external_do
def retry = 0
def act = ""
while (retry < 10) {
+ sql """ select * from ${table_name} """
def result = sql """ show table stats ${table_name} """
act = result[0][2]
if (act != "-1") {