This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7118f1c8010 [fix](hive) refresh hive cache when inconsistency (#58074)
7118f1c8010 is described below
commit 7118f1c801020974d1f196d9e571a6c8c98ab1fb
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Thu Nov 20 17:11:39 2025 +0800
[fix](hive) refresh hive cache when inconsistency (#58074)
### What problem does this PR solve?
The issue is that the user changed a single-level partition table to a
two-level partition table on the Hive side, but the cache on the Doris
side was not updated in time. In the stack trace below, partitionColumnTypes
is obtained from the SchemaCache, while partitionName is obtained from
the partitionCache. When the two caches are out of sync, the partition name
contains more values than there are cached partition column types (here,
two values vs. one type), so the size check fails.
The current fix is to detect this error, invalidate the SchemaCache, and
then retry.
```
java.lang.IllegalStateException: dt=20240905/core_tag=cold vs.
[varchar(65533)]
at
com.google.common.base.Preconditions.checkState(Preconditions.java:512)
~[guava-32.1.2-jre.jar:?]
at
org.apache.doris.datasource.hive.HiveMetaStoreCache.toListPartitionItem(HiveMetaStoreCache.java:291)
~[doris-fe.jar:1.2-SNAPSHOT]
at
org.apache.doris.datasource.hive.HiveMetaStoreCache.loadPartitionValues(HiveMetaStoreCache.java:263)
~[doris-fe.jar:1.2-SNAPSHOT]
at
com.github.benmanes.caffeine.cache.LocalLoadingCache.lambda$newMappingFunction$2(LocalLoadingCache.java:145)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
com.github.benmanes.caffeine.cache.LocalCache.lambda$statsAware$0(LocalCache.java:139)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
com.github.benmanes.caffeine.cache.BoundedLocalCache.lambda$doComputeIfAbsent$14(BoundedLocalCache.java:2406)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
java.util.concurrent.ConcurrentHashMap.compute(ConcurrentHashMap.java:1916)
~[?:?]
at
com.github.benmanes.caffeine.cache.BoundedLocalCache.doComputeIfAbsent(BoundedLocalCache.java:2404)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
com.github.benmanes.caffeine.cache.BoundedLocalCache.computeIfAbsent(BoundedLocalCache.java:2387)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
com.github.benmanes.caffeine.cache.LocalLoadingCache.get(LocalLoadingCache.java:56)
~[hive-catalog-shade-2.1.1.jar:2.1.1]
at
org.apache.doris.datasource.hive.HiveMetaStoreCache.getPartitionValues(HiveMetaStoreCache.java:475)
~[doris-fe.jar:1.2-SNAPSHOT]
at
org.apache.doris.datasource.hive.HiveMetaStoreCache.getPartitionValues(HiveMetaStoreCache.java:471)
~[doris-fe.jar:1.2-SNAPSHOT]
at
org.apache.doris.datasource.hive.HMSExternalTable.getNameToPartitionItems(HMSExternalTable.java:313)
~[doris-fe.jar:1.2-SNAPSHOT]
at
org.apache.doris.datasource.ExternalTable.initSelectedPartitions(ExternalTable.java:379)
~[doris-fe.jar:1.2-SNAPSHOT]
at
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.<init>(LogicalFileScan.java:66)
~[doris-fe.jar:1.2-SNAPSHOT]
at ...
```
Also add a new method `getHivePartitionValues()` to wrap the
`cache.getPartitionValues()` method.
---
.../doris/datasource/hive/HMSExternalTable.java | 39 ++++++++++++++++------
.../apache/doris/datasource/hive/HiveDlaTable.java | 8 ++---
.../doris/datasource/hive/HiveMetaStoreCache.java | 7 ++--
.../doris/tablefunction/MetadataGenerator.java | 6 ++--
4 files changed, 38 insertions(+), 22 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index d73276cc6c4..e75bb372417 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -416,11 +416,8 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
if (CollectionUtils.isEmpty(this.getPartitionColumns())) {
return Collections.emptyMap();
}
- HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
- .getMetaStoreCache((HMSExternalCatalog) this.getCatalog());
- List<Type> partitionColumnTypes =
this.getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(this));
- HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(
- this, partitionColumnTypes);
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
getHivePartitionValues(
+ MvccUtil.getSnapshotFromContext(this));
Map<Long, PartitionItem> idToPartitionItem =
hivePartitionValues.getIdToPartitionItem();
// transfer id to name
BiMap<Long, String> idToName =
hivePartitionValues.getPartitionNameToIdMap().inverse();
@@ -983,10 +980,10 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
@Override
public long getNewestUpdateVersionOrTime() {
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
getHivePartitionValues(
+ MvccUtil.getSnapshotFromContext(this));
HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) getCatalog());
- HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(this,
-
getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(this)));
List<HivePartition> partitionList =
cache.getAllPartitionsWithCache(this,
Lists.newArrayList(hivePartitionValues.getPartitionValuesMap().values()));
if (CollectionUtils.isEmpty(partitionList)) {
@@ -1062,16 +1059,15 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
if (isView()) {
return null;
}
- HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
- .getMetaStoreCache((HMSExternalCatalog) catalog);
- List<Type> partitionColumnTypes =
getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(this));
+ Optional<MvccSnapshot> snapshot =
MvccUtil.getSnapshotFromContext(this);
+ List<Type> partitionColumnTypes = getPartitionColumnTypes(snapshot);
HiveMetaStoreCache.HivePartitionValues partitionValues = null;
// Get table partitions from cache.
if (!partitionColumnTypes.isEmpty()) {
// It is ok to get partition values from cache,
// no need to worry that this call will invalid or refresh the
cache.
// because it has enough space to keep partition info of all
tables in cache.
- partitionValues = cache.getPartitionValues(this,
partitionColumnTypes);
+ partitionValues = getHivePartitionValues(snapshot);
if (partitionValues == null ||
partitionValues.getPartitionNameToIdMap() == null) {
LOG.warn("Partition values for hive table {} is null", name);
} else {
@@ -1192,4 +1188,25 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
return client.getTable(getRemoteDbName(), remoteName);
}
+
+ public HiveMetaStoreCache.HivePartitionValues
getHivePartitionValues(Optional<MvccSnapshot> snapshot) {
+ HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+ .getMetaStoreCache((HMSExternalCatalog) this.getCatalog());
+ try {
+ List<Type> partitionColumnTypes =
this.getPartitionColumnTypes(snapshot);
+ return cache.getPartitionValues(this, partitionColumnTypes);
+ } catch (Exception e) {
+ if
(e.getMessage().contains(HiveMetaStoreCache.ERR_CACHE_INCONSISTENCY)) {
+ LOG.warn("Hive metastore cache inconsistency detected for
table: {}.{}.{}. "
+ + "Clearing cache and retrying to get
partition values.",
+ this.getCatalog().getName(), this.getDbName(),
this.getName(), e);
+ ExternalSchemaCache schemaCache =
Env.getCurrentEnv().getExtMetaCacheMgr().getSchemaCache(catalog);
+ schemaCache.invalidateTableCache(this);
+ List<Type> partitionColumnTypes =
this.getPartitionColumnTypes(snapshot);
+ return cache.getPartitionValues(this, partitionColumnTypes);
+ } else {
+ throw e;
+ }
+ }
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
index 8c93c509559..c49081de60d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
@@ -71,11 +71,10 @@ public class HiveDlaTable extends HMSDlaTable {
@Override
public MTMVSnapshotIf getPartitionSnapshot(String partitionName,
MTMVRefreshContext context,
Optional<MvccSnapshot> snapshot) throws AnalysisException {
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
hmsTable.getHivePartitionValues(snapshot);
+ Long partitionId =
getPartitionIdByNameOrAnalysisException(partitionName, hivePartitionValues);
HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) hmsTable.getCatalog());
- HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(
- hmsTable, hmsTable.getPartitionColumnTypes(snapshot));
- Long partitionId =
getPartitionIdByNameOrAnalysisException(partitionName, hivePartitionValues);
HivePartition hivePartition =
getHivePartitionByIdOrAnalysisException(partitionId,
hivePartitionValues, cache);
return new MTMVTimestampSnapshot(hivePartition.getLastModifiedTime());
@@ -95,10 +94,9 @@ public class HiveDlaTable extends HMSDlaTable {
HivePartition maxPartition = null;
long maxVersionTime = 0L;
long visibleVersionTime;
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
hmsTable.getHivePartitionValues(snapshot);
HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) hmsTable.getCatalog());
- HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(
- hmsTable, hmsTable.getPartitionColumnTypes(snapshot));
List<HivePartition> partitionList =
cache.getAllPartitionsWithCache(hmsTable,
Lists.newArrayList(hivePartitionValues.getPartitionValuesMap().values()));
if (CollectionUtils.isEmpty(partitionList)) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 275a5ab1f8c..4fbda7c19f1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -100,6 +100,7 @@ import java.util.stream.Collectors;
public class HiveMetaStoreCache {
private static final Logger LOG =
LogManager.getLogger(HiveMetaStoreCache.class);
public static final String HIVE_DEFAULT_PARTITION =
"__HIVE_DEFAULT_PARTITION__";
+ public static final String ERR_CACHE_INCONSISTENCY =
"ERR_CACHE_INCONSISTENCY: ";
private final HMSExternalCatalog catalog;
private JobConf jobConf;
@@ -276,11 +277,12 @@ public class HiveMetaStoreCache {
partitionNameToIdMap, idToUniqueIdsMap,
singleUidToColumnRangeMap, partitionValuesMap);
}
- public ListPartitionItem toListPartitionItem(String partitionName,
List<Type> types) {
+ private ListPartitionItem toListPartitionItem(String partitionName,
List<Type> types) {
// Partition name will be in format: nation=cn/city=beijing
// parse it to get values "cn" and "beijing"
List<String> partitionValues =
HiveUtil.toPartitionValues(partitionName);
- Preconditions.checkState(partitionValues.size() == types.size(),
partitionName + " vs. " + types);
+ Preconditions.checkState(partitionValues.size() == types.size(),
+ ERR_CACHE_INCONSISTENCY + partitionName + " vs. " + types);
List<PartitionValue> values =
Lists.newArrayListWithExpectedSize(types.size());
for (String partitionValue : partitionValues) {
values.add(new PartitionValue(partitionValue,
HIVE_DEFAULT_PARTITION.equals(partitionValue)));
@@ -433,6 +435,7 @@ public class HiveMetaStoreCache {
return getPartitionValues(key);
}
+ @VisibleForTesting
public HivePartitionValues getPartitionValues(PartitionValueCacheKey key) {
return partitionValuesCache.get(key);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
index 4f37e555595..d754952fbee 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
@@ -1850,10 +1850,8 @@ public class MetadataGenerator {
"column " + colNames + " does not match partition columns
of table " + tbl.getName());
}
- HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
- .getMetaStoreCache((HMSExternalCatalog) tbl.getCatalog());
- HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
cache.getPartitionValues(
- tbl,
tbl.getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(tbl)));
+ HiveMetaStoreCache.HivePartitionValues hivePartitionValues =
tbl.getHivePartitionValues(
+ MvccUtil.getSnapshotFromContext(tbl));
Map<Long, List<String>> valuesMap =
hivePartitionValues.getPartitionValuesMap();
List<TRow> dataBatch = Lists.newArrayList();
for (Map.Entry<Long, List<String>> entry : valuesMap.entrySet()) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]