This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7118f1c8010 [fix](hive) refresh hive cache when inconsistency (#58074)
7118f1c8010 is described below

commit 7118f1c801020974d1f196d9e571a6c8c98ab1fb
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Thu Nov 20 17:11:39 2025 +0800

    [fix](hive) refresh hive cache when inconsistency (#58074)
    
    ### What problem does this PR solve?
    
    The issue is that the user changed a single-level partition table to a
    two-level partition table on the Hive side, but the cache on the Doris
    side was not updated in time. In the stack trace, partitionColumnTypes
    is obtained from the SchemaCache, while partitionName is obtained from
    the partitionCache. This can lead to inconsistencies during the user’s
    operations.
    
    The current fix is to detect this error, invalidate the SchemaCache, and
    then retry.
    
    ```
    java.lang.IllegalStateException: dt=20240905/core_tag=cold vs. 
[varchar(65533)]
            at 
com.google.common.base.Preconditions.checkState(Preconditions.java:512) 
~[guava-32.1.2-jre.jar:?]
            at 
org.apache.doris.datasource.hive.HiveMetaStoreCache.toListPartitionItem(HiveMetaStoreCache.java:291)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at 
org.apache.doris.datasource.hive.HiveMetaStoreCache.loadPartitionValues(HiveMetaStoreCache.java:263)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at 
com.github.benmanes.caffeine.cache.LocalLoadingCache.lambda$newMappingFunction$2(LocalLoadingCache.java:145)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
com.github.benmanes.caffeine.cache.LocalCache.lambda$statsAware$0(LocalCache.java:139)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
com.github.benmanes.caffeine.cache.BoundedLocalCache.lambda$doComputeIfAbsent$14(BoundedLocalCache.java:2406)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
java.util.concurrent.ConcurrentHashMap.compute(ConcurrentHashMap.java:1916) 
~[?:?]
            at 
com.github.benmanes.caffeine.cache.BoundedLocalCache.doComputeIfAbsent(BoundedLocalCache.java:2404)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
com.github.benmanes.caffeine.cache.BoundedLocalCache.computeIfAbsent(BoundedLocalCache.java:2387)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
com.github.benmanes.caffeine.cache.LocalLoadingCache.get(LocalLoadingCache.java:56)
 ~[hive-catalog-shade-2.1.1.jar:2.1.1]
            at 
org.apache.doris.datasource.hive.HiveMetaStoreCache.getPartitionValues(HiveMetaStoreCache.java:475)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at 
org.apache.doris.datasource.hive.HiveMetaStoreCache.getPartitionValues(HiveMetaStoreCache.java:471)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at 
org.apache.doris.datasource.hive.HMSExternalTable.getNameToPartitionItems(HMSExternalTable.java:313)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at 
org.apache.doris.datasource.ExternalTable.initSelectedPartitions(ExternalTable.java:379)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at 
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.<init>(LogicalFileScan.java:66)
 ~[doris-fe.jar:1.2-SNAPSHOT]
            at ...
    ```
    
    Alao add a new method `getHivePartitionValues()` to wrap the
    `cache.getPartitionValues()` method.
---
 .../doris/datasource/hive/HMSExternalTable.java    | 39 ++++++++++++++++------
 .../apache/doris/datasource/hive/HiveDlaTable.java |  8 ++---
 .../doris/datasource/hive/HiveMetaStoreCache.java  |  7 ++--
 .../doris/tablefunction/MetadataGenerator.java     |  6 ++--
 4 files changed, 38 insertions(+), 22 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index d73276cc6c4..e75bb372417 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -416,11 +416,8 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         if (CollectionUtils.isEmpty(this.getPartitionColumns())) {
             return Collections.emptyMap();
         }
-        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
-                .getMetaStoreCache((HMSExternalCatalog) this.getCatalog());
-        List<Type> partitionColumnTypes = 
this.getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(this));
-        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(
-                this, partitionColumnTypes);
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
getHivePartitionValues(
+                MvccUtil.getSnapshotFromContext(this));
         Map<Long, PartitionItem> idToPartitionItem = 
hivePartitionValues.getIdToPartitionItem();
         // transfer id to name
         BiMap<Long, String> idToName = 
hivePartitionValues.getPartitionNameToIdMap().inverse();
@@ -983,10 +980,10 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
 
     @Override
     public long getNewestUpdateVersionOrTime() {
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
getHivePartitionValues(
+                MvccUtil.getSnapshotFromContext(this));
         HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
                 .getMetaStoreCache((HMSExternalCatalog) getCatalog());
-        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(this,
-                
getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(this)));
         List<HivePartition> partitionList = 
cache.getAllPartitionsWithCache(this,
                 
Lists.newArrayList(hivePartitionValues.getPartitionValuesMap().values()));
         if (CollectionUtils.isEmpty(partitionList)) {
@@ -1062,16 +1059,15 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         if (isView()) {
             return null;
         }
-        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
-                .getMetaStoreCache((HMSExternalCatalog) catalog);
-        List<Type> partitionColumnTypes = 
getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(this));
+        Optional<MvccSnapshot> snapshot = 
MvccUtil.getSnapshotFromContext(this);
+        List<Type> partitionColumnTypes = getPartitionColumnTypes(snapshot);
         HiveMetaStoreCache.HivePartitionValues partitionValues = null;
         // Get table partitions from cache.
         if (!partitionColumnTypes.isEmpty()) {
             // It is ok to get partition values from cache,
             // no need to worry that this call will invalid or refresh the 
cache.
             // because it has enough space to keep partition info of all 
tables in cache.
-            partitionValues = cache.getPartitionValues(this, 
partitionColumnTypes);
+            partitionValues = getHivePartitionValues(snapshot);
             if (partitionValues == null || 
partitionValues.getPartitionNameToIdMap() == null) {
                 LOG.warn("Partition values for hive table {} is null", name);
             } else {
@@ -1192,4 +1188,25 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
         return client.getTable(getRemoteDbName(), remoteName);
     }
+
+    public HiveMetaStoreCache.HivePartitionValues 
getHivePartitionValues(Optional<MvccSnapshot> snapshot) {
+        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+                .getMetaStoreCache((HMSExternalCatalog) this.getCatalog());
+        try {
+            List<Type> partitionColumnTypes = 
this.getPartitionColumnTypes(snapshot);
+            return cache.getPartitionValues(this, partitionColumnTypes);
+        } catch (Exception e) {
+            if 
(e.getMessage().contains(HiveMetaStoreCache.ERR_CACHE_INCONSISTENCY)) {
+                LOG.warn("Hive metastore cache inconsistency detected for 
table: {}.{}.{}. "
+                                + "Clearing cache and retrying to get 
partition values.",
+                        this.getCatalog().getName(), this.getDbName(), 
this.getName(), e);
+                ExternalSchemaCache schemaCache = 
Env.getCurrentEnv().getExtMetaCacheMgr().getSchemaCache(catalog);
+                schemaCache.invalidateTableCache(this);
+                List<Type> partitionColumnTypes = 
this.getPartitionColumnTypes(snapshot);
+                return cache.getPartitionValues(this, partitionColumnTypes);
+            } else {
+                throw e;
+            }
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
index 8c93c509559..c49081de60d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveDlaTable.java
@@ -71,11 +71,10 @@ public class HiveDlaTable extends HMSDlaTable {
     @Override
     public MTMVSnapshotIf getPartitionSnapshot(String partitionName, 
MTMVRefreshContext context,
             Optional<MvccSnapshot> snapshot) throws AnalysisException {
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
hmsTable.getHivePartitionValues(snapshot);
+        Long partitionId = 
getPartitionIdByNameOrAnalysisException(partitionName, hivePartitionValues);
         HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
                 .getMetaStoreCache((HMSExternalCatalog) hmsTable.getCatalog());
-        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(
-                hmsTable, hmsTable.getPartitionColumnTypes(snapshot));
-        Long partitionId = 
getPartitionIdByNameOrAnalysisException(partitionName, hivePartitionValues);
         HivePartition hivePartition = 
getHivePartitionByIdOrAnalysisException(partitionId,
                 hivePartitionValues, cache);
         return new MTMVTimestampSnapshot(hivePartition.getLastModifiedTime());
@@ -95,10 +94,9 @@ public class HiveDlaTable extends HMSDlaTable {
         HivePartition maxPartition = null;
         long maxVersionTime = 0L;
         long visibleVersionTime;
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
hmsTable.getHivePartitionValues(snapshot);
         HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
                 .getMetaStoreCache((HMSExternalCatalog) hmsTable.getCatalog());
-        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(
-                hmsTable, hmsTable.getPartitionColumnTypes(snapshot));
         List<HivePartition> partitionList = 
cache.getAllPartitionsWithCache(hmsTable,
                 
Lists.newArrayList(hivePartitionValues.getPartitionValuesMap().values()));
         if (CollectionUtils.isEmpty(partitionList)) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 275a5ab1f8c..4fbda7c19f1 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -100,6 +100,7 @@ import java.util.stream.Collectors;
 public class HiveMetaStoreCache {
     private static final Logger LOG = 
LogManager.getLogger(HiveMetaStoreCache.class);
     public static final String HIVE_DEFAULT_PARTITION = 
"__HIVE_DEFAULT_PARTITION__";
+    public static final String ERR_CACHE_INCONSISTENCY = 
"ERR_CACHE_INCONSISTENCY: ";
 
     private final HMSExternalCatalog catalog;
     private JobConf jobConf;
@@ -276,11 +277,12 @@ public class HiveMetaStoreCache {
                 partitionNameToIdMap, idToUniqueIdsMap, 
singleUidToColumnRangeMap, partitionValuesMap);
     }
 
-    public ListPartitionItem toListPartitionItem(String partitionName, 
List<Type> types) {
+    private ListPartitionItem toListPartitionItem(String partitionName, 
List<Type> types) {
         // Partition name will be in format: nation=cn/city=beijing
         // parse it to get values "cn" and "beijing"
         List<String> partitionValues = 
HiveUtil.toPartitionValues(partitionName);
-        Preconditions.checkState(partitionValues.size() == types.size(), 
partitionName + " vs. " + types);
+        Preconditions.checkState(partitionValues.size() == types.size(),
+                ERR_CACHE_INCONSISTENCY + partitionName + " vs. " + types);
         List<PartitionValue> values = 
Lists.newArrayListWithExpectedSize(types.size());
         for (String partitionValue : partitionValues) {
             values.add(new PartitionValue(partitionValue, 
HIVE_DEFAULT_PARTITION.equals(partitionValue)));
@@ -433,6 +435,7 @@ public class HiveMetaStoreCache {
         return getPartitionValues(key);
     }
 
+    @VisibleForTesting
     public HivePartitionValues getPartitionValues(PartitionValueCacheKey key) {
         return partitionValuesCache.get(key);
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
index 4f37e555595..d754952fbee 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java
@@ -1850,10 +1850,8 @@ public class MetadataGenerator {
                     "column " + colNames + " does not match partition columns 
of table " + tbl.getName());
         }
 
-        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
-                .getMetaStoreCache((HMSExternalCatalog) tbl.getCatalog());
-        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(
-                tbl, 
tbl.getPartitionColumnTypes(MvccUtil.getSnapshotFromContext(tbl)));
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
tbl.getHivePartitionValues(
+                MvccUtil.getSnapshotFromContext(tbl));
         Map<Long, List<String>> valuesMap = 
hivePartitionValues.getPartitionValuesMap();
         List<TRow> dataBatch = Lists.newArrayList();
         for (Map.Entry<Long, List<String>> entry : valuesMap.entrySet()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to