This is an automated email from the ASF dual-hosted git repository.

Gabriel39 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e9126cf5b2 [fix](fe) Normalize default HDFS paths in LocationPath 
(#63476)
5e9126cf5b2 is described below

commit 5e9126cf5b2a162c2e132589a5fc4b53a605e04b
Author: Socrates <[email protected]>
AuthorDate: Thu May 28 09:34:41 2026 +0800

    [fix](fe) Normalize default HDFS paths in LocationPath (#63476)
    
    Iceberg tables written through a Hadoop catalog can store data file
    paths without a URI scheme, for example
    `/hadoop_catalog/db/tbl/data/file.parquet`. Doris should normalize these
    paths with the catalog's `fs.defaultFS` before creating scan ranges.
    
    The cached `LocationPath` creation path used for Iceberg kept the
    original blank schema (URI scheme) after normalization, and its
    full-URI-parsing fallback did not derive the schema from the normalized
    URI either. As a result, partitioned table planning could fail with
    `Invalid location, missing authority`, and non-partitioned scans could
    pass an invalid file type or fs name to BE.
    
    This patch derives the schema from the normalized URI when the original
    path has no scheme and keeps cached `LocationPath` creation consistent
    with full parsing.
---
 .../java/org/apache/doris/common/util/LocationPath.java   | 14 +++++++++++++-
 .../org/apache/doris/common/util/LocationPathTest.java    | 15 +++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
index ff1f0c87a06..019462d33a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
@@ -146,6 +146,9 @@ public class LocationPath {
         String encodedLocation = encodedLocation(normalizedLocation);
         URI uri = URI.create(encodedLocation);
         String fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://" + 
Strings.nullToEmpty(uri.getAuthority());
+        if (StringUtils.isBlank(schema)) {
+            schema = Strings.nullToEmpty(uri.getScheme());
+        }
 
         return new LocationPath(schema, normalizedLocation, fsIdentifier, 
storageProperties);
     }
@@ -190,6 +193,9 @@ public class LocationPath {
             URI uri = URI.create(encodedLocation);
             String fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://"
                     + Strings.nullToEmpty(uri.getAuthority());
+            if (StringUtils.isBlank(schema)) {
+                schema = Strings.nullToEmpty(uri.getScheme());
+            }
             return new LocationPath(schema, normalizedLocation, fsIdentifier, 
storageProperties);
         } catch (UserException e) {
             throw new StoragePropertiesException("Failed to create 
LocationPath for location: " + location, e);
@@ -231,6 +237,7 @@ public class LocationPath {
             String normalizedLocation = 
storageProperties.validateAndNormalizeUri(location);
 
             String fsIdentifier;
+            String schema = cachedSchema;
             if (cachedFsIdPrefix != null && 
normalizedLocation.startsWith(cachedFsIdPrefix)) {
                 // Fast path: extract authority from normalized location 
without full URI parsing
                 int authorityStart = cachedFsIdPrefix.length();
@@ -243,6 +250,9 @@ public class LocationPath {
                     throw new StoragePropertiesException("Invalid location, 
missing authority: " + normalizedLocation);
                 }
                 fsIdentifier = cachedFsIdPrefix + authority;
+                if (StringUtils.isBlank(schema)) {
+                    schema = cachedFsIdPrefix.substring(0, 
cachedFsIdPrefix.length() - SCHEME_DELIM.length());
+                }
             } else {
                 // Fallback to full URI parsing
                 String encodedLocation = encodedLocation(normalizedLocation);
@@ -253,9 +263,11 @@ public class LocationPath {
                 }
                 fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://"
                         + authority;
+                if (StringUtils.isBlank(schema)) {
+                    schema = Strings.nullToEmpty(uri.getScheme());
+                }
             }
 
-            String schema = cachedSchema != null ? cachedSchema : 
extractScheme(location);
             return new LocationPath(schema, normalizedLocation, fsIdentifier, 
storageProperties);
         } catch (UserException e) {
             throw new StoragePropertiesException("Failed to create 
LocationPath for location: " + location, e);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
index cccbc63bcc8..fd665f6baff 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
@@ -334,6 +334,21 @@ public class LocationPathTest {
         Assertions.assertEquals(full.getSchema(), cached.getSchema());
     }
 
+    @Test
+    public void testLocationPathWithCacheUsesDefaultFsForHdfsPath() {
+        StorageProperties storageProperties = 
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.HDFS);
+        String location = 
"/hadoop_catalog/fdm/f_csm_t_consume_info/data/data_dt=20220407/file.parquet";
+        LocationPath cached = LocationPath.ofWithCache(location, 
storageProperties, null, null);
+        LocationPath full = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
+        Assertions.assertEquals(full.getNormalizedLocation(), 
cached.getNormalizedLocation());
+        
Assertions.assertEquals("hdfs://namenode:8020/hadoop_catalog/fdm/f_csm_t_consume_info/data/"
+                + "data_dt=20220407/file.parquet", 
cached.getNormalizedLocation());
+        Assertions.assertEquals("hdfs://namenode:8020", 
cached.getFsIdentifier());
+        Assertions.assertEquals("hdfs", cached.getSchema());
+        Assertions.assertEquals(TFileType.FILE_HDFS, 
cached.getTFileTypeForBE());
+        Assertions.assertEquals(FileSystemType.HDFS, 
cached.getFileSystemType());
+    }
+
     @Test
     public void testLocationPathWithCacheMissingAuthority() {
         StorageProperties storageProperties = 
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.S3);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to