This is an automated email from the ASF dual-hosted git repository.
Gabriel39 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5e9126cf5b2 [fix](fe) Normalize default HDFS paths in LocationPath
(#63476)
5e9126cf5b2 is described below
commit 5e9126cf5b2a162c2e132589a5fc4b53a605e04b
Author: Socrates <[email protected]>
AuthorDate: Thu May 28 09:34:41 2026 +0800
[fix](fe) Normalize default HDFS paths in LocationPath (#63476)
Iceberg tables written through a Hadoop catalog can store data file paths
without a URI scheme, for example
`/hadoop_catalog/db/tbl/data/file.parquet`. Doris should normalize these
paths with the catalog `fs.defaultFS` before creating scan ranges.
The cached `LocationPath` creation code path used for Iceberg kept the
original blank schema (i.e. the URI scheme) after normalization and did
not derive the schema from the normalized URI in its fallback branch. As
a result, partitioned table planning could fail with
`Invalid location, missing authority`, and non-partitioned scans could
pass an invalid file type or fs name to BE.
This patch derives the schema from the normalized URI when the original
path has no scheme and keeps cached `LocationPath` creation consistent
with full parsing.
---
.../java/org/apache/doris/common/util/LocationPath.java | 14 +++++++++++++-
.../org/apache/doris/common/util/LocationPathTest.java | 15 +++++++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
index ff1f0c87a06..019462d33a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
@@ -146,6 +146,9 @@ public class LocationPath {
String encodedLocation = encodedLocation(normalizedLocation);
URI uri = URI.create(encodedLocation);
String fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://" +
Strings.nullToEmpty(uri.getAuthority());
+ if (StringUtils.isBlank(schema)) {
+ schema = Strings.nullToEmpty(uri.getScheme());
+ }
return new LocationPath(schema, normalizedLocation, fsIdentifier,
storageProperties);
}
@@ -190,6 +193,9 @@ public class LocationPath {
URI uri = URI.create(encodedLocation);
String fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://"
+ Strings.nullToEmpty(uri.getAuthority());
+ if (StringUtils.isBlank(schema)) {
+ schema = Strings.nullToEmpty(uri.getScheme());
+ }
return new LocationPath(schema, normalizedLocation, fsIdentifier,
storageProperties);
} catch (UserException e) {
throw new StoragePropertiesException("Failed to create
LocationPath for location: " + location, e);
@@ -231,6 +237,7 @@ public class LocationPath {
String normalizedLocation =
storageProperties.validateAndNormalizeUri(location);
String fsIdentifier;
+ String schema = cachedSchema;
if (cachedFsIdPrefix != null &&
normalizedLocation.startsWith(cachedFsIdPrefix)) {
// Fast path: extract authority from normalized location
without full URI parsing
int authorityStart = cachedFsIdPrefix.length();
@@ -243,6 +250,9 @@ public class LocationPath {
throw new StoragePropertiesException("Invalid location,
missing authority: " + normalizedLocation);
}
fsIdentifier = cachedFsIdPrefix + authority;
+ if (StringUtils.isBlank(schema)) {
+ schema = cachedFsIdPrefix.substring(0,
cachedFsIdPrefix.length() - SCHEME_DELIM.length());
+ }
} else {
// Fallback to full URI parsing
String encodedLocation = encodedLocation(normalizedLocation);
@@ -253,9 +263,11 @@ public class LocationPath {
}
fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://"
+ authority;
+ if (StringUtils.isBlank(schema)) {
+ schema = Strings.nullToEmpty(uri.getScheme());
+ }
}
- String schema = cachedSchema != null ? cachedSchema :
extractScheme(location);
return new LocationPath(schema, normalizedLocation, fsIdentifier,
storageProperties);
} catch (UserException e) {
throw new StoragePropertiesException("Failed to create
LocationPath for location: " + location, e);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
index cccbc63bcc8..fd665f6baff 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
@@ -334,6 +334,21 @@ public class LocationPathTest {
Assertions.assertEquals(full.getSchema(), cached.getSchema());
}
+ @Test
+ public void testLocationPathWithCacheUsesDefaultFsForHdfsPath() {
+ StorageProperties storageProperties =
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.HDFS);
+ String location =
"/hadoop_catalog/fdm/f_csm_t_consume_info/data/data_dt=20220407/file.parquet";
+ LocationPath cached = LocationPath.ofWithCache(location,
storageProperties, null, null);
+ LocationPath full = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
+ Assertions.assertEquals(full.getNormalizedLocation(),
cached.getNormalizedLocation());
+
Assertions.assertEquals("hdfs://namenode:8020/hadoop_catalog/fdm/f_csm_t_consume_info/data/"
+ + "data_dt=20220407/file.parquet",
cached.getNormalizedLocation());
+ Assertions.assertEquals("hdfs://namenode:8020",
cached.getFsIdentifier());
+ Assertions.assertEquals("hdfs", cached.getSchema());
+ Assertions.assertEquals(TFileType.FILE_HDFS,
cached.getTFileTypeForBE());
+ Assertions.assertEquals(FileSystemType.HDFS,
cached.getFileSystemType());
+ }
+
@Test
public void testLocationPathWithCacheMissingAuthority() {
StorageProperties storageProperties =
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.S3);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]