This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 94986fc5746 branch-2.1: [fix](multi-catalog) Fix bug: "Can not create
a Path from an empty string" (#49382) (#49641)
94986fc5746 is described below
commit 94986fc5746b0751daaf71d79af50dcd6004e39f
Author: Socrates <[email protected]>
AuthorDate: Sat Mar 29 09:13:43 2025 +0800
branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an
empty string" (#49382) (#49641)
### What problem does this PR solve?
Problem Summary:
In HiveMetaStoreCache, the function FileInputFormat.setInputPaths is
used to set input paths. However, this function splits paths using
commas, which is not the expected behavior. As a result, when partition
values contain commas, it leads to incorrect path parsing and potential
errors.
```java
public static void setInputPaths(JobConf conf, String commaSeparatedPaths) {
    setInputPaths(conf, StringUtils.stringToPath(
        getPathStrings(commaSeparatedPaths)));
}
```
To prevent FileInputFormat.setInputPaths from splitting paths by commas,
we use another overloaded version of the method. Instead of passing a
comma-separated string, we explicitly pass a Path object, ensuring that
partition values containing commas are handled correctly.
```java
public static void setInputPaths(JobConf conf, Path... inputPaths) {
Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
StringBuffer str = new
StringBuffer(StringUtils.escapeString(path.toString()));
for(int i = 1; i < inputPaths.length;i++) {
str.append(StringUtils.COMMA_STR);
path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
str.append(StringUtils.escapeString(path.toString()));
}
    conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
        str.toString());
}
```
### Release note
None
---
.../scripts/create_preinstalled_scripts/run74.hql | 53 +++++++++++++++++++++
.../partition_col=,/000000_0 | Bin 0 -> 408 bytes
.../partition_col=a, b, c/000000_0 | Bin 0 -> 408 bytes
.../partition_col=a, b/000000_0 | Bin 0 -> 408 bytes
.../doris/datasource/hive/HiveMetaStoreCache.java | 3 +-
.../hive/test_hive_partitions.out | Bin 3171 -> 3333 bytes
.../hive/test_hive_partitions.groovy | 6 ++-
7 files changed, 59 insertions(+), 3 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql
new file mode 100644
index 00000000000..31e98f370d5
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql
@@ -0,0 +1,53 @@
+create database if not exists partition_tables;
+use partition_tables;
+
+CREATE TABLE decimal_partition_table (
+ id INT,
+ name STRING,
+ value FLOAT
+)
+PARTITIONED BY (partition_col DECIMAL(10, 2))
+STORED AS PARQUET
+LOCATION
'/user/doris/preinstalled_data/partition_tables/decimal_partition_table';
+
+CREATE TABLE int_partition_table (
+ id INT,
+ name STRING,
+ value FLOAT
+)
+PARTITIONED BY (partition_col INT)
+STORED AS PARQUET
+LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table';
+
+CREATE TABLE string_partition_table (
+ id INT,
+ name STRING,
+ value FLOAT
+)
+PARTITIONED BY (partition_col STRING)
+STORED AS PARQUET
+LOCATION
'/user/doris/preinstalled_data/partition_tables/string_partition_table';
+
+CREATE TABLE date_partition_table (
+ id INT,
+ name STRING,
+ value FLOAT
+)
+PARTITIONED BY (partition_col DATE)
+STORED AS PARQUET
+LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table';
+
+CREATE TABLE string_partition_table_with_comma (
+ id INT,
+ name STRING,
+ value FLOAT
+)
+PARTITIONED BY (partition_col STRING)
+STORED AS PARQUET
+LOCATION
'/user/doris/preinstalled_data/partition_tables/string_partition_table_with_comma';
+
+msck repair table decimal_partition_table;
+msck repair table int_partition_table;
+msck repair table string_partition_table;
+msck repair table date_partition_table;
+msck repair table string_partition_table_with_comma;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0
new file mode 100644
index 00000000000..a93ce013162
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
b, c/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
b, c/000000_0
new file mode 100644
index 00000000000..4e6e043ccf5
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
b, c/000000_0 differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
b/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
b/000000_0
new file mode 100644
index 00000000000..63b7f592e7b
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a,
b/000000_0 differ
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 48a625c35a7..751919e85f4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -404,7 +404,8 @@ public class HiveMetaStoreCache {
} catch (Exception e) {
LOG.warn("unknown scheme in path: " + finalLocation, e);
}
- FileInputFormat.setInputPaths(jobConf, finalLocation.get());
+ // NOTICE: the setInputPaths has 2 overloads, the 2nd arg should
be Path not String
+ FileInputFormat.setInputPaths(jobConf, finalLocation.getPath());
try {
FileCacheValue result = getFileCache(finalLocation.get(),
key.inputFormat, jobConf,
key.getPartitionValues(), key.bindBrokerName);
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_partitions.out
b/regression-test/data/external_table_p0/hive/test_hive_partitions.out
index 904eb6eda30..ea0c8f1518c 100644
Binary files
a/regression-test/data/external_table_p0/hive/test_hive_partitions.out and
b/regression-test/data/external_table_p0/hive/test_hive_partitions.out differ
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
index 0e41adc3127..cc3425106a5 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_partitions.groovy
@@ -91,6 +91,10 @@ suite("test_hive_partitions",
"p0,external,hive,external_docker,external_docker_
q01()
+ qt_string_partition_table_with_comma """
+ select * from
partition_tables.string_partition_table_with_comma order by id;
+ """
+
sql """set num_partitions_in_batch_mode=1"""
explain {
sql ("select * from partition_table")
@@ -99,8 +103,6 @@ suite("test_hive_partitions",
"p0,external,hive,external_docker,external_docker_
contains "(approximate)inputSplitNum=60"
}
sql """unset variable num_partitions_in_batch_mode"""
-
- // sql """drop catalog if exists ${catalog_name}"""
} finally {
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]