This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a15b00f10ba [Fix](outfile) FE check the hdfs URI of outfile (#38203)
a15b00f10ba is described below
commit a15b00f10ba3a729666c5cb7d7a45a7aef6ebc50
Author: Tiewei Fang <[email protected]>
AuthorDate: Tue Jul 30 08:58:03 2024 +0800
[Fix](outfile) FE check the hdfs URI of outfile (#38203)
1. Previously, if the root path of the HDFS URI started with two
slashes, the outfile export succeeded without error, but the files were
written to an unexpected path. Now the FE removes the repeated '/'
characters from the user-specified path (see the sketch after this
list).
2. Move the outfile HDFS test case from the p2 suite to p0.
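
For reference, a minimal sketch of the normalization this fix relies on
(the host, port, and path below are illustrative values, not taken from
the commit): java.net.URI.normalize() collapses duplicate '/' segments
in the path component while leaving the scheme and authority untouched.

    import java.net.URI;
    import java.net.URISyntaxException;

    public class NormalizeSketch {
        public static void main(String[] args) throws URISyntaxException {
            // A path with repeated '/', as a user might write in OUTFILE.
            // Host, port, and path are made-up example values.
            String raw = "hdfs://127.0.0.1:8020//tmp//ftw/export//exp_";
            // normalize() drops the empty path segments; the scheme and
            // authority ("hdfs://127.0.0.1:8020") are left untouched.
            String cleaned = new URI(raw).normalize().toString();
            System.out.println(cleaned);
            // -> hdfs://127.0.0.1:8020/tmp/ftw/export/exp_
        }
    }

This matches the multi-'/' cases exercised by the new p0 test below.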
---
.../org/apache/doris/analysis/OutFileClause.java | 8 ++
.../outfile/hdfs/test_outfile_hdfs.out} | 64 +++++++++++
.../outfile/hdfs/test_outfile_hdfs.groovy | 97 +++++++++++++++++
.../suites/export_p2/test_export_with_hdfs.groovy | 118 ---------------------
4 files changed, 169 insertions(+), 118 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index a658bec93af..8dd91c3fd8a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -55,6 +55,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -535,6 +537,12 @@ public class OutFileClause {
filePath = filePath.replace(HDFS_FILE_PREFIX, HDFS_FILE_PREFIX + dfsNameServices);
}
}
+ // delete repeated '/'
+ try {
+ filePath = new URI(filePath).normalize().toString();
+ } catch (URISyntaxException e) {
+ throw new AnalysisException("Can not normalize the URI, error: " + e.getMessage());
+ }
if (Strings.isNullOrEmpty(filePath)) {
throw new AnalysisException("Must specify file in OUTFILE clause");
}
diff --git a/regression-test/data/export_p2/test_export_with_hdfs.out b/regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out
similarity index 56%
rename from regression-test/data/export_p2/test_export_with_hdfs.out
rename to regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out
index 457bcfced30..b0159c52964 100644
--- a/regression-test/data/export_p2/test_export_with_hdfs.out
+++ b/regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out
@@ -47,6 +47,70 @@
8 ftw-8 26
9 ftw-9 27
+-- !select --
+
+-- !select --
+
+-- !select --
+1 ftw-1 19
+10 \N \N
+2 ftw-2 20
+3 ftw-3 21
+4 ftw-4 22
+5 ftw-5 23
+6 ftw-6 24
+7 ftw-7 25
+8 ftw-8 26
+9 ftw-9 27
+
+-- !select --
+1 ftw-1 19
+10 \N \N
+2 ftw-2 20
+3 ftw-3 21
+4 ftw-4 22
+5 ftw-5 23
+6 ftw-6 24
+7 ftw-7 25
+8 ftw-8 26
+9 ftw-9 27
+
+-- !select --
+1 ftw-1 19
+10 \N \N
+2 ftw-2 20
+3 ftw-3 21
+4 ftw-4 22
+5 ftw-5 23
+6 ftw-6 24
+7 ftw-7 25
+8 ftw-8 26
+9 ftw-9 27
+
+-- !select --
+1 ftw-1 19
+10 \N \N
+2 ftw-2 20
+3 ftw-3 21
+4 ftw-4 22
+5 ftw-5 23
+6 ftw-6 24
+7 ftw-7 25
+8 ftw-8 26
+9 ftw-9 27
+
+-- !select --
+1 ftw-1 19
+10 \N \N
+2 ftw-2 20
+3 ftw-3 21
+4 ftw-4 22
+5 ftw-5 23
+6 ftw-6 24
+7 ftw-7 25
+8 ftw-8 26
+9 ftw-9 27
+
-- !select --
1 ftw-1 19
10 \N \N
diff --git a/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
new file mode 100644
index 00000000000..fccf5800e6a
--- /dev/null
+++ b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_outfile_with_hdfs", "external,hive,external_docker") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ def table_export_name = "test_outfile_with_hdfs"
+ // create table and insert
+ sql """ DROP TABLE IF EXISTS ${table_export_name} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_export_name} (
+ `id` int(11) NULL,
+ `name` string NULL,
+ `age` int(11) NULL
+ )
+ PARTITION BY RANGE(id)
+ (
+ PARTITION less_than_20 VALUES LESS THAN ("20"),
+ PARTITION between_20_70 VALUES [("20"),("70")),
+ PARTITION more_than_70 VALUES LESS THAN ("151")
+ )
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+ StringBuilder sb = new StringBuilder()
+ int i = 1
+ for (; i < 10; i ++) {
+ sb.append("""
+ (${i}, 'ftw-${i}', ${i + 18}),
+ """)
+ }
+ sb.append("""
+ (${i}, NULL, NULL)
+ """)
+ sql """ INSERT INTO ${table_export_name} VALUES
+ ${sb.toString()}
+ """
+ qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """
+
+ String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ // It's okay to use a random `hdfsUser`, but it cannot be empty.
+ def hdfsUserName = "doris"
+ def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+
+
+ // test outfile
+ def test_outfile = {format, uri ->
+ def res = sql """
+ SELECT * FROM ${table_export_name} t ORDER BY id
+ INTO OUTFILE "${defaultFS}${uri}"
+ FORMAT AS ${format}
+ PROPERTIES (
+ "fs.defaultFS"="${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}"
+ );
+ """
+
+ def outfile_url = res[0][3]
+ // check data correctness
+ order_qt_select """ select * from hdfs(
+ "uri" = "${outfile_url}.${format}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}");
+ """
+ }
+
+ test_outfile('csv', '/tmp/ftw/export/exp_');
+ test_outfile('parquet', '/tmp/ftw/export/exp_');
+ test_outfile('orc', '/tmp/ftw/export/exp_');
+ test_outfile('csv_with_names', '/tmp/ftw/export/exp_');
+ test_outfile('csv_with_names_and_types', '/tmp/ftw/export/exp_');
+
+ // test uri with multi '/'
+ test_outfile('parquet', '//tmp/ftw/export/exp_');
+ test_outfile('parquet', '//tmp//ftw/export/exp_');
+ test_outfile('parquet', '//tmp/ftw/export//exp_');
+ test_outfile('parquet', '//tmp/ftw//export//exp_');
+ test_outfile('parquet', '//tmp/ftw//export/exp_');
+ test_outfile('parquet', '///tmp/ftw/export/exp_');
+ test_outfile('parquet', '////tmp/ftw/export/exp_');
+ }
+}
diff --git a/regression-test/suites/export_p2/test_export_with_hdfs.groovy b/regression-test/suites/export_p2/test_export_with_hdfs.groovy
deleted file mode 100644
index e523fdf5a47..00000000000
--- a/regression-test/suites/export_p2/test_export_with_hdfs.groovy
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_export_with_hdfs", "p2") {
- // open nereids
- sql """ set enable_nereids_planner=true """
- sql """ set enable_fallback_to_original_planner=false """
-
-
- String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost")
- String hdfsPort = context.config.otherConfigs.get("extHdfsPort")
- String fs = "hdfs://${nameNodeHost}:${hdfsPort}"
- String user_name = context.config.otherConfigs.get("extHiveHmsUser")
-
-
- def table_export_name = "test_export_with_hdfs"
- // create table and insert
- sql """ DROP TABLE IF EXISTS ${table_export_name} """
- sql """
- CREATE TABLE IF NOT EXISTS ${table_export_name} (
- `id` int(11) NULL,
- `name` string NULL,
- `age` int(11) NULL
- )
- PARTITION BY RANGE(id)
- (
- PARTITION less_than_20 VALUES LESS THAN ("20"),
- PARTITION between_20_70 VALUES [("20"),("70")),
- PARTITION more_than_70 VALUES LESS THAN ("151")
- )
- DISTRIBUTED BY HASH(id) BUCKETS 3
- PROPERTIES("replication_num" = "1");
- """
- StringBuilder sb = new StringBuilder()
- int i = 1
- for (; i < 10; i ++) {
- sb.append("""
- (${i}, 'ftw-${i}', ${i + 18}),
- """)
- }
- sb.append("""
- (${i}, NULL, NULL)
- """)
- sql """ INSERT INTO ${table_export_name} VALUES
- ${sb.toString()}
- """
- qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """
-
-
- def waiting_export = { export_label ->
- while (true) {
- def res = sql """ show export where label = "${export_label}" """
- logger.info("export state: " + res[0][2])
- if (res[0][2] == "FINISHED") {
- def json = parseJson(res[0][11])
- assert json instanceof List
- assertEquals("1", json.fileNumber[0][0])
- log.info("outfile_path: ${json.url[0][0]}")
- return json.url[0][0];
- } else if (res[0][2] == "CANCELLED") {
- throw new IllegalStateException("""export failed: ${res[0][10]}""")
- } else {
- sleep(5000)
- }
- }
- }
-
- def outFilePath = """/user/export_test/export/exp_"""
-
- // 1. csv test
- def test_export = {format, file_suffix, isDelete ->
- def uuid = UUID.randomUUID().toString()
- // exec export
- sql """
- EXPORT TABLE ${table_export_name} TO "${fs}${outFilePath}"
- PROPERTIES(
- "label" = "${uuid}",
- "format" = "${format}",
- "column_separator"=",",
- "delete_existing_files"="${isDelete}"
- )
- with HDFS (
- "fs.defaultFS"="${fs}",
- "hadoop.username" = "${user_name}"
- );
- """
-
- def outfile_url = waiting_export.call(uuid)
-
- // check data correctness
- order_qt_select """ select * from hdfs(
- "uri" = "${outfile_url}0.${file_suffix}",
- "hadoop.username" = "${user_name}",
- "column_separator" = ",",
- "format" = "${format}");
- """
- }
-
- test_export('csv', 'csv', true);
- test_export('parquet', 'parquet', true);
- test_export('orc', 'orc', true);
- test_export('csv_with_names', 'csv', true);
- test_export('csv_with_names_and_types', 'csv', true);
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]