This is an automated email from the ASF dual-hosted git repository.
diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git
The following commit(s) were added to refs/heads/master by this push:
new d1e07a7 [improve](spark load) support load hadoop conf from resource
(#280)
d1e07a7 is described below
commit d1e07a718acc61d4f828b92528a632292626eb5f
Author: gnehil <[email protected]>
AuthorDate: Mon Mar 17 20:36:05 2025 +0800
[improve](spark load) support load hadoop conf from resource (#280)
---
.../java/org/apache/doris/SparkLoadRunner.java | 34 ++++++++++++++++-
.../java/org/apache/doris/SparkLoadRunnerTest.java | 44 ++++++++++++++++++++++
.../src/test/resources/core-site.xml | 25 ++++++++++++
.../src/test/resources/hdfs-site.xml | 25 ++++++++++++
.../src/test/resources/yarn-site.xml | 25 ++++++++++++
5 files changed, 152 insertions(+), 1 deletion(-)
diff --git
a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
index 5c7329f..9998bab 100644
---
a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
+++
b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
@@ -37,6 +37,8 @@ import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -74,7 +76,7 @@ public class SparkLoadRunner {
JobConfig jobConfig = readConfig(cmdOptions.getConfigPath());
try {
- handleS3Config(jobConfig);
+ preprocessConfig(jobConfig);
checkConfig(jobConfig);
} catch (IllegalArgumentException e) {
System.err.println("check config failed, msg: " +
ExceptionUtils.getStackTrace(e));
@@ -156,6 +158,36 @@ public class SparkLoadRunner {
jobConfig.checkHadoopProperties();
}
+ private static void preprocessConfig(JobConfig jobConfig) { // Prepares jobConfig; the caller runs checkConfig right after this.
+ loadHadoopConfig(jobConfig); // merges *-site.xml values from HADOOP_CONF_DIR (when that env entry exists) into the Hadoop properties
+ handleS3Config(jobConfig); // runs second, i.e. after the Hadoop properties have been merged
+ }
+
+ /**
+  * Merges core-site.xml, hdfs-site.xml and yarn-site.xml found in the directory named by the
+  * HADOOP_CONF_DIR entry of the job env into the job's Hadoop properties. Properties already
+  * set on jobConfig override values read from the files; without that env entry this is a no-op.
+  */
+ protected static void loadHadoopConfig(JobConfig jobConfig) {
+     if (jobConfig.getEnv().containsKey("HADOOP_CONF_DIR")) {
+         // NOTE(review): new Configuration() presumably also loads Hadoop's built-in defaults, which get copied too - confirm intended.
+         Configuration conf = new Configuration();
+         String hadoopConfDir = jobConfig.getEnv().get("HADOOP_CONF_DIR");
+         for (String siteFile : new String[] {"core-site.xml", "hdfs-site.xml", "yarn-site.xml"}) { // same order as before
+             if (new File(hadoopConfDir + "/" + siteFile).exists()) { // missing files are skipped silently
+                 System.out.println(siteFile + " was found at " + hadoopConfDir + "/" + siteFile);
+                 conf.addResource(new Path(hadoopConfDir, siteFile));
+             }
+         }
+         Map<String, String> newHadoopProps = new HashMap<>();
+         for (Map.Entry<String, String> confEntry : conf) {
+             newHadoopProps.put(confEntry.getKey(), confEntry.getValue());
+         }
+         newHadoopProps.putAll(jobConfig.getHadoopProperties()); // applied last so explicit job properties win
+         jobConfig.setHadoopProperties(newHadoopProps);
+     }
+ }
+
private static void handleS3Config(JobConfig jobConfig) {
URI uri = URI.create(jobConfig.getWorkingDir());
if (uri.getScheme().equalsIgnoreCase("s3")) {
diff --git
a/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java
b/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java
new file mode 100644
index 0000000..81010f9
--- /dev/null
+++
b/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris;
+
+import org.apache.doris.config.JobConfig;
+
+import org.junit.jupiter.api.Assertions;
+import static org.junit.jupiter.api.Assertions.*;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Checks that SparkLoadRunner.loadHadoopConfig copies values from the test *-site.xml files into the job's Hadoop properties. */
+class SparkLoadRunnerTest {
+    @Test
+    void loadHadoopConfig() {
+        String confDir = this.getClass().getResource("/").getPath(); // classpath root, used as HADOOP_CONF_DIR
+        Map<String, String> env = new HashMap<>();
+        env.put("HADOOP_CONF_DIR", confDir);
+        JobConfig config = new JobConfig();
+        config.setEnv(env);
+        SparkLoadRunner.loadHadoopConfig(config);
+        Map<String, String> loaded = config.getHadoopProperties();
+        Assertions.assertEquals("60000", loaded.get("hadoop.http.idle_timeout.ms")); // from core-site.xml
+        Assertions.assertEquals("1", loaded.get("dfs.replication")); // from hdfs-site.xml
+        Assertions.assertEquals("my.hadoop.com", loaded.get("yarn.resourcemanager.address")); // from yarn-site.xml
+    }
+}
\ No newline at end of file
diff --git a/spark-load/spark-load-core/src/test/resources/core-site.xml
b/spark-load/spark-load-core/src/test/resources/core-site.xml
new file mode 100644
index 0000000..304b272
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/resources/core-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+ <property>
+ <name>hadoop.http.idle_timeout.ms</name>
+ <value>60000</value>
+ </property>
+</configuration>
\ No newline at end of file
diff --git a/spark-load/spark-load-core/src/test/resources/hdfs-site.xml
b/spark-load/spark-load-core/src/test/resources/hdfs-site.xml
new file mode 100644
index 0000000..869686c
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/resources/hdfs-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+</configuration>
\ No newline at end of file
diff --git a/spark-load/spark-load-core/src/test/resources/yarn-site.xml
b/spark-load/spark-load-core/src/test/resources/yarn-site.xml
new file mode 100644
index 0000000..e8279d2
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/resources/yarn-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+ <property>
+ <name>yarn.resourcemanager.address</name>
+ <value>my.hadoop.com</value>
+ </property>
+</configuration>
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]