This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 39d8fc7c6 [hdfs] Introduce hadoop-conf-loader (#1433)
39d8fc7c6 is described below
commit 39d8fc7c63dc89a7e3ef1130a1f1758262402a87
Author: Jingsong Lee <[email protected]>
AuthorDate: Mon Jun 26 09:57:35 2023 +0800
[hdfs] Introduce hadoop-conf-loader (#1433)
---
docs/content/filesystems/hdfs.md | 2 +
.../java/org/apache/paimon/utils/HadoopUtils.java | 60 ++++++++++++++++++++--
.../apache/paimon/fs/HadoopConfigLoadingTest.java | 37 +++++++++----
3 files changed, 85 insertions(+), 14 deletions(-)
diff --git a/docs/content/filesystems/hdfs.md b/docs/content/filesystems/hdfs.md
index 2c0d934b4..6be5af308 100644
--- a/docs/content/filesystems/hdfs.md
+++ b/docs/content/filesystems/hdfs.md
@@ -45,6 +45,8 @@ configure your HDFS:
The first approach is recommended.
+If you do not want Hadoop configuration to be loaded from environment variables, you can
set `hadoop-conf-loader` to `option`.
+
{{< /tab >}}
{{< tab "Hive/Spark" >}}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/utils/HadoopUtils.java
b/paimon-common/src/main/java/org/apache/paimon/utils/HadoopUtils.java
index a478a08e2..7d0ec0e2c 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/HadoopUtils.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/HadoopUtils.java
@@ -18,7 +18,10 @@
package org.apache.paimon.utils;
+import org.apache.paimon.options.ConfigOption;
import org.apache.paimon.options.Options;
+import org.apache.paimon.options.description.DescribedEnum;
+import org.apache.paimon.options.description.InlineElement;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -27,6 +30,9 @@ import org.slf4j.LoggerFactory;
import java.io.File;
+import static org.apache.paimon.options.ConfigOptions.key;
+import static org.apache.paimon.options.description.TextElement.text;
+
/**
* Utility class for working with Hadoop-related classes. This should only be
used if Hadoop is on
* the classpath. Note: decoupled from specific engines.
@@ -34,6 +40,13 @@ import java.io.File;
public class HadoopUtils {
private static final Logger LOG =
LoggerFactory.getLogger(HadoopUtils.class);
+
+ public static final ConfigOption<HadoopConfigLoader> HADOOP_CONF_LOADER =
+ key("hadoop-conf-loader")
+ .enumType(HadoopConfigLoader.class)
+ .defaultValue(HadoopConfigLoader.ALL)
+ .withDescription("Specifies the way of loading hadoop
config.");
+
private static final String[] CONFIG_PREFIXES = {"hadoop."};
public static final String HADOOP_HOME_ENV = "HADOOP_HOME";
public static final String HADOOP_CONF_ENV = "HADOOP_CONF_DIR";
@@ -58,8 +71,10 @@ public class HadoopUtils {
// Approach 1: HADOOP_HOME environment variables
String[] possibleHadoopConfPaths = new String[2];
+ HadoopConfigLoader loader = options.get(HADOOP_CONF_LOADER);
+
final String hadoopHomeDir = System.getenv(HADOOP_HOME_ENV);
- if (hadoopHomeDir != null) {
+ if (hadoopHomeDir != null && loader.loadEnv()) {
LOG.debug("Searching Hadoop configuration files in HADOOP_HOME:
{}", hadoopHomeDir);
possibleHadoopConfPaths[0] = hadoopHomeDir + "/conf";
possibleHadoopConfPaths[1] = hadoopHomeDir + "/etc/hadoop"; //
hadoop 2.2
@@ -71,9 +86,9 @@ public class HadoopUtils {
}
}
- // Approach 2: Paimon configuration (deprecated)
+ // Approach 2: Paimon Catalog Option
final String hadoopConfigPath = options.getString(PATH_HADOOP_CONFIG,
null);
- if (hadoopConfigPath != null) {
+ if (hadoopConfigPath != null && loader.loadOption()) {
LOG.debug(
"Searching Hadoop configuration files in Paimon config:
{}", hadoopConfigPath);
foundHadoopConfiguration =
@@ -82,7 +97,7 @@ public class HadoopUtils {
// Approach 3: HADOOP_CONF_DIR environment variable
String hadoopConfDir = System.getenv(HADOOP_CONF_ENV);
- if (hadoopConfDir != null) {
+ if (hadoopConfDir != null && loader.loadEnv()) {
LOG.debug("Searching Hadoop configuration files in
HADOOP_CONF_DIR: {}", hadoopConfDir);
foundHadoopConfiguration =
addHadoopConfIfFound(result, hadoopConfDir) ||
foundHadoopConfiguration;
@@ -142,4 +157,41 @@ public class HadoopUtils {
}
return foundHadoopConfiguration;
}
+
+ /** Specifies the way of loading hadoop config. */
+ public enum HadoopConfigLoader implements DescribedEnum {
+ ALL("all", "Load Hadoop conf from environment variables and catalog
option.", true, true),
+ ENV("env", "Load Hadoop conf from environment variables only.", true,
false),
+ OPTION("option", "Load Hadoop conf from catalog option only.", false,
true);
+
+ private final String value;
+ private final String description;
+ private final boolean loadEnv;
+ private final boolean loadOption;
+
+ HadoopConfigLoader(String value, String description, boolean loadEnv,
boolean loadOption) {
+ this.value = value;
+ this.description = description;
+ this.loadEnv = loadEnv;
+ this.loadOption = loadOption;
+ }
+
+ public boolean loadEnv() {
+ return loadEnv;
+ }
+
+ public boolean loadOption() {
+ return loadOption;
+ }
+
+ @Override
+ public String toString() {
+ return value;
+ }
+
+ @Override
+ public InlineElement getDescription() {
+ return text(description);
+ }
+ }
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fs/HadoopConfigLoadingTest.java
b/paimon-common/src/test/java/org/apache/paimon/fs/HadoopConfigLoadingTest.java
index 906f97863..e7e3e4a36 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fs/HadoopConfigLoadingTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fs/HadoopConfigLoadingTest.java
@@ -208,7 +208,9 @@ public class HadoopConfigLoadingTest {
final Options options = new Options();
options.setString(HadoopUtils.PATH_HADOOP_CONFIG,
hadoopConfEntryDir.getAbsolutePath());
- final Configuration hadoopConf;
+ final Configuration hadoopConf1;
+ final Configuration hadoopConf2;
+ final Configuration hadoopConf3;
final Map<String, String> originalEnv = System.getenv();
final Map<String, String> newEnv = new HashMap<>(originalEnv);
@@ -216,19 +218,34 @@ public class HadoopConfigLoadingTest {
newEnv.put(HadoopUtils.HADOOP_HOME_ENV, hadoopHome.getAbsolutePath());
try {
CommonTestUtils.setEnv(newEnv);
- hadoopConf = HadoopUtils.getHadoopConfiguration(options);
+ hadoopConf1 = HadoopUtils.getHadoopConfiguration(options);
+
+ options.set(HadoopUtils.HADOOP_CONF_LOADER,
HadoopUtils.HadoopConfigLoader.ENV);
+ hadoopConf2 = HadoopUtils.getHadoopConfiguration(options);
+
+ options.set(HadoopUtils.HADOOP_CONF_LOADER,
HadoopUtils.HadoopConfigLoader.OPTION);
+ hadoopConf3 = HadoopUtils.getHadoopConfiguration(options);
} finally {
CommonTestUtils.setEnv(originalEnv);
}
- // contains extra entries
- assertEquals(v1, hadoopConf.get(k1, null));
- assertEquals(v2, hadoopConf.get(k2, null));
- assertEquals(v4, hadoopConf.get(k4, null));
- assertEquals(v5, hadoopConf.get(k5, null));
-
- // also contains classpath defaults
- assertEquals(IN_CP_CONFIG_VALUE, hadoopConf.get(IN_CP_CONFIG_KEY,
null));
+ assertEquals(v1, hadoopConf1.get(k1, null));
+ assertEquals(v2, hadoopConf1.get(k2, null));
+ assertEquals(v4, hadoopConf1.get(k4, null));
+ assertEquals(v5, hadoopConf1.get(k5, null));
+ assertEquals(IN_CP_CONFIG_VALUE, hadoopConf1.get(IN_CP_CONFIG_KEY,
null));
+
+ assertEquals("from HADOOP_CONF_DIR", hadoopConf2.get(k1, null));
+ assertEquals("from HADOOP_HOME/etc/hadoop", hadoopConf2.get(k2, null));
+ assertEquals("from HADOOP_HOME/etc/hadoop", hadoopConf2.get(k4, null));
+ assertEquals("from HADOOP_HOME/conf", hadoopConf2.get(k5, null));
+ assertEquals(IN_CP_CONFIG_VALUE, hadoopConf2.get(IN_CP_CONFIG_KEY,
null));
+
+ assertEquals("from Paimon config `hadoop-conf-dir`",
hadoopConf3.get(k1, null));
+ assertEquals("from Paimon config `hadoop-conf-dir`",
hadoopConf3.get(k2, null));
+ assertNull(hadoopConf3.get(k4, null));
+ assertNull(hadoopConf3.get(k5, null));
+ assertEquals(IN_CP_CONFIG_VALUE, hadoopConf3.get(IN_CP_CONFIG_KEY,
null));
}
@Test