This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new 10e8e3d2e [#1192] improvement(hdfs): Add
`RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE` conf for storing shuffle data
(#1194)
10e8e3d2e is described below
commit 10e8e3d2eb553146d9729e5dac2bde20f71c3358
Author: xumanbu <[email protected]>
AuthorDate: Wed Sep 13 19:49:38 2023 +0800
[#1192] improvement(hdfs): Add
`RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE` conf for storing shuffle data
(#1194)
### What changes were proposed in this pull request?
Introduce `RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE`, which is `true` by
default. If it is set to `false`, shuffle data stored in secured HDFS is
written as the login UGI user instead of a proxy user for the job user.
### Why are the changes needed?
Fix: #1192
### Does this PR introduce _any_ user-facing change?
Yes. It introduces the `rss.security.hadoop.kerberos.proxy.user.enable`
option (`RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE`), which defaults to
`true`.
### How was this patch tested?
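1. Added `HadoopSecurityContextTest.testSecuredDisableProxyUser()`, which
verifies that with proxy user disabled, a directory created through
`runSecured("alex", ...)` is owned by `hdfs` rather than `alex`.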
---------
Co-authored-by: jam.xu <[email protected]>
---
README.md | 1 +
.../apache/uniffle/common/config/RssBaseConf.java | 7 +++++++
.../common/security/HadoopSecurityContext.java | 15 +++++++++++++--
.../uniffle/common/security/SecurityConfig.java | 10 ++++++++++
.../common/security/SecurityContextFactory.java | 3 ++-
.../uniffle/common/KerberizedHadoopBase.java | 1 +
.../common/security/HadoopSecurityContextTest.java | 22 ++++++++++++++++++++++
.../org/apache/uniffle/server/ShuffleServer.java | 3 +++
8 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 72d6776e6..6686dde82 100644
--- a/README.md
+++ b/README.md
@@ -324,6 +324,7 @@ The following security configurations are introduced.
|rss.security.hadoop.kerberos.keytab.file|-|The kerberos keytab file path. And
only when rss.security.hadoop.kerberos.enable is enabled, the option will be
valid|
|rss.security.hadoop.kerberos.principal|-|The kerberos keytab principal. And
only when rss.security.hadoop.kerberos.enable is enabled, the option will be
valid|
|rss.security.hadoop.kerberos.relogin.interval.sec|60|The kerberos
authentication relogin interval. unit: sec|
+|rss.security.hadoop.kerberos.proxy.user.enable|true|Whether using proxy user
for job user to access secured Hadoop cluster.|
* The proxy user mechanism is used to keep the data isolation in uniffle,
which means the shuffle-data written by
shuffle-servers is owned by spark app's user. To achieve the this, the login
user specified by above config should
diff --git
a/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
b/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
index 28d5bf135..c30e744c6 100644
--- a/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
+++ b/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
@@ -175,6 +175,13 @@ public class RssBaseConf extends RssConf {
.defaultValue(false)
.withDescription("Whether enable visiting secured hadoop cluster.");
+ public static final ConfigOption<Boolean>
RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE =
+ ConfigOptions.key("rss.security.hadoop.kerberos.proxy.user.enable")
+ .booleanType()
+ .defaultValue(true)
+ .withDescription(
+ "Whether using proxy user for job user to access secured Hadoop
cluster.");
+
public static final ConfigOption<String> RSS_SECURITY_HADOOP_KRB5_CONF_FILE =
ConfigOptions.key("rss.security.hadoop.kerberos.krb5-conf.file")
.stringType()
diff --git
a/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
b/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
index 683657996..575543ad5 100644
---
a/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
+++
b/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
@@ -40,6 +40,7 @@ public class HadoopSecurityContext implements SecurityContext
{
private UserGroupInformation loginUgi;
private ScheduledExecutorService refreshScheduledExecutor;
+ private boolean enableProxyUser;
// The purpose of the proxy user ugi cache is to prevent the creation of
// multiple cache keys for the same user, scheme, and authority in the
Hadoop filesystem.
@@ -51,6 +52,16 @@ public class HadoopSecurityContext implements
SecurityContext {
public HadoopSecurityContext(
String krb5ConfPath, String keytabFile, String principal, long
refreshIntervalSec)
throws Exception {
+ this(krb5ConfPath, keytabFile, principal, refreshIntervalSec, true);
+ }
+
+ public HadoopSecurityContext(
+ String krb5ConfPath,
+ String keytabFile,
+ String principal,
+ long refreshIntervalSec,
+ boolean enableProxyUser)
+ throws Exception {
if (StringUtils.isEmpty(keytabFile)) {
throw new IllegalArgumentException("KeytabFilePath must be not null or
empty");
}
@@ -60,7 +71,7 @@ public class HadoopSecurityContext implements SecurityContext
{
if (refreshIntervalSec <= 0) {
throw new IllegalArgumentException("refreshIntervalSec must be not
negative");
}
-
+ this.enableProxyUser = enableProxyUser;
if (StringUtils.isNotEmpty(krb5ConfPath)) {
System.setProperty(KRB5_CONF_KEY, krb5ConfPath);
}
@@ -100,7 +111,7 @@ public class HadoopSecurityContext implements
SecurityContext {
}
// Run with the proxy user.
- if (!user.equals(loginUgi.getShortUserName())) {
+ if (enableProxyUser && !user.equals(loginUgi.getShortUserName())) {
UserGroupInformation proxyUserUgi =
proxyUserUgiPool.computeIfAbsent(
user, x -> UserGroupInformation.createProxyUser(x, loginUgi));
diff --git
a/common/src/main/java/org/apache/uniffle/common/security/SecurityConfig.java
b/common/src/main/java/org/apache/uniffle/common/security/SecurityConfig.java
index 5b1048cac..cf6caa435 100644
---
a/common/src/main/java/org/apache/uniffle/common/security/SecurityConfig.java
+++
b/common/src/main/java/org/apache/uniffle/common/security/SecurityConfig.java
@@ -22,6 +22,7 @@ public class SecurityConfig {
private String keytabFilePath;
private String principal;
private long reloginIntervalSec;
+ private boolean enableProxyUser;
private SecurityConfig() {
// ignore.
@@ -43,6 +44,10 @@ public class SecurityConfig {
return reloginIntervalSec;
}
+ public boolean isEnableProxyUser() {
+ return enableProxyUser;
+ }
+
public static class Builder {
private SecurityConfig info;
@@ -70,6 +75,11 @@ public class SecurityConfig {
return this;
}
+ public SecurityConfig.Builder enableProxyUser(boolean enableProxyUser) {
+ info.enableProxyUser = enableProxyUser;
+ return this;
+ }
+
public SecurityConfig build() {
return info;
}
diff --git
a/common/src/main/java/org/apache/uniffle/common/security/SecurityContextFactory.java
b/common/src/main/java/org/apache/uniffle/common/security/SecurityContextFactory.java
index 7aef3ce88..a036d59e0 100644
---
a/common/src/main/java/org/apache/uniffle/common/security/SecurityContextFactory.java
+++
b/common/src/main/java/org/apache/uniffle/common/security/SecurityContextFactory.java
@@ -46,7 +46,8 @@ public class SecurityContextFactory {
securityConfig.getKrb5ConfPath(),
securityConfig.getKeytabFilePath(),
securityConfig.getPrincipal(),
- securityConfig.getReloginIntervalSec());
+ securityConfig.getReloginIntervalSec(),
+ securityConfig.isEnableProxyUser());
LOGGER.info("Initialized security context: {}",
securityContext.getClass().getSimpleName());
}
diff --git
a/common/src/test/java/org/apache/uniffle/common/KerberizedHadoopBase.java
b/common/src/test/java/org/apache/uniffle/common/KerberizedHadoopBase.java
index eab00972a..db7157ff0 100644
--- a/common/src/test/java/org/apache/uniffle/common/KerberizedHadoopBase.java
+++ b/common/src/test/java/org/apache/uniffle/common/KerberizedHadoopBase.java
@@ -49,6 +49,7 @@ public class KerberizedHadoopBase {
.keytabFilePath(kerberizedHadoop.getHdfsKeytab())
.principal(kerberizedHadoop.getHdfsPrincipal())
.reloginIntervalSec(1000)
+ .enableProxyUser(true)
.build();
SecurityContextFactory.get().init(securityConfig);
diff --git
a/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
b/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
index ff5bc63f7..a0aae828a 100644
---
a/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
+++
b/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
@@ -103,6 +103,28 @@ public class HadoopSecurityContextTest extends
KerberizedHadoopBase {
}
}
+ @Test
+ public void testSecuredDisableProxyUser() throws Exception {
+ try (HadoopSecurityContext context =
+ new HadoopSecurityContext(
+ null,
+ kerberizedHadoop.getHdfsKeytab(),
+ kerberizedHadoop.getHdfsPrincipal(),
+ 1000,
+ false)) {
+ Path pathWithHdfsUser = new Path("/alex/HadoopSecurityDisableProxyUser");
+ context.runSecured(
+ "alex",
+ (Callable<Void>)
+ () -> {
+ kerberizedHadoop.getFileSystem().mkdirs(pathWithHdfsUser);
+ return null;
+ });
+ FileStatus fileStatus =
kerberizedHadoop.getFileSystem().getFileStatus(pathWithHdfsUser);
+ assertEquals("hdfs", fileStatus.getOwner());
+ }
+ }
+
@Test
public void testCreateIllegalContext() throws Exception {
System.setProperty("sun.security.krb5.debug", "true");
diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
index 481458846..4049ce639 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
@@ -61,6 +61,7 @@ import org.apache.uniffle.storage.util.StorageType;
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_ENABLE;
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE;
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL;
+import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE;
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC;
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE;
import static org.apache.uniffle.common.config.RssBaseConf.RSS_STORAGE_TYPE;
@@ -237,6 +238,8 @@ public class ShuffleServer {
.principal(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL))
.reloginIntervalSec(
shuffleServerConf.getLong(RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC))
+ .enableProxyUser(
+
shuffleServerConf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_PROXY_USER_ENABLE))
.build();
}
SecurityContextFactory.get().init(securityConfig);