[
https://issues.apache.org/jira/browse/DRILL-6662?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16584765#comment-16584765
]
ASF GitHub Bot commented on DRILL-6662:
---------------------------------------
asfgit closed pull request #1419: DRILL-6662: Access AWS access key ID and
secret access key using Cred…
URL: https://github.com/apache/drill/pull/1419
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/distribution/src/resources/core-site-example.xml
b/distribution/src/resources/core-site-example.xml
index 854e54dbe67..c7225a14784 100644
--- a/distribution/src/resources/core-site-example.xml
+++ b/distribution/src/resources/core-site-example.xml
@@ -30,4 +30,18 @@
<value>ENTER_YOUR_SECRETKEY</value>
</property>
+ <!--Use this property to specify one or more credential provider URIs
instead of
+ configuring above credentials in plain text-->
+ <!--<property>
+ <name>hadoop.security.credential.provider.path</name>
+ <value>ENTER_YOUR_PROVIDER_PATH</value>
+ </property>-->
+
+ <!--Set this property to true to avoid caching of S3 file system
configuration properties,
+ so when you add/update a property (e.g. fs.s3a.secret.key) in S3
storage plugin its new value will be taken-->
+ <!--<property>
+ <name>fs.s3a.impl.disable.cache</name>
+ <value>true</value>
+ </property>-->
+
</configuration>
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java
index b1f41a41451..cb66913ad13 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java
@@ -20,6 +20,7 @@
import static
org.apache.drill.exec.store.dfs.FileSystemSchemaFactory.DEFAULT_WS_NAME;
import java.io.IOException;
+import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -77,6 +78,10 @@ public FileSystemPlugin(FileSystemConfig config,
DrillbitContext context, String
fsConf.set("fs.classpath.impl", ClassPathFileSystem.class.getName());
fsConf.set("fs.drill-local.impl",
LocalSyncableFileSystem.class.getName());
+ if (isS3Connection(fsConf)) {
+ handleS3Credentials(fsConf);
+ }
+
formatCreator = newFormatCreator(config, context, fsConf);
List<FormatMatcher> matchers = new ArrayList<>();
formatPluginsByConfig = new HashMap<>();
@@ -104,6 +109,33 @@ public FileSystemPlugin(FileSystemConfig config,
DrillbitContext context, String
}
}
+ private boolean isS3Connection(Configuration conf) {
+ URI uri = FileSystem.getDefaultUri(conf);
+ return uri.getScheme().equals("s3a");
+ }
+
+ /**
+ * Retrieve secret and access keys from configured (with
+ * {@link
org.apache.hadoop.security.alias.CredentialProviderFactory#CREDENTIAL_PROVIDER_PATH}
property)
+ * credential providers and set it into {@code conf}. If provider path is
not configured or credential
+ * is absent in providers, it will conditionally fallback to configuration
setting. The fallback will occur unless
+ * {@link
org.apache.hadoop.security.alias.CredentialProvider#CLEAR_TEXT_FALLBACK} is set
to {@code false}.
+ *
+ * @param conf {@code Configuration} which will be updated with credentials
from provider
+ * @throws IOException thrown if a credential cannot be retrieved from
provider
+ */
+ private void handleS3Credentials(Configuration conf) throws IOException {
+ String[] credentialKeys = {"fs.s3a.secret.key", "fs.s3a.access.key"};
+ for (String key : credentialKeys) {
+ char[] credentialChars = conf.getPassword(key);
+ if (credentialChars == null) {
+ logger.warn(String.format("Property '%s' is absent.", key));
+ } else {
+ conf.set(key, String.valueOf(credentialChars));
+ }
+ }
+ }
+
/**
* Creates a new FormatCreator instance.
*
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Access AWS access key ID and secret access key using Credential Provider API
> for S3 storage plugin
> --------------------------------------------------------------------------------------------------
>
> Key: DRILL-6662
> URL: https://issues.apache.org/jira/browse/DRILL-6662
> Project: Apache Drill
> Issue Type: Improvement
> Reporter: Bohdan Kazydub
> Assignee: Bohdan Kazydub
> Priority: Major
> Labels: doc-impacting, ready-to-commit
> Fix For: 1.15.0
>
>
> Hadoop provides [CredentialProvider
> API|https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html]
> which allows passwords and other sensitive secrets to be stored in an
> external provider rather than in configuration files in plaintext.
> Currently S3 storage plugin is accessing passwords, namely
> 'fs.s3a.access.key' and 'fs.s3a.secret.key', stored in clear text in
> Configuration with get() method. To give users an ability to remove clear
> text passwords for S3 from configuration files Configuration.getPassword()
> method should be used, given they configure
> 'hadoop.security.credential.provider.path' property which points to a file
> containing encrypted passwords instead of configuring two aforementioned
> properties.
> By using this approach, credential providers will be checked first and if the
> secret is not provided or providers are not configured there will be a
> fallback to secrets configured in clear text (unless
> 'hadoop.security.credential.clear-text-fallback' is configured to be
> "false"), thus making new change backwards-compatible.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)