HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/51d16e7b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/51d16e7b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/51d16e7b Branch: refs/heads/HDFS-1312 Commit: 51d16e7b38d247f73b0ec2ffd8b2b02069c05a33 Parents: 51d497f Author: Chris Nauroth <cnaur...@apache.org> Authored: Thu Jun 16 23:35:20 2016 -0700 Committer: Chris Nauroth <cnaur...@apache.org> Committed: Thu Jun 16 23:35:20 2016 -0700 ---------------------------------------------------------------------- hadoop-tools/hadoop-azure-datalake/pom.xml | 7 +- ...ClientCredentialBasedAccesTokenProvider.java | 155 +++++++++++++++++++ .../src/site/markdown/index.md | 64 ++++++++ 3 files changed, 225 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/51d16e7b/hadoop-tools/hadoop-azure-datalake/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml index a4b1fe1..d2161c7 100644 --- a/hadoop-tools/hadoop-azure-datalake/pom.xml +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -147,7 +147,12 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - </dependency> + </dependency> + <dependency> + <groupId>com.squareup.okhttp</groupId> + <artifactId>okhttp</artifactId> + <version>2.4.0</version> + </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> http://git-wip-us.apache.org/repos/asf/hadoop/blob/51d16e7b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java ---------------------------------------------------------------------- diff --git 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hdfs.web.oauth2;

import com.squareup.okhttp.MediaType;
import com.squareup.okhttp.OkHttpClient;
import com.squareup.okhttp.Request;
import com.squareup.okhttp.RequestBody;
import com.squareup.okhttp.Response;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.util.Timer;
import org.apache.http.HttpStatus;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectReader;

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull;


/**
 * Obtains an OAuth2 access token from Azure Active Directory using the
 * client-credentials grant (a client ID plus a client secret/key), for
 * authenticating to Azure Data Lake.
 *
 * <p>Required configuration keys (all validated in {@link #setConf}):
 * <ul>
 *   <li>{@code dfs.webhdfs.oauth2.client.id} — the AAD client (application) ID</li>
 *   <li>{@code dfs.webhdfs.oauth2.refresh.url} — the AAD OAuth2 token endpoint</li>
 *   <li>{@code dfs.webhdfs.oauth2.credential} — the client secret/key</li>
 *   <li>{@code fs.adls.oauth2.resource} — the resource the token is requested for</li>
 * </ul>
 *
 * <p>NOTE(review): the class name keeps the original "Acces" (sic) spelling;
 * it is referenced verbatim from user configuration via
 * {@code dfs.webhdfs.oauth2.access.token.provider}, so renaming it would
 * break existing deployments.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class AzureADClientCredentialBasedAccesTokenProvider
    extends AccessTokenProvider {
  /** Shared, thread-safe reader for the token-endpoint JSON response. */
  private static final ObjectReader READER =
      new ObjectMapper().reader(Map.class);

  public static final String OAUTH_CREDENTIAL_KEY
      = "dfs.webhdfs.oauth2.credential";

  public static final String AAD_RESOURCE_KEY
      = "fs.adls.oauth2.resource";

  public static final String RESOURCE_PARAM_NAME
      = "resource";

  private static final String OAUTH_CLIENT_ID_KEY
      = "dfs.webhdfs.oauth2.client.id";

  private static final String OAUTH_REFRESH_URL_KEY
      = "dfs.webhdfs.oauth2.refresh.url";


  // OAuth2 protocol parameter and response-field names (RFC 6749).
  public static final String ACCESS_TOKEN = "access_token";
  public static final String CLIENT_CREDENTIALS = "client_credentials";
  public static final String CLIENT_ID = "client_id";
  public static final String CLIENT_SECRET = "client_secret";
  public static final String EXPIRES_IN = "expires_in";
  public static final String GRANT_TYPE = "grant_type";
  public static final MediaType URLENCODED
      = MediaType.parse("application/x-www-form-urlencoded; charset=utf-8");


  /** Tracks token expiry so a refresh only happens when needed. */
  private final AccessTokenTimer timer;

  private String clientId;

  private String refreshURL;

  private String accessToken;

  private String resource;

  private String credential;

  /** True once the first token has been fetched successfully. */
  private boolean initialCredentialObtained = false;

  AzureADClientCredentialBasedAccesTokenProvider() {
    this.timer = new AccessTokenTimer();
  }

  AzureADClientCredentialBasedAccesTokenProvider(Timer timer) {
    this.timer = new AccessTokenTimer(timer);
  }

  /**
   * Reads the required OAuth2 settings; {@code notNull} fails fast if any
   * of them is missing from the configuration.
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    clientId = notNull(conf, OAUTH_CLIENT_ID_KEY);
    refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY);
    resource = notNull(conf, AAD_RESOURCE_KEY);
    credential = notNull(conf, OAUTH_CREDENTIAL_KEY);
  }

  /**
   * Returns a valid access token, fetching a new one from AAD on first use
   * or when the cached token is near expiry.
   */
  @Override
  public String getAccessToken() throws IOException {
    if (timer.shouldRefresh() || !initialCredentialObtained) {
      refresh();
      initialCredentialObtained = true;
    }
    return accessToken;
  }

  /**
   * POSTs a client-credentials grant request to the AAD token endpoint and
   * caches the returned access token and its expiry.
   *
   * @throws IOException if the request fails or the response is malformed
   *     (all failures, including the non-200 case, are wrapped here)
   */
  void refresh() throws IOException {
    try {
      OkHttpClient client = new OkHttpClient();
      client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
          TimeUnit.MILLISECONDS);
      client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
          TimeUnit.MILLISECONDS);

      String bodyString = Utils.postBody(CLIENT_SECRET, credential,
          GRANT_TYPE, CLIENT_CREDENTIALS,
          RESOURCE_PARAM_NAME, resource,
          CLIENT_ID, clientId);

      RequestBody body = RequestBody.create(URLENCODED, bodyString);

      Request request = new Request.Builder()
          .url(refreshURL)
          .post(body)
          .build();
      Response response = client.newCall(request).execute();

      // An okhttp response body can only be consumed once; read it here and
      // reuse the text for both the error path and the success path.
      String responseText = response.body().string();

      if (response.code() != HttpStatus.SC_OK) {
        // BUG FIX: the original concatenated Response.toString(), which
        // prints the okhttp Response object, not the server's error text.
        throw new IllegalArgumentException("Received invalid http response: "
            + response.code() + ", text = " + responseText);
      }

      Map<?, ?> responseMap = READER.readValue(responseText);

      timer.setExpiresIn(responseMap.get(EXPIRES_IN).toString());

      accessToken = responseMap.get(ACCESS_TOKEN).toString();

    } catch (Exception e) {
      throw new IOException("Unable to obtain access token from credential", e);
    }
  }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/51d16e7b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md index 4158c88..3f03d41 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -23,6 +23,9 @@ * [OAuth2 Support](#OAuth2_Support) * [Read Ahead Buffer Management](Read_Ahead_Buffer_Management) * [Configuring Credentials & FileSystem](#Configuring_Credentials) + * [Using Refresh Token](#Refresh_Token) + * [Using Client Keys](#Client_Credential_Token) + * [Enabling ADL Filesystem](#Enabling_ADL) * [Accessing adl URLs](#Accessing_adl_URLs) * [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module) @@ -131,6 +134,9 @@ To configure number of concurrent connection to Azure Data Lake Storage Account. </property> ## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem +Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal). + +### <a name="Refresh_Token" />Using Refresh Token Update core-site.xml for OAuth2 configuration @@ -173,6 +179,64 @@ Application require to set Client id and OAuth2 refresh token from Azure Active <value></value> </property> + +### <a name="Client_Credential_Token" />Using Client Keys + +#### Generating the Service Principal +1. Go to the portal (https://portal.azure.com) +2. Under "Browse", look for Active Directory and click on it. +3. Create "Web Application".
Remember the name you create here - that is what you will add to your ADL account as authorized user. +4. Go through the wizard +5. Once the app is created, go to the app configuration, and find the section on "keys" +6. Select a key duration and hit save. Save the generated keys. +7. Note down the properties you will need to authenticate: + - The client ID + - The key you just generated above + - The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth 2.0 Token Endpoint value) + - Resource: Always https://management.core.windows.net/, for all customers + +#### Adding the service principal to your ADL Account +1. Go to the portal again, and open your ADL account +2. Select Users under Settings +3. Add the application name you created in Step 3 above (note that it does not show up in the list, but will be found if you search for the name) +4. Add "Owner" role + +#### Configure core-site.xml +Add the following properties to your core-site.xml + + <property> + <name>dfs.webhdfs.oauth2.access.token.provider</name> + <value>org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider</value> + </property> + + <property> + <name>dfs.webhdfs.oauth2.refresh.url</name> + <value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value> + </property> + + <property> + <name>dfs.webhdfs.oauth2.client.id</name> + <value>CLIENT ID FROM STEP 7 ABOVE</value> + </property> + + <property> + <name>dfs.webhdfs.oauth2.credential</name> + <value>PASSWORD FROM STEP 7 ABOVE</value> + </property> + + <property> + <name>fs.adls.oauth2.resource</name> + <value>https://management.core.windows.net/</value> + </property> + + <property> + <name>fs.defaultFS</name> + <value>YOUR ADL STORE URL (e.g., https://example.azuredatalakestore.net)</value> + </property> + + +## <a name="Enabling_ADL" />Enabling ADL Filesystem + For the ADL FileSystem to take effect:
Update core-site.xml with <property> --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org