Repository: incubator-impala

Updated Branches:
  refs/heads/branch-2.9.0 8049f8113 -> b85585069
IMPALA-5333: Add support for Impala to work with ADLS

This patch leverages the AdlFileSystem in Hadoop to allow Impala to talk
to the Azure Data Lake Store. It contains the functional changes as well
as new test infrastructure for testing Impala over ADLS.

We do not support ACLs on ADLS, since the Hadoop ADLS connector does not
integrate ADLS ACLs with Hadoop users/groups.

For testing, we use the azure-data-lake-store-python client from
Microsoft. This client appears to have some consistency issues. For
example, a DROP TABLE through Impala deletes the files in ADLS; however,
listing that directory through the Python client immediately after the
drop may still show the files. This behavior is unexpected, since ADLS
claims to be strongly consistent. Tests affected by this limitation have
been skipped with the tag SkipIfADLS.eventually_consistent. Tracked by
IMPALA-5335.

The azure-data-lake-store-python client also only works on CentOS 6.6
and above, so the Python dependencies for Azure are not downloaded when
TARGET_FILESYSTEM is not "adls". When running ADLS tests, the
expectation is that the machine runs at least CentOS 6.6.
Note: this is only a test limitation, not a functional one. Clusters
with older OSes like CentOS 6.4 will still work with ADLS.

Added another dependency (libffi-dev) to bootstrap_build.sh for the ADLS
Python client.

Testing: Ran core tests with and without TARGET_FILESYSTEM set to 'adls'
to make sure that all tests pass and that nothing breaks.

Change-Id: Ic56b9988b32a330443f24c44f9cb2c80842f7542
Reviewed-on: http://gerrit.cloudera.org:8080/6910
Tested-by: Impala Public Jenkins
Reviewed-by: Sailesh Mukil <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4086f2c8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4086f2c8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4086f2c8

Branch: refs/heads/branch-2.9.0
Commit: 4086f2c84de754d0a4a0ea87c0ee49b7e6eb469f
Parents: 8049f81
Author: Sailesh Mukil <[email protected]>
Authored: Mon Apr 10 17:08:01 2017 -0700
Committer: Taras Bobrovytsky <[email protected]>
Committed: Thu Jun 1 17:49:52 2017 -0700

----------------------------------------------------------------------
 bin/bootstrap_build.sh                          |  2 +-
 bin/impala-config.sh                            | 20 ++++
 fe/pom.xml                                      |  6 ++
 .../apache/impala/analysis/LoadDataStmt.java    | 17 +++--
 .../org/apache/impala/catalog/HdfsTable.java    |  8 ++-
 .../apache/impala/common/FileSystemUtil.java    | 22 +++++-
 .../org/apache/impala/service/JniFrontend.java  |  5 +-
 .../impala/analysis/AnalyzeStmtsTest.java       |  4 +-
 infra/python/bootstrap_virtualenv.py            | 17 +++++
 infra/python/deps/adls-requirements.txt         | 21 ++++++
 infra/python/deps/compiled-requirements.txt     |  6 +-
 infra/python/deps/pip_download.py               |  2 +-
 .../common/etc/hadoop/conf/core-site.xml.tmpl   | 20 ++++++
 tests/common/impala_test_suite.py               | 27 +++++--
 tests/common/skip.py                            | 21 ++++++
 tests/custom_cluster/test_hdfs_fd_caching.py    |  3 +-
 tests/custom_cluster/test_insert_behaviour.py   |  3 +-
 .../test_parquet_max_page_header.py             |  3 +-
 tests/custom_cluster/test_permanent_udfs.py     |  4 +-
 tests/data_errors/test_data_errors.py           |  5 +-
 tests/failure/test_failpoints.py                |  3 +-
 tests/metadata/test_compute_stats.py            |  3 +-
 tests/metadata/test_ddl.py                      | 18 +++--
 tests/metadata/test_hdfs_encryption.py          |  3 +-
 tests/metadata/test_hdfs_permissions.py         |  3 +-
 tests/metadata/test_hms_integration.py          |  4 +-
 .../metadata/test_metadata_query_statements.py  |  4 +-
 tests/metadata/test_partition_metadata.py       |  3 +-
 tests/metadata/test_refresh_partition.py        |  3 +-
 tests/metadata/test_views_compatibility.py      |  3 +-
 tests/query_test/test_compressed_formats.py     |  4 +-
 tests/query_test/test_hdfs_caching.py           |  6 +-
 tests/query_test/test_hdfs_fd_caching.py        |  3 +-
 tests/query_test/test_insert_behaviour.py       |  9 ++-
 tests/query_test/test_insert_parquet.py         |  7 +-
 tests/query_test/test_join_queries.py           |  4 +-
 tests/query_test/test_nested_types.py           |  9 ++-
 tests/query_test/test_observability.py          |  3 +-
 tests/query_test/test_partitioning.py           |  3 +-
 tests/query_test/test_scanners.py               | 11 ++-
 tests/stress/test_ddl_stress.py                 |  3 +-
 tests/util/adls_util.py                         | 76 ++++++++++++++++++++
 tests/util/filesystem_utils.py                  |  7 ++
 43 files changed, 359 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/bin/bootstrap_build.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_build.sh b/bin/bootstrap_build.sh
index c3bd22b..fa3dbfd 100755
--- a/bin/bootstrap_build.sh
+++ b/bin/bootstrap_build.sh
@@ -32,7 +32,7 @@ set -euxo pipefail
 # Install dependencies:
 sudo apt-get update
 sudo apt-get --yes install g++ gcc git libsasl2-dev libssl-dev make maven openjdk-7-jdk \
-  python-dev python-setuptools
+  python-dev python-setuptools libffi-dev
 
 export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/
 ./buildall.sh -notests -so

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 86a115e..11c34f7 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -240,6 +240,10 @@ export FILESYSTEM_PREFIX="${FILESYSTEM_PREFIX-}"
 export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY-DummySecretAccessKey}"
 export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID-DummyAccessKeyId}"
 export S3_BUCKET="${S3_BUCKET-}"
+export azure_tenant_id="${azure_tenant_id-DummyAdlsTenantId}"
+export azure_client_id="${azure_client_id-DummyAdlsClientId}"
+export azure_client_secret="${azure_client_secret-DummyAdlsClientSecret}"
+export azure_data_lake_store_name="${azure_data_lake_store_name-}"
 export HDFS_REPLICATION="${HDFS_REPLICATION-3}"
 export ISILON_NAMENODE="${ISILON_NAMENODE-}"
 export DEFAULT_FS="${DEFAULT_FS-hdfs://localhost:20500}"
@@ -268,6 +272,22 @@ if [ "${TARGET_FILESYSTEM}" = "s3" ]; then
   fi
   DEFAULT_FS="s3a://${S3_BUCKET}"
   export DEFAULT_FS
+elif [ "${TARGET_FILESYSTEM}" = "adls" ]; then
+  # Basic error checking
+  if [[ "${azure_client_id}" = "DummyAdlsClientId" ||\
+       "${azure_tenant_id}" = "DummyAdlsTenantId" ||\
+       "${azure_client_secret}" = "DummyAdlsClientSecret" ]]; then
+    echo "All 3 of the following need to be assigned valid values and belong
+      to the owner of the ADLS store in order to access the filesystem:
+      azure_client_id, azure_tenant_id, azure_client_secret."
+    return 1
+  fi
+  if [[ "${azure_data_lake_store_name}" = "" ]]; then
+    echo "azure_data_lake_store_name cannot be an empty string for ADLS"
+    return 1
+  fi
+  DEFAULT_FS="adl://${azure_data_lake_store_name}.azuredatalakestore.net"
+  export DEFAULT_FS
 elif [ "${TARGET_FILESYSTEM}" = "isilon" ]; then
   if [ "${ISILON_NAMENODE}" = "" ]; then
     echo "In order to access the Isilon filesystem, ISILON_NAMENODE"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/fe/pom.xml
----------------------------------------------------------------------
diff --git a/fe/pom.xml b/fe/pom.xml
index c6aeaa3..238f118 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -86,6 +86,12 @@ under the License.
     <dependency>
       <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-azure-datalake</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-mapreduce-client-core</artifactId>
       <version>${hadoop.version}</version>
     </dependency>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java b/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
index 3357df9..ec0244d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.adl.AdlFileSystem;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.HdfsFileFormat;
@@ -126,6 +127,9 @@ public class LoadDataStmt extends StatementBase {
    * paths for this LOAD statement (which maps onto a sequence of file move operations,
    * with the requisite permission requirements), and check to see if all files to be
    * moved are in format that Impala understands. Errors are raised as AnalysisExceptions.
+   *
+   * We don't check permissions for the S3AFileSystem and the AdlFileSystem due to
+   * limitations with their getAclStatus() API. (see HADOOP-13892 and HADOOP-14437)
    */
   private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable)
       throws AnalysisException {
@@ -137,9 +141,10 @@
     try {
       Path source = sourceDataPath_.getPath();
       FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
-      if (!(fs instanceof DistributedFileSystem) && !(fs instanceof S3AFileSystem)) {
+      if (!(fs instanceof DistributedFileSystem) && !(fs instanceof S3AFileSystem) &&
+          !(fs instanceof AdlFileSystem)) {
         throw new AnalysisException(String.format("INPATH location '%s' " +
-            "must point to an HDFS or S3A filesystem.", sourceDataPath_));
+            "must point to an HDFS, S3A or ADL filesystem.", sourceDataPath_));
       }
       if (!fs.exists(source)) {
         throw new AnalysisException(String.format(
@@ -150,6 +155,8 @@
       // it. If the source file is a file, we must be able to read from it, and write to
       // its parent directory (in order to delete the file as part of the move operation).
FsPermissionChecker checker = FsPermissionChecker.getInstance(); + // TODO: Disable permission checking for S3A as well (HADOOP-13892) + boolean shouldCheckPerms = !(fs instanceof AdlFileSystem); if (fs.isDirectory(source)) { if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) { @@ -162,7 +169,7 @@ public class LoadDataStmt extends StatementBase { sourceDataPath_)); } if (!checker.getPermissions(fs, source).checkPermissions( - FsAction.READ_WRITE)) { + FsAction.READ_WRITE) && shouldCheckPerms) { throw new AnalysisException(String.format("Unable to LOAD DATA from %s " + "because Impala does not have READ and WRITE permissions on this directory", source)); @@ -175,14 +182,14 @@ public class LoadDataStmt extends StatementBase { } if (!checker.getPermissions(fs, source.getParent()).checkPermissions( - FsAction.WRITE)) { + FsAction.WRITE) && shouldCheckPerms) { throw new AnalysisException(String.format("Unable to LOAD DATA from %s " + "because Impala does not have WRITE permissions on its parent " + "directory %s", source, source.getParent())); } if (!checker.getPermissions(fs, source).checkPermissions( - FsAction.READ)) { + FsAction.READ) && shouldCheckPerms) { throw new AnalysisException(String.format("Unable to LOAD DATA from %s " + "because Impala does not have READ permissions on this file", source)); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 55e4197..23a8c96 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -768,7 +768,7 @@ public class HdfsTable extends Table { * permissions Impala has on the given path. If the path does not exist, recurses up * the path until a existing parent directory is found, and inherit access permissions * from that. - * Always returns READ_WRITE for S3 files. + * Always returns READ_WRITE for S3 and ADLS files. */ private TAccessLevel getAvailableAccessLevel(FileSystem fs, Path location) throws IOException { @@ -781,6 +781,12 @@ public class HdfsTable extends Table { // permissions. (see HADOOP-13892) if (FileSystemUtil.isS3AFileSystem(fs)) return TAccessLevel.READ_WRITE; + // The ADLS connector currently returns ACLs for files in ADLS, but can only map + // them to the ADLS client SPI and not the Hadoop users/groups, causing unexpected + // behavior. So ADLS ACLs are unsupported until the connector is able to map + // permissions to hadoop users/groups (HADOOP-14437). 
+ if (FileSystemUtil.isADLFileSystem(fs)) return TAccessLevel.READ_WRITE; + FsPermissionChecker permissionChecker = FsPermissionChecker.getInstance(); while (location != null) { if (fs.exists(location)) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java index f8c50b4..9ae4269 100644 --- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java +++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.adl.AdlFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.client.HdfsAdmin; import org.apache.hadoop.hdfs.protocol.EncryptionZone; @@ -301,7 +302,8 @@ public class FileSystemUtil { // Common case. if (isDistributedFileSystem(fs)) return true; // Blacklist FileSystems that are known to not to include storage UUIDs. - return !(fs instanceof S3AFileSystem || fs instanceof LocalFileSystem); + return !(fs instanceof S3AFileSystem || fs instanceof LocalFileSystem || + fs instanceof AdlFileSystem); } /** @@ -319,6 +321,20 @@ public class FileSystemUtil { } /** + * Returns true iff the filesystem is AdlFileSystem. + */ + public static boolean isADLFileSystem(FileSystem fs) { + return fs instanceof AdlFileSystem; + } + + /** + * Returns true iff the path is on AdlFileSystem. + */ + public static boolean isADLFileSystem(Path path) throws IOException { + return isADLFileSystem(path.getFileSystem(CONF)); + } + + /** * Returns true iff the filesystem is an instance of LocalFileSystem. 
*/ public static boolean isLocalFileSystem(FileSystem fs) { @@ -463,6 +479,8 @@ public class FileSystemUtil { throws IOException { Path path = new Path(location); return (FileSystemUtil.isDistributedFileSystem(path) || - FileSystemUtil.isLocalFileSystem(path) || FileSystemUtil.isS3AFileSystem(path)); + FileSystemUtil.isLocalFileSystem(path) || + FileSystemUtil.isS3AFileSystem(path) || + FileSystemUtil.isADLFileSystem(path)); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/fe/src/main/java/org/apache/impala/service/JniFrontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java index 2b6748d..cfd83a5 100644 --- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java +++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.adl.AdlFileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.impala.analysis.DescriptorTable; @@ -716,7 +717,9 @@ public class JniFrontend { private String checkFileSystem(Configuration conf) { try { FileSystem fs = FileSystem.get(CONF); - if (!(fs instanceof DistributedFileSystem || fs instanceof S3AFileSystem)) { + if (!(fs instanceof DistributedFileSystem || + fs instanceof S3AFileSystem || + fs instanceof AdlFileSystem)) { return "Currently configured default filesystem: " + fs.getClass().getSimpleName() + ". " + CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY + http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index 448c62f..06ad842 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -2990,11 +2990,11 @@ public class AnalyzeStmtsTest extends AnalyzerTest { AnalysisError(String.format("load data inpath '%s' %s into table " + "tpch.lineitem", "file:///test-warehouse/test.out", overwrite), "INPATH location 'file:/test-warehouse/test.out' must point to an " + - "HDFS or S3A filesystem"); + "HDFS, S3A or ADL filesystem."); AnalysisError(String.format("load data inpath '%s' %s into table " + "tpch.lineitem", "s3n://bucket/test-warehouse/test.out", overwrite), "INPATH location 's3n://bucket/test-warehouse/test.out' must point to an " + - "HDFS or S3A filesystem"); + "HDFS, S3A or ADL filesystem."); // File type / table type mismatch. 
AnalyzesOk(String.format("load data inpath '%s' %s into table " + http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/infra/python/bootstrap_virtualenv.py ---------------------------------------------------------------------- diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py index 0d6b3c8..4fb8689 100644 --- a/infra/python/bootstrap_virtualenv.py +++ b/infra/python/bootstrap_virtualenv.py @@ -60,6 +60,10 @@ COMPILED_REQS_PATH = os.path.join(DEPS_DIR, "compiled-requirements.txt") # by the compiled requirements step. KUDU_REQS_PATH = os.path.join(DEPS_DIR, "kudu-requirements.txt") +# Requirements for the ADLS test client step, which depends on Cffi (C Foreign Function +# Interface) being installed by the compiled requirements step. +ADLS_REQS_PATH = os.path.join(DEPS_DIR, "adls-requirements.txt") + def delete_virtualenv_if_exist(): if os.path.exists(ENV_DIR): shutil.rmtree(ENV_DIR) @@ -213,6 +217,18 @@ def install_compiled_deps_if_possible(): mark_reqs_installed(COMPILED_REQS_PATH) return True +def install_adls_deps(): + # The ADLS dependencies require that the OS is at least CentOS 6.6 or above, + # which is why we break this into a seperate step. If the target filesystem is + # ADLS, the expectation is that the dev environment is running at least CentOS 6.6. + if reqs_are_installed(ADLS_REQS_PATH): + LOG.debug("Skipping ADLS deps: matching adls-installed-requirements.txt found") + return True + if os.environ.get('TARGET_FILESYSTEM') == "adls": + LOG.info("Installing ADLS packages into the virtualenv") + exec_pip_install(["-r", ADLS_REQS_PATH]) + mark_reqs_installed(ADLS_REQS_PATH) + def install_kudu_client_if_possible(): '''Installs the Kudu python module if possible, which depends on the toolchain and the compiled requirements in compiled-requirements.txt. If the toolchain isn't @@ -348,3 +364,4 @@ if __name__ == "__main__": setup_virtualenv_if_not_exists() if install_compiled_deps_if_possible(): install_kudu_client_if_possible() + install_adls_deps() http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/infra/python/deps/adls-requirements.txt ---------------------------------------------------------------------- diff --git a/infra/python/deps/adls-requirements.txt b/infra/python/deps/adls-requirements.txt new file mode 100644 index 0000000..f027a6e --- /dev/null +++ b/infra/python/deps/adls-requirements.txt @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# The following dependencies depend on cffi, so it must be installed after the toolchain +# is bootstrapped and all requirements in requirements.txt and compiled-requirements.txt +# are installed into the virtualenv. 
+azure-datalake-store == 0.0.9 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/infra/python/deps/compiled-requirements.txt ---------------------------------------------------------------------- diff --git a/infra/python/deps/compiled-requirements.txt b/infra/python/deps/compiled-requirements.txt index 945e3f6..b9273d1 100644 --- a/infra/python/deps/compiled-requirements.txt +++ b/infra/python/deps/compiled-requirements.txt @@ -32,7 +32,11 @@ impyla == 0.14.0 thrift == 0.9.0 thrift_sasl == 0.1.0 psutil == 0.7.1 - +# Required for ADLS Python client + pycparser == 2.17 + cffi==1.10.0 + cryptography==1.8.1 + scandir == 1.5 # Required for Kudu: Cython == 0.23.4 numpy == 1.10.4 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/infra/python/deps/pip_download.py ---------------------------------------------------------------------- diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py index bd54d30..cc9b412 100755 --- a/infra/python/deps/pip_download.py +++ b/infra/python/deps/pip_download.py @@ -37,7 +37,7 @@ PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org') # The requirement files that list all of the required packages and versions. REQUIREMENTS_FILES = ['requirements.txt', 'compiled-requirements.txt', - 'kudu-requirements.txt'] + 'kudu-requirements.txt', 'adls-requirements.txt'] def check_md5sum(filename, expected_md5): actual_md5 = md5(open(filename).read()).hexdigest() http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl ---------------------------------------------------------------------- diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl index aaea579..9ff4ee7 100644 --- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl +++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl @@ -112,4 +112,24 @@ DEFAULT</value> <!-- END Kerberos settings --> + <property> + <name>dfs.adls.oauth2.access.token.provider.type</name> + <value>ClientCredential</value> + </property> + + <property> + <name>dfs.adls.oauth2.client.id</name> + <value>${azure_client_id}</value> + </property> + + <property> + <name>dfs.adls.oauth2.credential</name> + <value>${azure_client_secret}</value> + </property> + + <property> + <name>dfs.adls.oauth2.refresh.url</name> + <value>https://login.windows.net/${azure_tenant_id}/oauth2/token</value> + </property> + </configuration> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index 7cbacd3..3af1ed0 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -51,7 +51,13 @@ from tests.common.test_vector import ImpalaTestDimension from tests.performance.query import Query from tests.performance.query_exec_functions import execute_using_jdbc from tests.performance.query_executor import JdbcQueryExecConfig -from tests.util.filesystem_utils import IS_S3, S3_BUCKET_NAME, FILESYSTEM_PREFIX +from tests.util.filesystem_utils import ( + IS_S3, + IS_ADLS, + S3_BUCKET_NAME, + ADLS_STORE_NAME, + FILESYSTEM_PREFIX) + from tests.util.hdfs_util import ( HdfsConfig, get_hdfs_client, @@ -68,9 +74,19 @@ from 
tests.util.thrift_util import create_transport from hive_metastore import ThriftHiveMetastore from thrift.protocol import TBinaryProtocol +# Initializing the logger before conditional imports, since we will need it +# for them. logging.basicConfig(level=logging.INFO, format='-- %(message)s') LOG = logging.getLogger('impala_test_suite') +# The ADLS python client isn't downloaded when ADLS isn't the target FS, so do a +# conditional import. +if IS_ADLS: + try: + from tests.util.adls_util import ADLSClient + except ImportError: + LOG.error("Need the ADLSClient for testing with ADLS") + IMPALAD_HOST_PORT_LIST = pytest.config.option.impalad.split(',') assert len(IMPALAD_HOST_PORT_LIST) > 0, 'Must specify at least 1 impalad to target' IMPALAD = IMPALAD_HOST_PORT_LIST[0] @@ -126,8 +142,11 @@ class ImpalaTestSuite(BaseTestSuite): cls.impalad_test_service = cls.create_impala_service() cls.hdfs_client = cls.create_hdfs_client() - cls.s3_client = S3Client(S3_BUCKET_NAME) - cls.filesystem_client = cls.s3_client if IS_S3 else cls.hdfs_client + cls.filesystem_client = cls.hdfs_client + if IS_S3: + cls.filesystem_client = S3Client(S3_BUCKET_NAME) + elif IS_ADLS: + cls.filesystem_client = ADLSClient(ADLS_STORE_NAME) @classmethod def teardown_class(cls): @@ -645,7 +664,7 @@ class ImpalaTestSuite(BaseTestSuite): # If 'skip_hbase' is specified or the filesystem is isilon, s3 or local, we don't # need the hbase dimension. if pytest.config.option.skip_hbase or TARGET_FILESYSTEM.lower() \ - in ['s3', 'isilon', 'local']: + in ['s3', 'isilon', 'local', 'adls']: for tf_dimension in tf_dimensions: if tf_dimension.value.file_format == "hbase": tf_dimensions.remove(tf_dimension) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/common/skip.py ---------------------------------------------------------------------- diff --git a/tests/common/skip.py b/tests/common/skip.py index e330712..642dea4 100644 --- a/tests/common/skip.py +++ b/tests/common/skip.py @@ -30,6 +30,7 @@ from tests.util.filesystem_utils import ( IS_LOCAL, IS_HDFS, IS_S3, + IS_ADLS, SECONDARY_FILESYSTEM) @@ -51,6 +52,26 @@ class SkipIfS3: qualified_path = pytest.mark.skipif(IS_S3, reason="Tests rely on HDFS qualified paths, IMPALA-1872") +class SkipIfADLS: + + # These ones are skipped due to product limitations. + caching = pytest.mark.skipif(IS_ADLS, reason="SET CACHED not implemented for ADLS") + hive = pytest.mark.skipif(IS_ADLS, reason="Hive doesn't work with ADLS") + hdfs_block_size = pytest.mark.skipif(IS_ADLS, reason="ADLS uses it's own block size") + hdfs_acls = pytest.mark.skipif(IS_ADLS, reason="HDFS acls are not supported on ADLS") + jira = partial(pytest.mark.skipif, IS_ADLS) + hdfs_encryption = pytest.mark.skipif(IS_ADLS, + reason="HDFS encryption is not supported with ADLS") + + # These ones need test infra work to re-enable. 
+ udfs = pytest.mark.skipif(IS_ADLS, reason="udas/udfs not copied to ADLS") + datasrc = pytest.mark.skipif(IS_ADLS, reason="data sources not copied to ADLS") + hbase = pytest.mark.skipif(IS_ADLS, reason="HBase not started with ADLS") + qualified_path = pytest.mark.skipif(IS_ADLS, + reason="Tests rely on HDFS qualified paths, IMPALA-1872") + eventually_consistent = pytest.mark.skipif(IS_ADLS, + reason="The client is slow to realize changes to file metadata") + class SkipIfKudu: unsupported_env = pytest.mark.skipif(os.environ["KUDU_IS_SUPPORTED"] == "false", reason="Kudu is not supported in this environment") http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/custom_cluster/test_hdfs_fd_caching.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py index acc13df..c030c8c 100644 --- a/tests/custom_cluster/test_hdfs_fd_caching.py +++ b/tests/custom_cluster/test_hdfs_fd_caching.py @@ -18,9 +18,10 @@ import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3 +from tests.common.skip import SkipIfS3, SkipIfADLS @SkipIfS3.caching [email protected] class TestHdfsFdCaching(CustomClusterTestSuite): """Tests that if HDFS file handle caching is enabled, file handles are actually cached and the associated metrics return valid results. In addition, tests that the upper bound http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/custom_cluster/test_insert_behaviour.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_insert_behaviour.py b/tests/custom_cluster/test_insert_behaviour.py index 6df8ed7..af6a270 100644 --- a/tests/custom_cluster/test_insert_behaviour.py +++ b/tests/custom_cluster/test_insert_behaviour.py @@ -18,13 +18,14 @@ import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfLocal from tests.util.filesystem_utils import IS_ISILON, WAREHOUSE from tests.util.hdfs_util import HdfsConfig, get_hdfs_client, get_hdfs_client_from_conf TEST_TBL = "insert_inherit_permission" @SkipIfS3.hdfs_acls [email protected]_acls class TestInsertBehaviourCustomCluster(CustomClusterTestSuite): @classmethod http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/custom_cluster/test_parquet_max_page_header.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_parquet_max_page_header.py b/tests/custom_cluster/test_parquet_max_page_header.py index b87a10e..913d883 100644 --- a/tests/custom_cluster/test_parquet_max_page_header.py +++ b/tests/custom_cluster/test_parquet_max_page_header.py @@ -24,7 +24,7 @@ import string import subprocess from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon class TestParquetMaxPageHeader(CustomClusterTestSuite): '''This tests large page headers in parquet files. 
Parquet page header size can @@ -92,6 +92,7 @@ class TestParquetMaxPageHeader(CustomClusterTestSuite): put.wait() @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfIsilon.hive @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("-max_page_header_size=31457280") http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/custom_cluster/test_permanent_udfs.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py index 3823c55..100b5ea 100644 --- a/tests/custom_cluster/test_permanent_udfs.py +++ b/tests/custom_cluster/test_permanent_udfs.py @@ -24,7 +24,7 @@ import subprocess from tempfile import mkdtemp from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_uncompressed_text_dimension from tests.util.filesystem_utils import get_fs_path @@ -162,6 +162,7 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfLocal.hive @pytest.mark.execute_serially def test_corrupt_java_udf(self): @@ -182,6 +183,7 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfLocal.hive @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/data_errors/test_data_errors.py ---------------------------------------------------------------------- diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py index fa3f189..21791d4 100644 --- a/tests/data_errors/test_data_errors.py +++ b/tests/data_errors/test_data_errors.py @@ -25,7 +25,7 @@ import subprocess from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIf, SkipIfS3, SkipIfLocal +from tests.common.skip import SkipIf, SkipIfS3, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import create_exec_option_dimension class TestDataErrors(ImpalaTestSuite): @@ -105,6 +105,7 @@ class TestHdfsUnknownErrors(ImpalaTestSuite): assert "Safe mode is OFF" in output @SkipIfS3.qualified_path [email protected]_path class TestHdfsScanNodeErrors(TestDataErrors): @classmethod def add_test_dimensions(cls): @@ -122,6 +123,7 @@ class TestHdfsScanNodeErrors(TestDataErrors): self.run_test_case('DataErrorsTest/hdfs-scan-node-errors', vector) @SkipIfS3.qualified_path [email protected]_path @SkipIfLocal.qualified_path class TestHdfsSeqScanNodeErrors(TestHdfsScanNodeErrors): @classmethod @@ -136,6 +138,7 @@ class TestHdfsSeqScanNodeErrors(TestHdfsScanNodeErrors): @SkipIfS3.qualified_path [email protected]_path class TestHdfsRcFileScanNodeErrors(TestHdfsScanNodeErrors): @classmethod def add_test_dimensions(cls): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/failure/test_failpoints.py ---------------------------------------------------------------------- diff --git a/tests/failure/test_failpoints.py b/tests/failure/test_failpoints.py index f67afe4..1f0c1c5 100644 --- a/tests/failure/test_failpoints.py +++ b/tests/failure/test_failpoints.py @@ -26,7 +26,7 @@ from time import sleep from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import 
ImpalaTestSuite, LOG -from tests.common.skip import SkipIf, SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIf, SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension @@ -53,6 +53,7 @@ QUERIES = [ @SkipIf.skip_hbase # -skip_hbase argument specified @SkipIfS3.hbase # S3: missing coverage: failures [email protected] @SkipIfIsilon.hbase # ISILON: missing coverage: failures. @SkipIfLocal.hbase class TestFailpoints(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_compute_stats.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py index f39fd02..3a13859 100644 --- a/tests/metadata/test_compute_stats.py +++ b/tests/metadata/test_compute_stats.py @@ -19,7 +19,7 @@ import pytest from subprocess import check_call from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import ( create_exec_option_dimension, create_single_exec_option_dimension, @@ -70,6 +70,7 @@ class TestComputeStats(ImpalaTestSuite): self.cleanup_db("parquet") @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive def test_compute_stats_impala_2201(self, vector, unique_database): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_ddl.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index 37ca7cd..d69f603 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -22,9 +22,9 @@ import time from test_ddl_base import TestDdlBase from tests.common.impala_test_suite import LOG from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIf, SkipIfLocal, SkipIfOldAggsJoins +from tests.common.skip import SkipIf, SkipIfADLS, SkipIfLocal, SkipIfOldAggsJoins from tests.common.test_dimensions import create_single_exec_option_dimension -from tests.util.filesystem_utils import WAREHOUSE, IS_HDFS, IS_LOCAL, IS_S3 +from tests.util.filesystem_utils import WAREHOUSE, IS_HDFS, IS_LOCAL, IS_S3, IS_ADLS # Validates DDL statements (create, drop) class TestDdlStatements(TestDdlBase): @@ -53,11 +53,13 @@ class TestDdlStatements(TestDdlBase): format(getpass.getuser(), unique_database)) # Drop the table (with purge) and make sure it doesn't exist in trash self.client.execute("drop table {0}.t2 purge".format(unique_database)) - if not IS_S3: + if not IS_S3 and not IS_ADLS: # In S3, deletes are eventual. So even though we dropped the table, the files - # belonging to this table will still be visible for some unbounded time. This + # belonging to this table may still be visible for some unbounded time. This # happens only with PURGE. A regular DROP TABLE is just a copy of files which is # consistent. + # The ADLS Python client is not strongly consistent, so these files may still be + # visible after a DROP. 
(Remove after IMPALA-5335 is resolved) assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/".\ format(unique_database)) assert not self.filesystem_client.exists("test-warehouse/{0}.db/t2/t2.txt".\ @@ -82,6 +84,7 @@ class TestDdlStatements(TestDdlBase): self.filesystem_client.delete_file_dir( "test-warehouse/{0}.db/data_t3".format(unique_database), recursive=True) + @SkipIfADLS.eventually_consistent @SkipIfLocal.hdfs_client def test_drop_cleans_hdfs_dirs(self, unique_database): self.client.execute('use default') @@ -129,6 +132,7 @@ class TestDdlStatements(TestDdlBase): # Re-create database to make unique_database teardown succeed. self._create_db(unique_database) + @SkipIfADLS.eventually_consistent @SkipIfLocal.hdfs_client def test_truncate_cleans_hdfs_files(self, unique_database): # Verify the db directory exists @@ -291,11 +295,13 @@ class TestDdlStatements(TestDdlBase): # Drop the partition (with purge) and make sure it doesn't exist in trash self.client.execute("alter table {0}.t1 drop partition(j=2) purge".\ format(unique_database)); - if not IS_S3: + if not IS_S3 and not IS_ADLS: # In S3, deletes are eventual. So even though we dropped the partition, the files - # belonging to this partition will still be visible for some unbounded time. This + # belonging to this partition may still be visible for some unbounded time. This # happens only with PURGE. A regular DROP TABLE is just a copy of files which is # consistent. + # The ADLS Python client is not strongly consistent, so these files may still be + # visible after a DROP. (Remove after IMPALA-5335 is resolved) assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".\ format(unique_database)) assert not self.filesystem_client.exists("test-warehouse/{0}.db/t1/j=2".\ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_hdfs_encryption.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hdfs_encryption.py b/tests/metadata/test_hdfs_encryption.py index 9ace3c4..ff135b8 100644 --- a/tests/metadata/test_hdfs_encryption.py +++ b/tests/metadata/test_hdfs_encryption.py @@ -19,7 +19,7 @@ import getpass import pytest from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import ( create_single_exec_option_dimension, create_uncompressed_text_dimension) @@ -34,6 +34,7 @@ TMP_DIR = '/%s' % (PYWEBHDFS_TMP_DIR) @SkipIfS3.hdfs_encryption [email protected]_encryption @SkipIfIsilon.hdfs_encryption @SkipIfLocal.hdfs_encryption @pytest.mark.execute_serially http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_hdfs_permissions.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hdfs_permissions.py b/tests/metadata/test_hdfs_permissions.py index 3c4957e..af5a41c 100644 --- a/tests/metadata/test_hdfs_permissions.py +++ b/tests/metadata/test_hdfs_permissions.py @@ -16,7 +16,7 @@ # under the License. 
from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import ( create_single_exec_option_dimension, create_uncompressed_text_dimension) @@ -27,6 +27,7 @@ TBL_LOC = '%s/%s' % (WAREHOUSE, TEST_TBL) @SkipIfS3.hdfs_acls [email protected]_acls @SkipIfLocal.hdfs_client class TestHdfsPermissions(ImpalaTestSuite): @classmethod http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_hms_integration.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py index 05bacfc..bcdc1e6 100644 --- a/tests/metadata/test_hms_integration.py +++ b/tests/metadata/test_hms_integration.py @@ -30,7 +30,7 @@ import string from subprocess import call from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import ( create_single_exec_option_dimension, create_uncompressed_text_dimension) @@ -41,6 +41,7 @@ logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s') LOG = logging.getLogger('test_configuration') @SkipIfS3.hive [email protected] @SkipIfIsilon.hive @SkipIfLocal.hive class TestHmsIntegrationSanity(ImpalaTestSuite): @@ -87,6 +88,7 @@ class TestHmsIntegrationSanity(ImpalaTestSuite): assert 'test_tbl' in self.client.execute("show tables in hms_sanity_db").data @SkipIfS3.hive [email protected] @SkipIfIsilon.hive @SkipIfLocal.hive class TestHmsIntegration(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_metadata_query_statements.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_metadata_query_statements.py b/tests/metadata/test_metadata_query_statements.py index cadc209..bf21ecb 100644 --- a/tests/metadata/test_metadata_query_statements.py +++ b/tests/metadata/test_metadata_query_statements.py @@ -22,7 +22,7 @@ import re from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfIsilon, SkipIfS3, SkipIfLocal +from tests.common.skip import SkipIfIsilon, SkipIfS3, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import ALL_NODES_ONLY from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_dimensions import create_uncompressed_text_dimension @@ -75,6 +75,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite): # data doesn't reside in hdfs. @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfLocal.hive def test_describe_formatted(self, vector, unique_database): # For describe formmated, we try to match Hive's output as closely as possible. 
@@ -148,6 +149,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite): self.client.execute(self.CREATE_DATA_SRC_STMT % (name,)) @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive @pytest.mark.execute_serially # because of invalidate metadata http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_partition_metadata.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_partition_metadata.py b/tests/metadata/test_partition_metadata.py index c9f90e1..0758fde 100644 --- a/tests/metadata/test_partition_metadata.py +++ b/tests/metadata/test_partition_metadata.py @@ -16,7 +16,7 @@ # under the License. from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension from tests.util.filesystem_utils import WAREHOUSE @@ -78,6 +78,7 @@ class TestPartitionMetadata(ImpalaTestSuite): assert data.split('\t') == ['6', '9'] @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive def test_partition_metadata_compatibility(self, vector, unique_database): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_refresh_partition.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_refresh_partition.py b/tests/metadata/test_refresh_partition.py index 4602ebc..a8b5042 100644 --- a/tests/metadata/test_refresh_partition.py +++ b/tests/metadata/test_refresh_partition.py @@ -17,11 +17,12 @@ from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_dimensions import create_uncompressed_text_dimension -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.util.filesystem_utils import get_fs_path @SkipIfS3.hive [email protected] @SkipIfIsilon.hive @SkipIfLocal.hive class TestRefreshPartition(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/metadata/test_views_compatibility.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_views_compatibility.py b/tests/metadata/test_views_compatibility.py index 3fd38f2..116eb10 100644 --- a/tests/metadata/test_views_compatibility.py +++ b/tests/metadata/test_views_compatibility.py @@ -22,7 +22,7 @@ from subprocess import call from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_uncompressed_text_dimension from tests.util.test_file_parser import QueryTestSectionReader @@ -46,6 +46,7 @@ from tests.util.test_file_parser import QueryTestSectionReader # Missing Coverage: Views created by Hive and Impala being visible and queryble by each # other on non hdfs storage. 
@SkipIfS3.hive [email protected] @SkipIfIsilon.hive @SkipIfLocal.hive class TestViewCompatibility(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_compressed_formats.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_compressed_formats.py b/tests/query_test/test_compressed_formats.py index efb2c2b..6ea89f5 100644 --- a/tests/query_test/test_compressed_formats.py +++ b/tests/query_test/test_compressed_formats.py @@ -23,7 +23,7 @@ from os.path import join from subprocess import call from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.util.filesystem_utils import get_fs_path @@ -40,6 +40,7 @@ compression_formats = [ # Missing Coverage: Compressed data written by Hive is queriable by Impala on a non-hdfs # filesystem. @SkipIfS3.hive [email protected] @SkipIfIsilon.hive @SkipIfLocal.hive class TestCompressedFormats(ImpalaTestSuite): @@ -148,6 +149,7 @@ class TestTableWriters(ImpalaTestSuite): self.run_test_case('QueryTest/seq-writer', vector, unique_database) @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive def test_seq_writer_hive_compatibility(self, vector, unique_database): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_hdfs_caching.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py index f446913..96b25ec 100644 --- a/tests/query_test/test_hdfs_caching.py +++ b/tests/query_test/test_hdfs_caching.py @@ -24,13 +24,14 @@ from subprocess import check_call from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension from tests.util.filesystem_utils import get_fs_path from tests.util.shell_util import exec_process # End to end test that hdfs caching is working. @SkipIfS3.caching # S3: missing coverage: verify SET CACHED gives error [email protected] @SkipIfIsilon.caching @SkipIfLocal.caching class TestHdfsCaching(ImpalaTestSuite): @@ -106,6 +107,7 @@ class TestHdfsCaching(ImpalaTestSuite): # run as a part of exhaustive tests which require the workload to be 'functional-query'. 
# TODO: Move this to TestHdfsCaching once we make exhaustive tests run for other workloads @SkipIfS3.caching [email protected] @SkipIfIsilon.caching @SkipIfLocal.caching class TestHdfsCachingFallbackPath(ImpalaTestSuite): @@ -114,6 +116,7 @@ class TestHdfsCachingFallbackPath(ImpalaTestSuite): return 'functional-query' @SkipIfS3.hdfs_encryption + @SkipIfADLS.hdfs_encryption @SkipIfIsilon.hdfs_encryption @SkipIfLocal.hdfs_encryption def test_hdfs_caching_fallback_path(self, vector, unique_database, testid_checksum): @@ -164,6 +167,7 @@ class TestHdfsCachingFallbackPath(ImpalaTestSuite): @SkipIfS3.caching [email protected] @SkipIfIsilon.caching @SkipIfLocal.caching class TestHdfsCachingDdl(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_hdfs_fd_caching.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_hdfs_fd_caching.py b/tests/query_test/test_hdfs_fd_caching.py index 4d72714..6961f3b 100644 --- a/tests/query_test/test_hdfs_fd_caching.py +++ b/tests/query_test/test_hdfs_fd_caching.py @@ -20,10 +20,11 @@ import pytest from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3 +from tests.common.skip import SkipIfS3, SkipIfADLS @SkipIfS3.caching [email protected] class TestHdfsFdCaching(ImpalaTestSuite): """ This test suite tests the behavior of HDFS file descriptor caching by evaluating the http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_insert_behaviour.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py index 1584343..e70fdee 100644 --- a/tests/query_test/test_insert_behaviour.py +++ b/tests/query_test/test_insert_behaviour.py @@ -22,7 +22,7 @@ import pytest from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3 @SkipIfLocal.hdfs_client @@ -45,6 +45,7 @@ class TestInsertBehaviour(ImpalaTestSuite): if method.__name__ == "test_insert_select_with_empty_resultset": self.cleanup_db(self.TEST_DB_NAME) + @SkipIfADLS.eventually_consistent @pytest.mark.execute_serially def test_insert_removes_staging_files(self): TBL_NAME = "insert_overwrite_nopart" @@ -130,6 +131,7 @@ class TestInsertBehaviour(ImpalaTestSuite): assert len(self.filesystem_client.ls(part_dir)) == 1 @SkipIfS3.hdfs_acls + @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls @pytest.mark.xfail(run=False, reason="Fails intermittently on test clusters") @pytest.mark.execute_serially @@ -190,6 +192,7 @@ class TestInsertBehaviour(ImpalaTestSuite): check_has_acls("p1=1/p2=2/p3=30", "default:group:new_leaf_group:-w-") @SkipIfS3.hdfs_acls + @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_insert_file_permissions(self, unique_database): """Test that INSERT correctly respects file permission (minimum ACLs)""" @@ -240,6 +243,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_success(self.client, insert_query) @SkipIfS3.hdfs_acls + @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_insert_acl_permissions(self, unique_database): """Test that INSERT correctly respects ACLs""" @@ 
-317,6 +321,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_success(self.client, insert_query) @SkipIfS3.hdfs_acls + @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_load_permissions(self, unique_database): # We rely on test_insert_acl_permissions() to exhaustively check that ACL semantics @@ -369,6 +374,7 @@ class TestInsertBehaviour(ImpalaTestSuite): # We expect this to succeed, it's not an error if all files in the dir cannot be read self.execute_query_expect_success(self.client, load_dir_query) + @SkipIfADLS.eventually_consistent @pytest.mark.execute_serially def test_insert_select_with_empty_resultset(self): """Test insert/select query won't trigger partition directory or zero size data file @@ -439,6 +445,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_failure(self.client, insert_query) @SkipIfS3.hdfs_acls + @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_multiple_group_acls(self, unique_database): """Test that INSERT correctly respects multiple group ACLs""" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_insert_parquet.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py index 9d9eb7b..ee24549 100644 --- a/tests/query_test/test_insert_parquet.py +++ b/tests/query_test/test_insert_parquet.py @@ -30,7 +30,7 @@ from parquet.ttypes import ColumnOrder, SortingColumn, TypeDefinedOrder from tests.common.environ import impalad_basedir from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIfIsilon, SkipIfLocal, SkipIfS3 +from tests.common.skip import SkipIfIsilon, SkipIfLocal, SkipIfS3, SkipIfADLS from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.util.filesystem_utils import get_fs_path @@ -161,6 +161,10 @@ class TestInsertParquetVerifySize(ImpalaTestSuite): cls.ImpalaTestMatrix.add_dimension( ImpalaTestDimension("compression_codec", *PARQUET_CODECS)) + # ADLS does not have a configurable block size, so the 'PARQUET_FILE_SIZE' option + # that's passed as a hint to Hadoop is ignored for AdlFileSystem. So, we skip this + # test for ADLS. + @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.hdfs_client def test_insert_parquet_verify_size(self, vector, unique_database): @@ -297,6 +301,7 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite): @SkipIfIsilon.hive @SkipIfLocal.hive @SkipIfS3.hive [email protected] # TODO: Should we move this to test_parquet_stats.py? 
class TestHdfsParquetTableStatsWriter(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_join_queries.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_join_queries.py b/tests/query_test/test_join_queries.py index 631db00..db1a4fa 100644 --- a/tests/query_test/test_join_queries.py +++ b/tests/query_test/test_join_queries.py @@ -26,7 +26,8 @@ from tests.common.skip import ( SkipIfIsilon, SkipIfLocal, SkipIfOldAggsJoins, - SkipIfS3) + SkipIfS3, + SkipIfADLS) from tests.common.test_vector import ImpalaTestDimension class TestJoinQueries(ImpalaTestSuite): @@ -60,6 +61,7 @@ class TestJoinQueries(ImpalaTestSuite): self.run_test_case('QueryTest/joins-partitioned', vector) @SkipIfS3.hbase + @SkipIfADLS.hbase @SkipIfIsilon.hbase @SkipIf.skip_hbase @SkipIfLocal.hbase http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_nested_types.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py index ffed8b5..5635898 100644 --- a/tests/query_test/test_nested_types.py +++ b/tests/query_test/test_nested_types.py @@ -22,7 +22,13 @@ from subprocess import check_call from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfOldAggsJoins, SkipIfIsilon, SkipIfS3, SkipIfLocal +from tests.common.skip import ( + SkipIfOldAggsJoins, + SkipIfIsilon, + SkipIfS3, + SkipIfADLS, + SkipIfLocal) + from tests.util.filesystem_utils import WAREHOUSE, get_fs_path @SkipIfOldAggsJoins.nested_types @@ -553,6 +559,7 @@ class TestMaxNestingDepth(ImpalaTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfLocal.hive def test_load_hive_table(self, vector, unique_database): """Tests that Impala rejects Hive-created tables with complex types that exceed http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_observability.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py index 0f82bd5..cdb322d 100644 --- a/tests/query_test/test_observability.py +++ b/tests/query_test/test_observability.py @@ -16,7 +16,7 @@ # under the License. 
from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal class TestObservability(ImpalaTestSuite): @classmethod @@ -55,6 +55,7 @@ class TestObservability(ImpalaTestSuite): @SkipIfS3.hbase @SkipIfLocal.hbase @SkipIfIsilon.hbase + @SkipIfADLS.hbase def test_scan_summary(self): """IMPALA-4499: Checks that the exec summary for scans show the table name.""" # HDFS table http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_partitioning.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_partitioning.py b/tests/query_test/test_partitioning.py index 04d6c3a..8fc3399 100644 --- a/tests/query_test/test_partitioning.py +++ b/tests/query_test/test_partitioning.py @@ -19,7 +19,7 @@ import pytest from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension # Tests to validate HDFS partitioning. @@ -46,6 +46,7 @@ class TestPartitioning(ImpalaTestSuite): # Missing Coverage: Impala deals with boolean partitions created by Hive on a non-hdfs # filesystem. @SkipIfS3.hive + @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive def test_boolean_partitions(self, vector, unique_database): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/query_test/test_scanners.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index 5dbe02a..74a69ac 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -32,7 +32,12 @@ from subprocess import check_call from testdata.common import widetable from tests.common.impala_test_suite import ImpalaTestSuite, LOG -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfOldAggsJoins, SkipIfLocal +from tests.common.skip import ( + SkipIfS3, + SkipIfADLS, + SkipIfIsilon, + SkipIfOldAggsJoins, + SkipIfLocal) from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_result_verifier import ( parse_column_types, @@ -335,6 +340,7 @@ class TestParquet(ImpalaTestSuite): vector, unique_database) @SkipIfS3.hdfs_block_size + @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.multiple_impalad def test_misaligned_parquet_row_groups(self, vector): @@ -390,6 +396,7 @@ class TestParquet(ImpalaTestSuite): assert total == num_scanners_with_no_reads @SkipIfS3.hdfs_block_size + @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.multiple_impalad def test_multiple_blocks(self, vector): @@ -403,6 +410,7 @@ class TestParquet(ImpalaTestSuite): self._multiple_blocks_helper(table_name, 40000, ranges_per_node=2) @SkipIfS3.hdfs_block_size + @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.multiple_impalad def test_multiple_blocks_one_row_group(self, vector): @@ -693,6 +701,7 @@ class TestTextScanRangeLengths(ImpalaTestSuite): # Missing Coverage: No coverage for truncated files errors or scans. 
@SkipIfS3.hive +@SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive class TestScanTruncatedFiles(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/stress/test_ddl_stress.py ---------------------------------------------------------------------- diff --git a/tests/stress/test_ddl_stress.py b/tests/stress/test_ddl_stress.py index 9aa6de5..b46f201 100644 --- a/tests/stress/test_ddl_stress.py +++ b/tests/stress/test_ddl_stress.py @@ -18,7 +18,7 @@ import pytest from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal # Number of tables to create per thread NUM_TBLS_PER_THREAD = 10 @@ -48,6 +48,7 @@ class TestDdlStress(ImpalaTestSuite): v.get_value('table_format').compression_codec == 'none')) @SkipIfS3.caching + @SkipIfADLS.caching @SkipIfIsilon.caching @SkipIfLocal.caching @pytest.mark.stress http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/util/adls_util.py ---------------------------------------------------------------------- diff --git a/tests/util/adls_util.py b/tests/util/adls_util.py new file mode 100644 index 0000000..f616074 --- /dev/null +++ b/tests/util/adls_util.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ADLS access utilities +# +# This file uses the azure-data-lake-store-python client and provides simple +# functions to the Impala test suite to access Azure Data Lake Store. + +from azure.datalake.store import core, lib, multithread, exceptions +from tests.util.filesystem_base import BaseFilesystem +from tests.util.filesystem_utils import ADLS_CLIENT_ID, ADLS_TENANT_ID, ADLS_CLIENT_SECRET +class ADLSClient(BaseFilesystem): + + def __init__(self, store): + self.token = lib.auth(tenant_id = ADLS_TENANT_ID, + client_secret = ADLS_CLIENT_SECRET, + client_id = ADLS_CLIENT_ID) + self.adlsclient = core.AzureDLFileSystem(self.token, store_name=store) + + def create_file(self, path, file_data, overwrite=True): + if not overwrite and self.exists(path): return False + with self.adlsclient.open(path, 'wb') as f: + num_bytes = f.write(file_data) + assert num_bytes == len(file_data), "ADLS write failed." + return True + + def make_dir(self, path, permission=None): + self.adlsclient.mkdir(path) + return True + + def copy(self, src, dst): + # The ADLS Python client doesn't support cp() yet, so we have to download and + # reupload to the destination.
+ src_contents = self.adlsclient.cat(src) + self.create_file(dst, src_contents, overwrite=True) + assert self.exists(dst), \ + 'ADLS copy failed: Destination file {dst} does not exist'.format(dst=dst) + + def ls(self, path): + file_paths = self.adlsclient.ls(path) + files = [] + for f in file_paths: + fname = f.split("/")[-1] + if fname != '': + files += [fname] + return files + + def exists(self, path): + return self.adlsclient.exists(path) + + def delete_file_dir(self, path, recursive=False): + try: + self.adlsclient.rm(path, recursive) + except exceptions.FileNotFoundError as e: + return False + return True + + def get_all_file_sizes(self, path): + """Returns a list of integers which are all the file sizes of files found under + 'path'.""" + return [self.adlsclient.info(f)['length'] for f in self.ls(path)] http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4086f2c8/tests/util/filesystem_utils.py ---------------------------------------------------------------------- diff --git a/tests/util/filesystem_utils.py b/tests/util/filesystem_utils.py index d435720..77112be 100644 --- a/tests/util/filesystem_utils.py +++ b/tests/util/filesystem_utils.py @@ -29,6 +29,7 @@ IS_S3 = FILESYSTEM == "s3" IS_ISILON = FILESYSTEM == "isilon" IS_LOCAL = FILESYSTEM == "local" IS_HDFS = FILESYSTEM == "hdfs" +IS_ADLS = FILESYSTEM == "adls" # This condition satisfies both the states where one can assume a default fs # - The environment variable is set to an empty string. # - The environment variable is unset ( None ) @@ -42,6 +43,12 @@ ISILON_WEBHDFS_PORT = 8082 # S3 specific values S3_BUCKET_NAME = os.getenv("S3_BUCKET") +# ADLS specific values +ADLS_STORE_NAME = os.getenv("azure_data_lake_store_name") +ADLS_CLIENT_ID = os.getenv("azure_client_id") +ADLS_TENANT_ID = os.getenv("azure_tenant_id") +ADLS_CLIENT_SECRET = os.getenv("azure_client_secret") + def get_fs_path(path): return "%s%s" % (FILESYSTEM_PREFIX, path)
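
For reference, a minimal sketch of how the ADLSClient added above can be exercised. This is illustrative only and not part of the patch; it assumes the azure_* environment variables read by tests/util/filesystem_utils.py are exported, and the directory path below is a hypothetical scratch location.

# Illustrative sketch only -- not part of this commit. Assumes the azure_*
# environment variables used by tests/util/filesystem_utils.py are set; the
# scratch path is a placeholder.
from tests.util.adls_util import ADLSClient
from tests.util.filesystem_utils import ADLS_STORE_NAME

client = ADLSClient(ADLS_STORE_NAME)                       # authenticates via lib.auth()
client.make_dir("/test-warehouse/adls_smoke_test")         # mkdir on the store
client.create_file("/test-warehouse/adls_smoke_test/f1.txt", "hello adls")
assert "f1.txt" in client.ls("/test-warehouse/adls_smoke_test")
client.delete_file_dir("/test-warehouse/adls_smoke_test", recursive=True)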

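The @SkipIfADLS.* markers applied throughout these tests are defined in tests/common/skip.py, which is part of this change but not reproduced in this excerpt. Judging from the attribute names used in the diffs and the new IS_ADLS flag in filesystem_utils.py, they are presumably ordinary pytest skipif marks along the lines of the sketch below; the reason strings are illustrative, not quoted from the patch.

# Rough sketch of the SkipIfADLS helper (inferred, not copied from skip.py).
import pytest
from tests.util.filesystem_utils import IS_ADLS

class SkipIfADLS:
  hdfs_acls = pytest.mark.skipif(IS_ADLS, reason="HDFS ACLs are not supported on ADLS")
  hdfs_block_size = pytest.mark.skipif(IS_ADLS, reason="ADLS has no configurable block size")
  hive = pytest.mark.skipif(IS_ADLS, reason="Hive interop tests are not run against ADLS")
  hbase = pytest.mark.skipif(IS_ADLS, reason="HBase is not started when running against ADLS")
  caching = pytest.mark.skipif(IS_ADLS, reason="HDFS caching does not apply to ADLS")
  eventually_consistent = pytest.mark.skipif(IS_ADLS,
      reason="ADLS Python client may lag behind metadata changes (IMPALA-5335)")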