IMPALA-7681. Add Azure Blob File System (ADLS Gen2) support. HADOOP-15407 adds a new FileSystem implementation called "ABFS" for the ADLS Gen2 service. It's in the hadoop-azure module as a replacement for WASB. Filesystem semantics should be the same, so skipped tests and other behavior changes have simply mirrored what is done for ADLS Gen1 by default. Tests skipped on ADLS Gen1 due to eventual consistency of the Python client can be run against ADLS Gen2.
Change-Id: I5120b071760e7655e78902dce8483f8f54de445d Reviewed-on: http://gerrit.cloudera.org:8080/11630 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7a022cf3 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7a022cf3 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7a022cf3 Branch: refs/heads/master Commit: 7a022cf36a2c678dcff02d48db0641e6f74f068f Parents: 0340a15 Author: Sean Mackrory <[email protected]> Authored: Tue Oct 9 15:10:36 2018 -0600 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Oct 20 06:43:00 2018 +0000 ---------------------------------------------------------------------- be/src/exec/hdfs-table-sink.cc | 1 + be/src/runtime/io/disk-io-mgr-test.cc | 3 +- be/src/runtime/io/disk-io-mgr.cc | 7 ++ be/src/runtime/io/disk-io-mgr.h | 4 + be/src/runtime/io/scan-range.cc | 8 +- be/src/util/hdfs-util.cc | 8 ++ be/src/util/hdfs-util.h | 3 + bin/impala-config.sh | 24 ++++ fe/pom.xml | 6 + .../apache/impala/analysis/LoadDataStmt.java | 9 +- .../org/apache/impala/catalog/HdfsTable.java | 1 + .../apache/impala/common/FileSystemUtil.java | 24 ++++ .../org/apache/impala/service/JniFrontend.java | 4 + .../impala/analysis/AnalyzeStmtsTest.java | 2 +- .../common/etc/hadoop/conf/core-site.xml.tmpl | 25 ++++ tests/common/impala_test_suite.py | 6 +- tests/common/skip.py | 22 ++++ .../custom_cluster/test_admission_controller.py | 2 + tests/custom_cluster/test_hdfs_fd_caching.py | 7 +- tests/custom_cluster/test_insert_behaviour.py | 3 +- tests/custom_cluster/test_metadata_replicas.py | 2 + .../test_parquet_max_page_header.py | 3 +- tests/custom_cluster/test_permanent_udfs.py | 8 +- tests/data_errors/test_data_errors.py | 5 +- tests/failure/test_failpoints.py | 4 +- tests/metadata/test_compute_stats.py | 3 +- tests/metadata/test_ddl.py | 4 +- tests/metadata/test_hdfs_encryption.py | 3 +- tests/metadata/test_hdfs_permissions.py | 3 +- tests/metadata/test_hms_integration.py | 4 +- .../metadata/test_metadata_query_statements.py | 4 +- tests/metadata/test_partition_metadata.py | 3 +- tests/metadata/test_refresh_partition.py | 3 +- tests/metadata/test_views_compatibility.py | 3 +- tests/query_test/test_compressed_formats.py | 3 +- tests/query_test/test_hdfs_caching.py | 7 +- tests/query_test/test_insert.py | 4 +- tests/query_test/test_insert_behaviour.py | 9 +- tests/query_test/test_insert_parquet.py | 4 +- tests/query_test/test_join_queries.py | 2 + tests/query_test/test_nested_types.py | 3 + tests/query_test/test_observability.py | 3 +- tests/query_test/test_partitioning.py | 3 +- tests/query_test/test_resource_limits.py | 5 +- tests/query_test/test_scanners.py | 7 ++ tests/stress/test_ddl_stress.py | 3 +- tests/util/abfs_util.py | 113 +++++++++++++++++++ tests/util/filesystem_utils.py | 5 +- 48 files changed, 360 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/exec/hdfs-table-sink.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-table-sink.cc b/be/src/exec/hdfs-table-sink.cc index 9c46638..24df2ff 100644 --- a/be/src/exec/hdfs-table-sink.cc +++ b/be/src/exec/hdfs-table-sink.cc @@ -378,6 +378,7 @@ Status HdfsTableSink::CreateNewTmpFile(RuntimeState* state, } if (IsS3APath(output_partition->current_file_name.c_str()) || + IsABFSPath(output_partition->current_file_name.c_str()) || IsADLSPath(output_partition->current_file_name.c_str())) { // On S3A, the file cannot be stat'ed until after it's closed, and even so, the block // size reported will be just the filesystem default. Similarly, the block size http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/runtime/io/disk-io-mgr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/io/disk-io-mgr-test.cc b/be/src/runtime/io/disk-io-mgr-test.cc index 7a9bc23..6afdb5d 100644 --- a/be/src/runtime/io/disk-io-mgr-test.cc +++ b/be/src/runtime/io/disk-io-mgr-test.cc @@ -51,6 +51,7 @@ DECLARE_int64(min_buffer_size); DECLARE_int32(num_remote_hdfs_io_threads); DECLARE_int32(num_s3_io_threads); DECLARE_int32(num_adls_io_threads); +DECLARE_int32(num_abfs_io_threads); #ifndef NDEBUG DECLARE_int32(stress_disk_read_delay_ms); #endif @@ -1565,7 +1566,7 @@ TEST_F(DiskIoMgrTest, ReadIntoClientBufferError) { TEST_F(DiskIoMgrTest, VerifyNumThreadsParameter) { InitRootReservation(LARGE_RESERVATION_LIMIT); const int num_io_threads_for_remote_disks = FLAGS_num_remote_hdfs_io_threads - + FLAGS_num_s3_io_threads + FLAGS_num_adls_io_threads; + + FLAGS_num_s3_io_threads + FLAGS_num_adls_io_threads + FLAGS_num_abfs_io_threads; // Verify num_io_threads_per_rotational_disk and num_io_threads_per_solid_state_disk. // Since we do not have control over which disk is used, we check for either type http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/runtime/io/disk-io-mgr.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc index fa871d1..fb0c876 100644 --- a/be/src/runtime/io/disk-io-mgr.cc +++ b/be/src/runtime/io/disk-io-mgr.cc @@ -83,6 +83,8 @@ DEFINE_int32(num_remote_hdfs_io_threads, 8, "Number of remote HDFS I/O threads") // open to S3 and use of multiple CPU cores since S3 reads are relatively compute // expensive (SSL and JNI buffer overheads). DEFINE_int32(num_s3_io_threads, 16, "Number of S3 I/O threads"); +// The maximum number of ABFS I/O threads. TODO: choose the default empirically. +DEFINE_int32(num_abfs_io_threads, 16, "Number of ABFS I/O threads"); // The maximum number of ADLS I/O threads. This number is a good default to have for // clusters that may vary widely in size, due to an undocumented concurrency limit // enforced by ADLS for a cluster, which spans between 500-700. For smaller clusters @@ -233,6 +235,9 @@ Status DiskIoMgr::Init() { } else if (i == RemoteS3DiskId()) { num_threads_per_disk = FLAGS_num_s3_io_threads; device_name = "S3 remote"; + } else if (i == RemoteAbfsDiskId()) { + num_threads_per_disk = FLAGS_num_abfs_io_threads; + device_name = "ABFS remote"; } else if (i == RemoteAdlsDiskId()) { num_threads_per_disk = FLAGS_num_adls_io_threads; device_name = "ADLS remote"; @@ -457,10 +462,12 @@ int DiskIoMgr::AssignQueue(const char* file, int disk_id, bool expected_local) { return RemoteDfsDiskId(); } if (IsS3APath(file)) return RemoteS3DiskId(); + if (IsABFSPath(file)) return RemoteAbfsDiskId(); if (IsADLSPath(file)) return RemoteAdlsDiskId(); } // Assign to a local disk queue. DCHECK(!IsS3APath(file)); // S3 is always remote. + DCHECK(!IsABFSPath(file)); // ABFS is always remote. DCHECK(!IsADLSPath(file)); // ADLS is always remote. if (disk_id == -1) { // disk id is unknown, assign it an arbitrary one. http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/runtime/io/disk-io-mgr.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/io/disk-io-mgr.h b/be/src/runtime/io/disk-io-mgr.h index d5c7643..d36906d 100644 --- a/be/src/runtime/io/disk-io-mgr.h +++ b/be/src/runtime/io/disk-io-mgr.h @@ -286,6 +286,9 @@ class DiskIoMgr : public CacheLineAligned { /// The disk ID (and therefore disk_queues_ index) used for S3 accesses. int RemoteS3DiskId() const { return num_local_disks() + REMOTE_S3_DISK_OFFSET; } + /// The disk ID (and therefore disk_queues_ index) used for ABFS accesses. + int RemoteAbfsDiskId() const { return num_local_disks() + REMOTE_ABFS_DISK_OFFSET; } + /// The disk ID (and therefore disk_queues_ index) used for ADLS accesses. int RemoteAdlsDiskId() const { return num_local_disks() + REMOTE_ADLS_DISK_OFFSET; } @@ -335,6 +338,7 @@ class DiskIoMgr : public CacheLineAligned { REMOTE_DFS_DISK_OFFSET = 0, REMOTE_S3_DISK_OFFSET, REMOTE_ADLS_DISK_OFFSET, + REMOTE_ABFS_DISK_OFFSET, REMOTE_NUM_DISKS }; http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/runtime/io/scan-range.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/io/scan-range.cc b/be/src/runtime/io/scan-range.cc index 660710e..41915b3 100644 --- a/be/src/runtime/io/scan-range.cc +++ b/be/src/runtime/io/scan-range.cc @@ -29,11 +29,13 @@ using namespace impala; using namespace impala::io; // TODO: Run perf tests and empirically settle on the most optimal default value for the -// read buffer size. Currently setting it as 128k for the same reason as for S3, i.e. +// read buffer sizes. Currently setting them as 128k for the same reason as for S3, i.e. // due to JNI array allocation and memcpy overhead, 128k was emperically found to have the // least overhead. DEFINE_int64(adls_read_chunk_size, 128 * 1024, "The maximum read chunk size to use when " "reading from ADLS."); +DEFINE_int64(abfs_read_chunk_size, 128 * 1024, "The maximum read chunk size to use when " + "reading from ABFS."); // Implementation of the ScanRange functionality. Each ScanRange contains a queue // of ready buffers. For each ScanRange, there is only a single producer and @@ -554,6 +556,10 @@ int64_t ScanRange::MaxReadChunkSize() const { DCHECK(IsADLSPath(file())); return FLAGS_adls_read_chunk_size; } + if (disk_id_ == io_mgr_->RemoteAbfsDiskId()) { + DCHECK(IsABFSPath(file())); + return FLAGS_abfs_read_chunk_size; + } // The length argument of hdfsRead() is an int. Ensure we don't overflow it. return numeric_limits<int>::max(); } http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/util/hdfs-util.cc ---------------------------------------------------------------------- diff --git a/be/src/util/hdfs-util.cc b/be/src/util/hdfs-util.cc index aa3466f..d344309 100644 --- a/be/src/util/hdfs-util.cc +++ b/be/src/util/hdfs-util.cc @@ -86,6 +86,14 @@ bool IsS3APath(const char* path) { return strncmp(path, "s3a://", 6) == 0; } +bool IsABFSPath(const char* path) { + if (strstr(path, ":/") == NULL) { + return ExecEnv::GetInstance()->default_fs().compare(0, 7, "abfs://") == 0 || + ExecEnv::GetInstance()->default_fs().compare(0, 8, "abfss://") == 0; + } + return strncmp(path, "abfs://", 7) == 0 || strncmp(path, "abfss://", 8) == 0; +} + bool IsADLSPath(const char* path) { if (strstr(path, ":/") == NULL) { return ExecEnv::GetInstance()->default_fs().compare(0, 6, "adl://") == 0; http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/be/src/util/hdfs-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/hdfs-util.h b/be/src/util/hdfs-util.h index 858a9b4..f57efd9 100644 --- a/be/src/util/hdfs-util.h +++ b/be/src/util/hdfs-util.h @@ -50,6 +50,9 @@ bool IsHdfsPath(const char* path); /// Returns true iff the path refers to a location on an S3A filesystem. bool IsS3APath(const char* path); +/// Returns true iff the path refers to a location on an ABFS filesystem. +bool IsABFSPath(const char* path); + /// Returns true iff the path refers to a location on an ADL filesystem. bool IsADLSPath(const char* path); http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/bin/impala-config.sh ---------------------------------------------------------------------- diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 9f8408c..57538e5 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -263,6 +263,8 @@ export azure_tenant_id="${azure_tenant_id-DummyAdlsTenantId}" export azure_client_id="${azure_client_id-DummyAdlsClientId}" export azure_client_secret="${azure_client_secret-DummyAdlsClientSecret}" export azure_data_lake_store_name="${azure_data_lake_store_name-}" +export azure_storage_account_name="${azure_storage_account_name-}" +export azure_storage_container_name="${azure_storage_container_name-}" export HDFS_REPLICATION="${HDFS_REPLICATION-3}" export ISILON_NAMENODE="${ISILON_NAMENODE-}" export DEFAULT_FS="${DEFAULT_FS-hdfs://localhost:20500}" @@ -352,6 +354,28 @@ elif [ "${TARGET_FILESYSTEM}" = "adls" ]; then fi DEFAULT_FS="adl://${azure_data_lake_store_name}.azuredatalakestore.net" export DEFAULT_FS +elif [ "${TARGET_FILESYSTEM}" = "abfs" ]; then + # ABFS is also known as ADLS Gen2, and they can share credentials + # Basic error checking + if [[ "${azure_client_id}" = "DummyAdlsClientId" ||\ + "${azure_tenant_id}" = "DummyAdlsTenantId" ||\ + "${azure_client_secret}" = "DummyAdlsClientSecret" ]]; then + echo "All 3 of the following need to be assigned valid values and belong + to the owner of the Azure storage account in order to access the + filesystem: azure_client_id, azure_tenant_id, azure_client_secret." + return 1 + fi + if [[ "${azure_storage_account_name}" = "" ]]; then + echo "azure_storage_account_name cannot be an empty string for ABFS" + return 1 + fi + if [[ "${azure_storage_container_name}" = "" ]]; then + echo "azure_storage_container_name cannot be an empty string for ABFS" + return 1 + fi + domain="${azure_storage_account_name}.dfs.core.windows.net" + DEFAULT_FS="abfss://${azure_storage_container_name}@${domain}" + export DEFAULT_FS elif [ "${TARGET_FILESYSTEM}" = "isilon" ]; then if [ "${ISILON_NAMENODE}" = "" ]; then echo "In order to access the Isilon filesystem, ISILON_NAMENODE" http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/fe/pom.xml ---------------------------------------------------------------------- diff --git a/fe/pom.xml b/fe/pom.xml index 5199d4f..f44c14f 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -88,6 +88,12 @@ under the License. <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-azure</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-azure-datalake</artifactId> <version>${hadoop.version}</version> <exclusions> http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java b/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java index 128973f..0608987 100644 --- a/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java @@ -24,6 +24,8 @@ import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.adl.AdlFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -143,9 +145,11 @@ public class LoadDataStmt extends StatementBase { Path source = sourceDataPath_.getPath(); FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration()); if (!(fs instanceof DistributedFileSystem) && !(fs instanceof S3AFileSystem) && + !(fs instanceof AzureBlobFileSystem) && + !(fs instanceof SecureAzureBlobFileSystem) && !(fs instanceof AdlFileSystem)) { throw new AnalysisException(String.format("INPATH location '%s' " + - "must point to an HDFS, S3A or ADL filesystem.", sourceDataPath_)); + "must point to an HDFS, S3A, ADL or ABFS filesystem.", sourceDataPath_)); } if (!fs.exists(source)) { throw new AnalysisException(String.format( @@ -157,7 +161,8 @@ public class LoadDataStmt extends StatementBase { // its parent directory (in order to delete the file as part of the move operation). FsPermissionChecker checker = FsPermissionChecker.getInstance(); // TODO: Disable permission checking for S3A as well (HADOOP-13892) - boolean shouldCheckPerms = !(fs instanceof AdlFileSystem); + boolean shouldCheckPerms = !(fs instanceof AdlFileSystem || + fs instanceof AzureBlobFileSystem || fs instanceof SecureAzureBlobFileSystem); if (fs.isDirectory(source)) { if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) { http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 4eba255..1087e87 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -918,6 +918,7 @@ public class HdfsTable extends Table implements FeFsTable { // behavior. So ADLS ACLs are unsupported until the connector is able to map // permissions to hadoop users/groups (HADOOP-14437). if (FileSystemUtil.isADLFileSystem(fs)) return TAccessLevel.READ_WRITE; + if (FileSystemUtil.isABFSFileSystem(fs)) return TAccessLevel.READ_WRITE; while (location != null) { try { http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java index c237426..43d164b 100644 --- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java +++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java @@ -33,6 +33,8 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem; import org.apache.hadoop.fs.adl.AdlFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.client.HdfsAdmin; @@ -306,6 +308,7 @@ public class FileSystemUtil { if (isDistributedFileSystem(fs)) return true; // Blacklist FileSystems that are known to not to include storage UUIDs. return !(fs instanceof S3AFileSystem || fs instanceof LocalFileSystem || + fs instanceof AzureBlobFileSystem || fs instanceof SecureAzureBlobFileSystem || fs instanceof AdlFileSystem); } @@ -338,6 +341,26 @@ public class FileSystemUtil { } /** + * Returns true iff the filesystem is AzureBlobFileSystem or + * SecureAzureBlobFileSystem. This function is unique in that there are 2 + * distinct classes it checks for, but the ony functional difference is the + * use of wire encryption. Some features like OAuth authentication do require + * wire encryption but that does not matter in usages of this function. + */ + public static boolean isABFSFileSystem(FileSystem fs) { + return fs instanceof AzureBlobFileSystem + || fs instanceof SecureAzureBlobFileSystem; + } + + /** + * Returns true iff the path is on AzureBlobFileSystem or + * SecureAzureBlobFileSystem. + */ + public static boolean isABFSFileSystem(Path path) throws IOException { + return isABFSFileSystem(path.getFileSystem(CONF)); + } + + /** * Returns true iff the filesystem is an instance of LocalFileSystem. */ public static boolean isLocalFileSystem(FileSystem fs) { @@ -458,6 +481,7 @@ public class FileSystemUtil { return (FileSystemUtil.isDistributedFileSystem(path) || FileSystemUtil.isLocalFileSystem(path) || FileSystemUtil.isS3AFileSystem(path) || + FileSystemUtil.isABFSFileSystem(path) || FileSystemUtil.isADLFileSystem(path)); } http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/fe/src/main/java/org/apache/impala/service/JniFrontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java index 0a066d0..db2ad29 100644 --- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java +++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java @@ -32,6 +32,8 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem; import org.apache.hadoop.fs.adl.AdlFileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -799,6 +801,8 @@ public class JniFrontend { FileSystem fs = FileSystem.get(CONF); if (!(fs instanceof DistributedFileSystem || fs instanceof S3AFileSystem || + fs instanceof AzureBlobFileSystem || + fs instanceof SecureAzureBlobFileSystem || fs instanceof AdlFileSystem)) { return "Currently configured default filesystem: " + fs.getClass().getSimpleName() + ". " + http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index d36b97d..4fbffab 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -3204,7 +3204,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest { AnalysisError(String.format("load data inpath '%s' %s into table " + "tpch.lineitem", "file:///test-warehouse/test.out", overwrite), "INPATH location 'file:/test-warehouse/test.out' must point to an " + - "HDFS, S3A or ADL filesystem."); + "HDFS, S3A, ADL or ABFS filesystem."); // File type / table type mismatch. AnalyzesOk(String.format("load data inpath '%s' %s into table " + http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl ---------------------------------------------------------------------- diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl index b11967a..982b919 100644 --- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl +++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.tmpl @@ -110,6 +110,31 @@ DEFAULT</value> <value>https://login.windows.net/${azure_tenant_id}/oauth2/token</value> </property> + <property> + <name>fs.azure.account.auth.type</name> + <value>OAuth</value> + </property> + + <property> + <name>fs.azure.account.oauth.provider.type</name> + <value>org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider</value> + </property> + + <property> + <name>fs.azure.account.oauth2.client.id</name> + <value>${azure_client_id}</value> + </property> + + <property> + <name>fs.azure.account.oauth2.client.secret</name> + <value>${azure_client_secret}</value> + </property> + + <property> + <name>fs.azure.account.oauth2.client.endpoint</name> + <value>https://login.microsoftonline.com/${azure_tenant_id}/oauth2/token</value> + </property> + <!-- This property can be used in tests to ascertain that this core-site.xml from the classpath has been loaded. (Ex: TestRequestPoolService) --> <property> http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index 954ca4c..f4f0c03 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -56,6 +56,7 @@ from tests.performance.query_exec_functions import execute_using_jdbc from tests.performance.query_executor import JdbcQueryExecConfig from tests.util.filesystem_utils import ( IS_S3, + IS_ABFS, IS_ADLS, S3_BUCKET_NAME, ADLS_STORE_NAME, @@ -67,6 +68,7 @@ from tests.util.hdfs_util import ( get_hdfs_client_from_conf, NAMENODE) from tests.util.s3_util import S3Client +from tests.util.abfs_util import ABFSClient from tests.util.test_file_parser import ( QueryTestSectionReader, parse_query_test_file, @@ -162,6 +164,8 @@ class ImpalaTestSuite(BaseTestSuite): cls.filesystem_client = cls.hdfs_client if IS_S3: cls.filesystem_client = S3Client(S3_BUCKET_NAME) + elif IS_ABFS: + cls.filesystem_client = ABFSClient() elif IS_ADLS: cls.filesystem_client = ADLSClient(ADLS_STORE_NAME) @@ -789,7 +793,7 @@ class ImpalaTestSuite(BaseTestSuite): # If 'skip_hbase' is specified or the filesystem is isilon, s3 or local, we don't # need the hbase dimension. if pytest.config.option.skip_hbase or TARGET_FILESYSTEM.lower() \ - in ['s3', 'isilon', 'local', 'adls']: + in ['s3', 'isilon', 'local', 'abfs', 'adls']: for tf_dimension in tf_dimensions: if tf_dimension.value.file_format == "hbase": tf_dimensions.remove(tf_dimension) http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/common/skip.py ---------------------------------------------------------------------- diff --git a/tests/common/skip.py b/tests/common/skip.py index 6143fee..2f19bf4 100644 --- a/tests/common/skip.py +++ b/tests/common/skip.py @@ -26,6 +26,7 @@ from functools import partial from tests.common.environ import IMPALAD_BUILD from tests.util.filesystem_utils import ( + IS_ABFS, IS_ADLS, IS_EC, IS_HDFS, @@ -55,6 +56,27 @@ class SkipIfS3: qualified_path = pytest.mark.skipif(IS_S3, reason="Tests rely on HDFS qualified paths, IMPALA-1872") + +class SkipIfABFS: + + # These ones are skipped due to product limitations. + caching = pytest.mark.skipif(IS_ABFS, reason="SET CACHED not implemented for ABFS") + hive = pytest.mark.skipif(IS_ABFS, reason="Hive doesn't work with ABFS") + hdfs_block_size = pytest.mark.skipif(IS_ABFS, reason="ABFS uses it's own block size") + hdfs_acls = pytest.mark.skipif(IS_ABFS, reason="HDFS acls are not supported on ABFS") + jira = partial(pytest.mark.skipif, IS_ABFS) + hdfs_encryption = pytest.mark.skipif(IS_ABFS, + reason="HDFS encryption is not supported with ABFS") + trash = pytest.mark.skipif(IS_ABFS, + reason="Drop/purge not working as expected on ABFS, IMPALA-7726") + + # These ones need test infra work to re-enable. + udfs = pytest.mark.skipif(IS_ABFS, reason="udas/udfs not copied to ABFS") + datasrc = pytest.mark.skipif(IS_ABFS, reason="data sources not copied to ABFS") + hbase = pytest.mark.skipif(IS_ABFS, reason="HBase not started with ABFS") + qualified_path = pytest.mark.skipif(IS_ABFS, + reason="Tests rely on HDFS qualified paths, IMPALA-1872") + class SkipIfADLS: # These ones are skipped due to product limitations. http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/custom_cluster/test_admission_controller.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py index fc74f00..018f66d 100644 --- a/tests/custom_cluster/test_admission_controller.py +++ b/tests/custom_cluster/test_admission_controller.py @@ -36,6 +36,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.resource_pool_config import ResourcePoolConfig from tests.common.skip import ( SkipIfS3, + SkipIfABFS, SkipIfADLS, SkipIfEC, SkipIfNotHdfsMinicluster) @@ -439,6 +440,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): ".* is greater than pool max mem resources 10.00 MB", str(ex)) @SkipIfS3.hdfs_block_size + @SkipIfABFS.hdfs_block_size @SkipIfADLS.hdfs_block_size @SkipIfEC.fix_later @pytest.mark.execute_serially http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/custom_cluster/test_hdfs_fd_caching.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py index 1afe431..8849363 100644 --- a/tests/custom_cluster/test_hdfs_fd_caching.py +++ b/tests/custom_cluster/test_hdfs_fd_caching.py @@ -22,6 +22,7 @@ from tests.common.skip import SkipIfLocal, SkipIfEC from tests.util.filesystem_utils import ( IS_ISILON, IS_S3, + IS_ABFS, IS_ADLS) from time import sleep @@ -132,7 +133,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite): # Caching only applies to local HDFS files. If this is local HDFS, then verify # that caching works. Otherwise, verify that file handles are not cached. - if IS_S3 or IS_ADLS or IS_ISILON or pytest.config.option.testing_remote_cluster: + if IS_S3 or IS_ABFS or IS_ADLS or IS_ISILON or \ + pytest.config.option.testing_remote_cluster: caching_expected = False else: caching_expected = True @@ -148,7 +150,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite): handle_timeout = 5 # Only test eviction on platforms where caching is enabled. - if IS_S3 or IS_ADLS or IS_ISILON or pytest.config.option.testing_remote_cluster: + if IS_S3 or IS_ABFS or IS_ADLS or IS_ISILON or \ + pytest.config.option.testing_remote_cluster: return caching_expected = True self.run_fd_caching_test(vector, caching_expected, cache_capacity, handle_timeout) http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/custom_cluster/test_insert_behaviour.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_insert_behaviour.py b/tests/custom_cluster/test_insert_behaviour.py index af6a270..7d3fb3e 100644 --- a/tests/custom_cluster/test_insert_behaviour.py +++ b/tests/custom_cluster/test_insert_behaviour.py @@ -18,13 +18,14 @@ import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal from tests.util.filesystem_utils import IS_ISILON, WAREHOUSE from tests.util.hdfs_util import HdfsConfig, get_hdfs_client, get_hdfs_client_from_conf TEST_TBL = "insert_inherit_permission" @SkipIfS3.hdfs_acls [email protected]_acls @SkipIfADLS.hdfs_acls class TestInsertBehaviourCustomCluster(CustomClusterTestSuite): http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/custom_cluster/test_metadata_replicas.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_metadata_replicas.py b/tests/custom_cluster/test_metadata_replicas.py index 9674756..940f371 100644 --- a/tests/custom_cluster/test_metadata_replicas.py +++ b/tests/custom_cluster/test_metadata_replicas.py @@ -22,12 +22,14 @@ from tests.common.environ import specific_build_type_timeout from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.skip import ( SkipIfS3, + SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal) from tests.util.hive_utils import HiveDbWrapper @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/custom_cluster/test_parquet_max_page_header.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_parquet_max_page_header.py b/tests/custom_cluster/test_parquet_max_page_header.py index 913d883..59e87fb 100644 --- a/tests/custom_cluster/test_parquet_max_page_header.py +++ b/tests/custom_cluster/test_parquet_max_page_header.py @@ -24,7 +24,7 @@ import string import subprocess from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon class TestParquetMaxPageHeader(CustomClusterTestSuite): '''This tests large page headers in parquet files. Parquet page header size can @@ -92,6 +92,7 @@ class TestParquetMaxPageHeader(CustomClusterTestSuite): put.wait() @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfIsilon.hive @pytest.mark.execute_serially http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/custom_cluster/test_permanent_udfs.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py index 100b5ea..34a1e00 100644 --- a/tests/custom_cluster/test_permanent_udfs.py +++ b/tests/custom_cluster/test_permanent_udfs.py @@ -24,7 +24,7 @@ import subprocess from tempfile import mkdtemp from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_uncompressed_text_dimension from tests.util.filesystem_utils import get_fs_path @@ -162,6 +162,7 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfLocal.hive @pytest.mark.execute_serially @@ -183,6 +184,7 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfLocal.hive @pytest.mark.execute_serially @@ -246,6 +248,8 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive + @SkipIfADLS.hive @SkipIfLocal.hive @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( @@ -307,6 +311,8 @@ class TestUdfPersistence(CustomClusterTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive + @SkipIfADLS.hive @SkipIfLocal.hive @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/data_errors/test_data_errors.py ---------------------------------------------------------------------- diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py index d8c662a..52e921f 100644 --- a/tests/data_errors/test_data_errors.py +++ b/tests/data_errors/test_data_errors.py @@ -25,7 +25,7 @@ import subprocess from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIf, SkipIfS3, SkipIfADLS, SkipIfLocal +from tests.common.skip import SkipIf, SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import create_exec_option_dimension class TestDataErrors(ImpalaTestSuite): @@ -106,6 +106,7 @@ class TestHdfsUnknownErrors(ImpalaTestSuite): assert "Safe mode is OFF" in output @SkipIfS3.qualified_path [email protected]_path @SkipIfADLS.qualified_path class TestHdfsScanNodeErrors(TestDataErrors): @classmethod @@ -124,6 +125,7 @@ class TestHdfsScanNodeErrors(TestDataErrors): self.run_test_case('DataErrorsTest/hdfs-scan-node-errors', vector) @SkipIfS3.qualified_path [email protected]_path @SkipIfADLS.qualified_path @SkipIfLocal.qualified_path class TestHdfsSeqScanNodeErrors(TestHdfsScanNodeErrors): @@ -139,6 +141,7 @@ class TestHdfsSeqScanNodeErrors(TestHdfsScanNodeErrors): @SkipIfS3.qualified_path [email protected]_path @SkipIfADLS.qualified_path class TestHdfsRcFileScanNodeErrors(TestHdfsScanNodeErrors): @classmethod http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/failure/test_failpoints.py ---------------------------------------------------------------------- diff --git a/tests/failure/test_failpoints.py b/tests/failure/test_failpoints.py index ef9ed07..d6cc1da 100644 --- a/tests/failure/test_failpoints.py +++ b/tests/failure/test_failpoints.py @@ -26,7 +26,8 @@ from time import sleep from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite, LOG -from tests.common.skip import SkipIf, SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIf, SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, \ + SkipIfLocal from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension @@ -53,6 +54,7 @@ QUERIES = [ @SkipIf.skip_hbase # -skip_hbase argument specified @SkipIfS3.hbase # S3: missing coverage: failures [email protected] @SkipIfADLS.hbase @SkipIfIsilon.hbase # ISILON: missing coverage: failures. @SkipIfLocal.hbase http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_compute_stats.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py index c3d182d..9adc8cd 100644 --- a/tests/metadata/test_compute_stats.py +++ b/tests/metadata/test_compute_stats.py @@ -20,7 +20,7 @@ from subprocess import check_call from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import ( create_exec_option_dimension, create_single_exec_option_dimension, @@ -71,6 +71,7 @@ class TestComputeStats(ImpalaTestSuite): self.cleanup_db("parquet") @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_ddl.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index 40ec569..b510af1 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -25,7 +25,7 @@ from test_ddl_base import TestDdlBase from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import LOG from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIf, SkipIfADLS, SkipIfLocal +from tests.common.skip import SkipIf, SkipIfABFS, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension from tests.util.filesystem_utils import WAREHOUSE, IS_HDFS, IS_S3, IS_ADLS from tests.common.impala_cluster import ImpalaCluster @@ -33,6 +33,7 @@ from tests.common.impala_cluster import ImpalaCluster # Validates DDL statements (create, drop) class TestDdlStatements(TestDdlBase): @SkipIfLocal.hdfs_client + @SkipIfABFS.trash def test_drop_table_with_purge(self, unique_database): """This test checks if the table data is permamently deleted in DROP TABLE <tbl> PURGE queries""" @@ -425,6 +426,7 @@ class TestDdlStatements(TestDdlBase): use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector)) @SkipIfLocal.hdfs_client + @SkipIfABFS.trash def test_drop_partition_with_purge(self, vector, unique_database): """Verfies whether alter <tbl> drop partition purge actually skips trash""" self.client.execute( http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_hdfs_encryption.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hdfs_encryption.py b/tests/metadata/test_hdfs_encryption.py index 27eaaee..fef4ac1 100644 --- a/tests/metadata/test_hdfs_encryption.py +++ b/tests/metadata/test_hdfs_encryption.py @@ -19,7 +19,7 @@ import getpass import pytest from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import ( create_single_exec_option_dimension, create_uncompressed_text_dimension) @@ -34,6 +34,7 @@ TMP_DIR = '/%s' % (PYWEBHDFS_TMP_DIR) @SkipIfS3.hdfs_encryption [email protected]_encryption @SkipIfADLS.hdfs_encryption @SkipIfIsilon.hdfs_encryption @SkipIfLocal.hdfs_encryption http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_hdfs_permissions.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hdfs_permissions.py b/tests/metadata/test_hdfs_permissions.py index a49b56e..d495fc4 100644 --- a/tests/metadata/test_hdfs_permissions.py +++ b/tests/metadata/test_hdfs_permissions.py @@ -16,7 +16,7 @@ # under the License. from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import ( create_single_exec_option_dimension, create_uncompressed_text_dimension) @@ -28,6 +28,7 @@ TBL_LOC = '%s/%s' % (WAREHOUSE, TEST_TBL) @SkipIfS3.hdfs_acls [email protected]_acls @SkipIfADLS.hdfs_acls @SkipIfLocal.hdfs_client class TestHdfsPermissions(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_hms_integration.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py index 3ac5cef..800799c 100644 --- a/tests/metadata/test_hms_integration.py +++ b/tests/metadata/test_hms_integration.py @@ -30,13 +30,14 @@ import string from subprocess import call from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import ( create_single_exec_option_dimension, create_uncompressed_text_dimension) from tests.util.hive_utils import HiveDbWrapper, HiveTableWrapper @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive @@ -84,6 +85,7 @@ class TestHmsIntegrationSanity(ImpalaTestSuite): assert 'test_tbl' in self.client.execute("show tables in hms_sanity_db").data @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_metadata_query_statements.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_metadata_query_statements.py b/tests/metadata/test_metadata_query_statements.py index bf21ecb..81fd60d 100644 --- a/tests/metadata/test_metadata_query_statements.py +++ b/tests/metadata/test_metadata_query_statements.py @@ -22,7 +22,7 @@ import re from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfIsilon, SkipIfS3, SkipIfADLS, SkipIfLocal +from tests.common.skip import SkipIfIsilon, SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal from tests.common.test_dimensions import ALL_NODES_ONLY from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_dimensions import create_uncompressed_text_dimension @@ -75,6 +75,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite): # data doesn't reside in hdfs. @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfLocal.hive def test_describe_formatted(self, vector, unique_database): @@ -149,6 +150,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite): self.client.execute(self.CREATE_DATA_SRC_STMT % (name,)) @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_partition_metadata.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_partition_metadata.py b/tests/metadata/test_partition_metadata.py index 302b0c9..280b7d0 100644 --- a/tests/metadata/test_partition_metadata.py +++ b/tests/metadata/test_partition_metadata.py @@ -17,7 +17,7 @@ import pytest from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import (create_single_exec_option_dimension, create_uncompressed_text_dimension) from tests.util.filesystem_utils import get_fs_path, WAREHOUSE, FILESYSTEM_PREFIX @@ -89,6 +89,7 @@ class TestPartitionMetadata(ImpalaTestSuite): assert data.split('\t') == ['6', '9'] @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_refresh_partition.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_refresh_partition.py b/tests/metadata/test_refresh_partition.py index 2c31b93..4b2c67b 100644 --- a/tests/metadata/test_refresh_partition.py +++ b/tests/metadata/test_refresh_partition.py @@ -17,11 +17,12 @@ from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_dimensions import create_uncompressed_text_dimension -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.util.filesystem_utils import get_fs_path @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/metadata/test_views_compatibility.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_views_compatibility.py b/tests/metadata/test_views_compatibility.py index ac208d7..a0103e2 100644 --- a/tests/metadata/test_views_compatibility.py +++ b/tests/metadata/test_views_compatibility.py @@ -23,7 +23,7 @@ from subprocess import call from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_uncompressed_text_dimension from tests.util.test_file_parser import QueryTestSectionReader @@ -47,6 +47,7 @@ from tests.util.test_file_parser import QueryTestSectionReader # Missing Coverage: Views created by Hive and Impala being visible and queryble by each # other on non hdfs storage. @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_compressed_formats.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_compressed_formats.py b/tests/query_test/test_compressed_formats.py index 2896632..97a8782 100644 --- a/tests/query_test/test_compressed_formats.py +++ b/tests/query_test/test_compressed_formats.py @@ -23,7 +23,7 @@ from os.path import join from subprocess import call from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.util.filesystem_utils import get_fs_path @@ -40,6 +40,7 @@ compression_formats = [ # Missing Coverage: Compressed data written by Hive is queriable by Impala on a non-hdfs # filesystem. @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_hdfs_caching.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py index 16940a0..176fe09 100644 --- a/tests/query_test/test_hdfs_caching.py +++ b/tests/query_test/test_hdfs_caching.py @@ -25,13 +25,15 @@ from subprocess import check_call from tests.common.environ import specific_build_type_timeout from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite, LOG -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal, SkipIfEC +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, \ + SkipIfLocal, SkipIfEC from tests.common.test_dimensions import create_single_exec_option_dimension from tests.util.filesystem_utils import get_fs_path from tests.util.shell_util import exec_process # End to end test that hdfs caching is working. @SkipIfS3.caching # S3: missing coverage: verify SET CACHED gives error [email protected] @SkipIfADLS.caching @SkipIfIsilon.caching @SkipIfLocal.caching @@ -109,6 +111,7 @@ class TestHdfsCaching(ImpalaTestSuite): # run as a part of exhaustive tests which require the workload to be 'functional-query'. # TODO: Move this to TestHdfsCaching once we make exhaustive tests run for other workloads @SkipIfS3.caching [email protected] @SkipIfADLS.caching @SkipIfIsilon.caching @SkipIfLocal.caching @@ -118,6 +121,7 @@ class TestHdfsCachingFallbackPath(ImpalaTestSuite): return 'functional-query' @SkipIfS3.hdfs_encryption + @SkipIfABFS.hdfs_encryption @SkipIfADLS.hdfs_encryption @SkipIfIsilon.hdfs_encryption @SkipIfLocal.hdfs_encryption @@ -169,6 +173,7 @@ class TestHdfsCachingFallbackPath(ImpalaTestSuite): @SkipIfS3.caching [email protected] @SkipIfADLS.caching @SkipIfIsilon.caching @SkipIfLocal.caching http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_insert.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py index 46d5249..8ded894 100644 --- a/tests/query_test/test_insert.py +++ b/tests/query_test/test_insert.py @@ -22,7 +22,7 @@ import pytest from testdata.common import widetable from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfEC, SkipIfLocal, SkipIfNotHdfsMinicluster +from tests.common.skip import SkipIfABFS, SkipIfEC, SkipIfLocal, SkipIfNotHdfsMinicluster from tests.common.test_dimensions import ( create_exec_option_dimension, create_uncompressed_text_dimension) @@ -112,6 +112,8 @@ class TestInsertQueries(ImpalaTestSuite): @pytest.mark.execute_serially # Erasure coding doesn't respect memory limit @SkipIfEC.fix_later + # ABFS partition names cannot end in periods + @SkipIfABFS.jira(reason="HADOOP-15860") def test_insert(self, vector): if (vector.get_value('table_format').file_format == 'parquet'): vector.get_value('exec_option')['COMPRESSION_CODEC'] = \ http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_insert_behaviour.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py index e42d33a..bd0dd4e 100644 --- a/tests/query_test/test_insert_behaviour.py +++ b/tests/query_test/test_insert_behaviour.py @@ -23,7 +23,7 @@ import re from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3 @SkipIfLocal.hdfs_client @@ -132,6 +132,7 @@ class TestInsertBehaviour(ImpalaTestSuite): assert len(self.filesystem_client.ls(part_dir)) == 1 @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls @pytest.mark.xfail(run=False, reason="Fails intermittently on test clusters") @@ -193,6 +194,7 @@ class TestInsertBehaviour(ImpalaTestSuite): check_has_acls("p1=1/p2=2/p3=30", "default:group:new_leaf_group:-w-") @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_insert_file_permissions(self, unique_database): @@ -244,6 +246,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_success(self.client, insert_query) @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_mixed_partition_permissions(self, unique_database): @@ -324,6 +327,7 @@ class TestInsertBehaviour(ImpalaTestSuite): load_data(self.execute_query_expect_success, "added_part") @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_readonly_table_dir(self, unique_database): @@ -355,6 +359,7 @@ class TestInsertBehaviour(ImpalaTestSuite): assert re.search(r'Impala does not have WRITE access.*' + table_path, str(err)) @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_insert_acl_permissions(self, unique_database): @@ -433,6 +438,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_success(self.client, insert_query) @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_load_permissions(self, unique_database): @@ -557,6 +563,7 @@ class TestInsertBehaviour(ImpalaTestSuite): self.execute_query_expect_failure(self.client, insert_query) @SkipIfS3.hdfs_acls + @SkipIfABFS.hdfs_acls @SkipIfADLS.hdfs_acls @SkipIfIsilon.hdfs_acls def test_multiple_group_acls(self, unique_database): http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_insert_parquet.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py index 4523b1c..0bf28fe 100644 --- a/tests/query_test/test_insert_parquet.py +++ b/tests/query_test/test_insert_parquet.py @@ -29,7 +29,8 @@ from parquet.ttypes import ColumnOrder, SortingColumn, TypeDefinedOrder from tests.common.environ import impalad_basedir from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIfEC, SkipIfIsilon, SkipIfLocal, SkipIfS3, SkipIfADLS +from tests.common.skip import SkipIfEC, SkipIfIsilon, SkipIfLocal, SkipIfS3, SkipIfABFS, \ + SkipIfADLS from tests.common.test_dimensions import create_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.util.filesystem_utils import get_fs_path @@ -360,6 +361,7 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite): @SkipIfIsilon.hive @SkipIfLocal.hive @SkipIfS3.hive [email protected] @SkipIfADLS.hive # TODO: Should we move this to test_parquet_stats.py? class TestHdfsParquetTableStatsWriter(ImpalaTestSuite): http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_join_queries.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_join_queries.py b/tests/query_test/test_join_queries.py index 30c659c..51020c7 100644 --- a/tests/query_test/test_join_queries.py +++ b/tests/query_test/test_join_queries.py @@ -26,6 +26,7 @@ from tests.common.skip import ( SkipIfIsilon, SkipIfLocal, SkipIfS3, + SkipIfABFS, SkipIfADLS) from tests.common.test_vector import ImpalaTestDimension @@ -62,6 +63,7 @@ class TestJoinQueries(ImpalaTestSuite): self.run_test_case('QueryTest/single-node-joins-with-limits-exhaustive', new_vector) @SkipIfS3.hbase + @SkipIfABFS.hbase @SkipIfADLS.hbase @SkipIfIsilon.hbase @SkipIf.skip_hbase http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_nested_types.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py index 0603745..4fccd9d 100644 --- a/tests/query_test/test_nested_types.py +++ b/tests/query_test/test_nested_types.py @@ -26,6 +26,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import ( SkipIfIsilon, SkipIfS3, + SkipIfABFS, SkipIfADLS, SkipIfEC, SkipIfLocal, @@ -95,6 +96,7 @@ class TestNestedTypes(ImpalaTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfLocal.hive def test_upper_case_field_name(self, unique_database): @@ -568,6 +570,7 @@ class TestMaxNestingDepth(ImpalaTestSuite): @SkipIfIsilon.hive @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfLocal.hive def test_load_hive_table(self, vector, unique_database): http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_observability.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py index 7e5c757..c351e02 100644 --- a/tests/query_test/test_observability.py +++ b/tests/query_test/test_observability.py @@ -17,7 +17,7 @@ from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.util.filesystem_utils import IS_EC import logging import pytest @@ -63,6 +63,7 @@ class TestObservability(ImpalaTestSuite): @SkipIfS3.hbase @SkipIfLocal.hbase @SkipIfIsilon.hbase + @SkipIfABFS.hbase @SkipIfADLS.hbase def test_scan_summary(self): """IMPALA-4499: Checks that the exec summary for scans show the table name.""" http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_partitioning.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_partitioning.py b/tests/query_test/test_partitioning.py index 267d9e0..bb554ed 100644 --- a/tests/query_test/test_partitioning.py +++ b/tests/query_test/test_partitioning.py @@ -20,7 +20,7 @@ import pytest from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal from tests.common.test_dimensions import create_single_exec_option_dimension # Tests to validate HDFS partitioning. @@ -47,6 +47,7 @@ class TestPartitioning(ImpalaTestSuite): # Missing Coverage: Impala deals with boolean partitions created by Hive on a non-hdfs # filesystem. @SkipIfS3.hive + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_resource_limits.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_resource_limits.py b/tests/query_test/test_resource_limits.py index 58ae4a5..265421d 100644 --- a/tests/query_test/test_resource_limits.py +++ b/tests/query_test/test_resource_limits.py @@ -16,7 +16,8 @@ # under the License. from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfEC, SkipIfKudu, SkipIfLocal, SkipIfS3 +from tests.common.skip import SkipIfEC, SkipIfKudu, SkipIfLocal, SkipIfS3, SkipIfABFS, \ + SkipIfADLS from tests.common.test_dimensions import create_parquet_dimension @@ -45,6 +46,8 @@ class TestResourceLimits(ImpalaTestSuite): self.run_test_case('QueryTest/query-resource-limits', vector) @SkipIfS3.hbase + @SkipIfADLS.hbase + @SkipIfABFS.hbase @SkipIfLocal.multiple_impalad def test_resource_limits_hbase(self, vector): self.run_test_case('QueryTest/query-resource-limits-hbase', vector) http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/query_test/test_scanners.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index 0000867..b75c3cc 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -35,6 +35,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite, LOG from tests.common.skip import ( SkipIf, SkipIfS3, + SkipIfABFS, SkipIfADLS, SkipIfEC, SkipIfIsilon, @@ -351,6 +352,7 @@ class TestParquet(ImpalaTestSuite): assert len(result.data) == 1 assert "4294967294" in result.data + @SkipIfABFS.hive @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive @@ -473,6 +475,7 @@ class TestParquet(ImpalaTestSuite): vector, unique_database) @SkipIfS3.hdfs_block_size + @SkipIfABFS.hdfs_block_size @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.multiple_impalad @@ -530,6 +533,7 @@ class TestParquet(ImpalaTestSuite): assert total == num_scanners_with_no_reads @SkipIfS3.hdfs_block_size + @SkipIfABFS.hdfs_block_size @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.multiple_impalad @@ -545,6 +549,7 @@ class TestParquet(ImpalaTestSuite): self._multiple_blocks_helper(table_name, 40000, ranges_per_node=2) @SkipIfS3.hdfs_block_size + @SkipIfABFS.hdfs_block_size @SkipIfADLS.hdfs_block_size @SkipIfIsilon.hdfs_block_size @SkipIfLocal.multiple_impalad @@ -906,6 +911,7 @@ class TestTextScanRangeLengths(ImpalaTestSuite): # Missing Coverage: No coverage for truncated files errors or scans. @SkipIfS3.hive [email protected] @SkipIfADLS.hive @SkipIfIsilon.hive @SkipIfLocal.hive @@ -984,6 +990,7 @@ class TestOrc(ImpalaTestSuite): lambda v: v.get_value('table_format').file_format == 'orc') @SkipIfS3.hdfs_block_size + @SkipIfABFS.hdfs_block_size @SkipIfADLS.hdfs_block_size @SkipIfEC.fix_later @SkipIfIsilon.hdfs_block_size http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/stress/test_ddl_stress.py ---------------------------------------------------------------------- diff --git a/tests/stress/test_ddl_stress.py b/tests/stress/test_ddl_stress.py index b46f201..ea077a6 100644 --- a/tests/stress/test_ddl_stress.py +++ b/tests/stress/test_ddl_stress.py @@ -18,7 +18,7 @@ import pytest from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal +from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal # Number of tables to create per thread NUM_TBLS_PER_THREAD = 10 @@ -48,6 +48,7 @@ class TestDdlStress(ImpalaTestSuite): v.get_value('table_format').compression_codec == 'none')) @SkipIfS3.caching + @SkipIfABFS.caching @SkipIfADLS.caching @SkipIfIsilon.caching @SkipIfLocal.caching http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/util/abfs_util.py ---------------------------------------------------------------------- diff --git a/tests/util/abfs_util.py b/tests/util/abfs_util.py new file mode 100644 index 0000000..8567888 --- /dev/null +++ b/tests/util/abfs_util.py @@ -0,0 +1,113 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ABFS access utilities +# +# This file uses the Hadoop CLI to provide simple functions to the Impala test +# suite to whatever the default filesystem is + +import re +import subprocess +import tempfile + +from tests.util.filesystem_base import BaseFilesystem + + +class ABFSClient(BaseFilesystem): + + def _hadoop_fs_shell(self, command): + hadoop_command = ['hadoop', 'fs'] + command + process = subprocess.Popen(hadoop_command, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + status = process.returncode + return (status, stdout, stderr) + + def create_file(self, path, file_data, overwrite=True): + fixed_path = self._normalize_path(path) + if not overwrite and self.exists(fixed_path): return False + f = tempfile.NamedTemporaryFile(delete=False) + tmp_path = f.name + f.write(file_data) + f.close() + (status, stdout, stderr) = \ + self._hadoop_fs_shell(['-put', tmp_path, fixed_path]) + return status == 0 + + def make_dir(self, path, permission=None): + fixed_path = self._normalize_path(path) + self._hadoop_fs_shell(['-mkdir', '-p', fixed_path]) + return True + + def copy(self, src, dst): + fixed_src = self._normalize_path(src) + fixed_dst = self._normalize_path(dst) + (status, stdout, stderr) = \ + self._hadoop_fs_shell(['-cp', fixed_src, fixed_dst]) + assert status == 0, \ + 'ABFS copy failed: ' + stderr + "; " + stdout + assert self.exists(dst), \ + 'ABFS copy failed: Destination file {dst} does not exist'\ + .format(dst=dst) + + def _inner_ls(self, path): + fixed_path = self._normalize_path(path) + (status, stdout, stderr) = self._hadoop_fs_shell(['-ls', fixed_path]) + # Trim the "Found X items" line and trailing new-line + entries = stdout.split("\n")[1:-1] + files = [] + for entry in entries: + fields = re.split(" +", entry) + files.append({ + 'name': fields[7], + 'length': int(fields[4]), + 'mode': fields[0] + }) + return files + + def ls(self, path): + fixed_path = self._normalize_path(path) + files = [] + for f in self._inner_ls(fixed_path): + fname = f['name'].split("/")[-1] + if not fname == '': + files += [fname] + return files + + def exists(self, path): + fixed_path = self._normalize_path(path) + (status, stdout, stderr) = self._hadoop_fs_shell(['-test', '-e', fixed_path]) + return status == 0 + + def delete_file_dir(self, path, recursive=False): + fixed_path = self._normalize_path(path) + rm_command = ['-rm', fixed_path] + if recursive: + rm_command = ['-rm', '-r', fixed_path] + (status, stdout, stderr) = self._hadoop_fs_shell(rm_command) + return status == 0 + + def get_all_file_sizes(self, path): + """Returns a list of integers which are all the file sizes of files found + under 'path'.""" + fixed_path = self._normalize_path(path) + return [f['length'] for f in + self._inner_ls(fixed_path) if f['mode'][0] == "-"] + + def _normalize_path(self, path): + # Paths passed in may lack a leading slash + return path if path.startswith('/') else '/' + path http://git-wip-us.apache.org/repos/asf/impala/blob/7a022cf3/tests/util/filesystem_utils.py ---------------------------------------------------------------------- diff --git a/tests/util/filesystem_utils.py b/tests/util/filesystem_utils.py index aef08a3..a7b0809 100644 --- a/tests/util/filesystem_utils.py +++ b/tests/util/filesystem_utils.py @@ -30,6 +30,7 @@ IS_ISILON = FILESYSTEM == "isilon" IS_LOCAL = FILESYSTEM == "local" IS_HDFS = FILESYSTEM == "hdfs" IS_ADLS = FILESYSTEM == "adls" +IS_ABFS = FILESYSTEM == "abfs" IS_EC = os.getenv("ERASURE_CODING") == "true" # This condition satisfies both the states where one can assume a default fs # - The environment variable is set to an empty string. @@ -44,7 +45,9 @@ ISILON_WEBHDFS_PORT = 8082 # S3 specific values S3_BUCKET_NAME = os.getenv("S3_BUCKET") -# ADLS specific values +# ADLS / ABFS specific values +ABFS_ACCOUNT_NAME = os.getenv("azure_storage_account_name") +ABFS_CONTAINER_NAME = os.getenv("azure_storage_container_name") ADLS_STORE_NAME = os.getenv("azure_data_lake_store_name") ADLS_CLIENT_ID = os.getenv("azure_client_id") ADLS_TENANT_ID = os.getenv("azure_tenant_id")
