This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8292e4afdd4b6f5fcfbf2291f97c988c07e1a421
Author: Tamas Mate <[email protected]>
AuthorDate: Thu Jan 26 12:32:32 2023 +0100

    IMPALA-11864: Iceberg LOAD DATA should not load S3 hidden files
    
    Loading data from S3 did not skip hidden files because the
    FileSystemUtil.listFiles() call was returning a RemoteIterator, which,
    unlike RecursingIterator, does not filter out hidden files. This
    would make a load fail because the hidden files likely have an invalid
    magic string.
    
    This commit adds an extra condition to skip hidden files when creating
    the CREATE subquery.
    
    Testing:
     - Added E2E test
     - Ran E2E test on S3 build
    
    Change-Id: Iffd179383c2bb2529f6f9b5f8bf5cba5f3553652
    Reviewed-on: http://gerrit.cloudera.org:8080/19441
    Reviewed-by: Daniel Becker <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Noemi Pap-Takacs <[email protected]>
    Reviewed-by: Zoltan Borok-Nagy <[email protected]>
---
 .../java/org/apache/impala/analysis/LoadDataStmt.java    |  3 ++-
 .../functional-query/queries/QueryTest/iceberg-load.test | 16 +++++++++++++++-
 tests/query_test/test_iceberg.py                         |  8 ++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java b/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
index d23d1951d..479ba9142 100644
--- a/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/LoadDataStmt.java
@@ -255,7 +255,8 @@ public class LoadDataStmt extends StatementBase {
             sourcePath, true, "");
         while (fileStatuses.hasNext()) {
           FileStatus fileStatus = fileStatuses.next();
-          if (fileStatus.isFile()) {
+          String fileName = fileStatus.getPath().getName();
+          if (fileStatus.isFile() && !FileSystemUtil.isHiddenFile(fileName)) {
             filePathForLike = fileStatus.getPath();
             break;
           }
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
index c5ad08e6a..14f19d77f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-load.test
@@ -122,7 +122,6 @@ row_regex:.*AnalysisException: Target table .* has fewer columns \(1\) than the
 ====
 ---- QUERY
 # Test 9: Partitioned Iceberg table
----- QUERY
 create table test_iceberg_load_partitioned like functional_parquet.iceberg_partitioned
 stored as iceberg;
 ====
@@ -139,4 +138,19 @@ select count(*) from test_iceberg_load_partitioned;
 1
 ---- TYPES
 BIGINT
+====
+---- QUERY
+# Test 10: hidden files should be skipped and the one data file should be loaded
+load data inpath '/tmp/$DATABASE/hidden/' into table test_iceberg_load_partitioned;
+---- RESULTS
+'Loaded 1 file(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+select count(*) from test_iceberg_load_partitioned;
+---- RESULTS
+2
+---- TYPES
+BIGINT
 ====
\ No newline at end of file
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 9ae75885f..0ed9a6d8f 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -885,6 +885,14 @@ class TestIcebergTable(IcebergTestSuite):
     DST_DIR = "/tmp/" + unique_database + "/partitioned/"
     self.filesystem_client.make_dir(DST_DIR, permission=777)
     self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+    # Test 10 init: hidden files
+    DST_DIR = "/tmp/" + unique_database + "/hidden/"
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.create_file(DST_DIR + "_hidden.1", "Test data 123")
+    self.filesystem_client.create_file(DST_DIR + "_hidden_2.1", "Test data 123")
+    self.filesystem_client.create_file(DST_DIR + ".hidden_3", "Test data 123")
+    self.filesystem_client.create_file(DST_DIR + ".hidden_4.1", "Test data 123")
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
 
     # Init test table
     create_iceberg_table_from_directory(self.client, unique_database,

Reply via email to