This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ebe3749996 [fix](tvf)support s3,local compress_type and append regression test (#24055)
ebe3749996 is described below

commit ebe37499966f279423305c1a36b89b2bc917de3c
Author: daidai <2017501...@qq.com>
AuthorDate: Wed Sep 13 00:32:59 2023 +0800

    [fix](tvf)support s3,local compress_type and append regression test (#24055)
    
    Support compress_type for the s3 and local TVFs, and add regression tests.
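
    As a usage sketch only (mirroring the regression tests added in this patch;
    bucket, endpoint, credentials, backend id, and file paths below are
    placeholders):

        -- read a gzip-compressed CSV through the s3() TVF
        SELECT * FROM s3(
            "URI" = "https://<bucket>.<endpoint>/path/test_tvf.csv.gz",
            "s3.access_key" = "<ak>",
            "s3.secret_key" = "<sk>",
            "REGION" = "<region>",
            "FORMAT" = "csv",
            "compress_type" = "gz");

        -- derive partition columns from the directory name through the local() TVF
        SELECT * FROM local(
            "file_path" = "/path/dt1=cyw/a.csv",
            "backend_id" = "<backend_id>",
            "format" = "csv",
            "path_partition_keys" = "dt1");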
---
 .../ExternalFileTableValuedFunction.java           |  12 +-
 .../tablefunction/HdfsTableValuedFunction.java     |  24 ++-
 .../HttpStreamTableValuedFunction.java             |  19 ++-
 .../tablefunction/LocalTableValuedFunction.java    |  23 ++-
 .../doris/tablefunction/S3TableValuedFunction.java |  76 ++++-----
 .../tvf/compress/test_tvf.csv.bz2                  | Bin 0 -> 60731 bytes
 .../tvf/compress/test_tvf.csv.deflate              | Bin 0 -> 74687 bytes
 .../external_table_p2/tvf/compress/test_tvf.csv.gz | Bin 0 -> 74828 bytes
 .../tvf/compress/test_tvf.csv.lz4                  | Bin 0 -> 112626 bytes
 .../tvf/compress/test_tvf.csv.snappy               | Bin 0 -> 107203 bytes
 .../tvf/test_local_tvf_compression.out             | 150 +++++++++++++++++
 .../tvf/test_path_partition_keys.out               |  68 ++++++++
 .../tvf/test_path_partition_keys/dt1=cyw/a.csv     |   3 +
 .../tvf/test_path_partition_keys/dt1=cyw/b.csv     |   3 +
 .../tvf/test_path_partition_keys/dt1=hello/c.csv   |   3 +
 .../test_path_partition_keys/dt2=two/dt1=cyw/a.csv |   3 +
 .../test_path_partition_keys/dt2=two/dt1=cyw/b.csv |   3 +
 .../dt2=two/dt1=hello/c.csv                        |   3 +
 .../tvf/test_s3_tvf_compression.out                | 144 +++++++++++++++++
 .../tvf/test_local_tvf_compression.groovy          | 127 +++++++++++++++
 .../tvf/test_path_partition_keys.groovy            | 178 +++++++++++++++++++++
 .../tvf/test_s3_tvf_compression.groovy             | 171 ++++++++++++++++++++
 22 files changed, 937 insertions(+), 73 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index b866ac860e..7811808419 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -123,6 +123,8 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
             .add(TRIM_DOUBLE_QUOTES)
             .add(SKIP_LINES)
             .add(CSV_SCHEMA)
+            .add(COMPRESS_TYPE)
+            .add(PATH_PARTITION_KEYS)
             .build();
 
     // Columns got from file and path(if has)
@@ -135,6 +137,8 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
 
     protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
     protected Map<String, String> locationProperties;
+    protected String filePath;
+
 
     private TFileFormatType fileFormatType;
     private TFileCompressType compressionType;
@@ -198,8 +202,9 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
         }
     }
 
+    //The keys in the passed validParams map need to be lowercase.
     protected void parseProperties(Map<String, String> validParams) throws AnalysisException {
-        String formatString = validParams.getOrDefault(FORMAT, "").toLowerCase();
+        String formatString = validParams.getOrDefault(FORMAT, "");
         switch (formatString) {
             case "csv":
                 this.fileFormatType = TFileFormatType.FORMAT_CSV_PLAIN;
@@ -233,11 +238,6 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
                 throw new AnalysisException("format:" + formatString + " is not supported.");
         }
 
-        if (getTFileType() == TFileType.FILE_STREAM && (formatString.equals("parquet")
-                || formatString.equals("avro")
-                || formatString.equals("orc"))) {
-            throw new AnalysisException("current http_stream does not yet support parquet, avro and orc");
-        }
         columnSeparator = validParams.getOrDefault(COLUMN_SEPARATOR, DEFAULT_COLUMN_SEPARATOR);
         lineDelimiter = validParams.getOrDefault(LINE_DELIMITER, DEFAULT_LINE_DELIMITER);
         jsonRoot = validParams.getOrDefault(JSON_ROOT, "");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
index 019a5cb739..385d9d11ad 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
@@ -56,26 +56,23 @@ public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction {
             .build();
 
     private URI hdfsUri;
-    private String filePath;
 
     public HdfsTableValuedFunction(Map<String, String> params) throws AnalysisException {
         Map<String, String> fileParams = new CaseInsensitiveMap();
         locationProperties = Maps.newHashMap();
         for (String key : params.keySet()) {
-            if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
-                fileParams.put(key, params.get(key));
-            } else {
+            String lowerKey = key.toLowerCase();
+            if (FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
+                fileParams.put(lowerKey, params.get(key));
+            } else if (LOCATION_PROPERTIES.contains(lowerKey)) {
+                locationProperties.put(lowerKey, params.get(key));
+            } else if (HdfsResource.HADOOP_FS_NAME.equalsIgnoreCase(key)) {
                 // because HADOOP_FS_NAME contains upper and lower case
-                if (HdfsResource.HADOOP_FS_NAME.equalsIgnoreCase(key)) {
-                    locationProperties.put(HdfsResource.HADOOP_FS_NAME, params.get(key));
-                } else {
-                    locationProperties.put(key, params.get(key));
-                }
+                locationProperties.put(HdfsResource.HADOOP_FS_NAME, params.get(key));
+            } else {
+                throw new AnalysisException(key + " is invalid property");
             }
         }
-        if (params.containsKey(PATH_PARTITION_KEYS)) {
-            fileParams.put(PATH_PARTITION_KEYS, params.get(PATH_PARTITION_KEYS));
-        }
 
         if (!locationProperties.containsKey(HDFS_URI)) {
             throw new AnalysisException(String.format("Configuration '%s' is required.", HDFS_URI));
@@ -84,7 +81,8 @@ public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction {
         hdfsUri = URI.create(locationProperties.get(HDFS_URI));
         filePath = locationProperties.get(HdfsResource.HADOOP_FS_NAME) + hdfsUri.getPath();
 
-        parseProperties(fileParams);
+        super.parseProperties(fileParams);
+
         parseFile();
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java
index 8a4c5cb2d8..bb32c82653 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java
@@ -22,10 +22,10 @@ import org.apache.doris.analysis.StorageBackend.StorageType;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.thrift.TFileType;
 
-import org.apache.commons.collections.map.CaseInsensitiveMap;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.util.HashMap;
 import java.util.Map;
 
 /**
@@ -37,14 +37,23 @@ public class HttpStreamTableValuedFunction extends ExternalFileTableValuedFuncti
     public static final String NAME = "http_stream";
 
     public HttpStreamTableValuedFunction(Map<String, String> params) throws AnalysisException {
-        Map<String, String> validParams = new CaseInsensitiveMap();
+        Map<String, String> fileParams = new HashMap<>();
         for (String key : params.keySet()) {
-            if (!FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
+            String lowerKey = key.toLowerCase();
+            if (!FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
                 throw new AnalysisException(key + " is invalid property");
             }
-            validParams.put(key, params.get(key));
+            fileParams.put(lowerKey, params.get(key));
         }
-        parseProperties(validParams);
+
+        String formatString = fileParams.getOrDefault(FORMAT, "");
+        if (formatString.equals("parquet")
+                || formatString.equals("avro")
+                || formatString.equals("orc")) {
+            throw new AnalysisException("current http_stream does not yet support parquet, avro and orc");
+        }
+
+        super.parseProperties(fileParams);
     }
 
    // =========== implement abstract methods of ExternalFileTableValuedFunction =================
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java
index f6693317ba..129c3f930c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java
@@ -56,32 +56,31 @@ public class LocalTableValuedFunction extends ExternalFileTableValuedFunction {
             .add(BACKEND_ID)
             .build();
 
-    private String filePath;
     private long backendId;
 
     public LocalTableValuedFunction(Map<String, String> params) throws AnalysisException {
-        Map<String, String> fileFormatParams = new CaseInsensitiveMap();
+        Map<String, String> fileParams = new CaseInsensitiveMap();
         locationProperties = Maps.newHashMap();
         for (String key : params.keySet()) {
-            if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
-                fileFormatParams.put(key, params.get(key));
-            } else if (LOCATION_PROPERTIES.contains(key.toLowerCase())) {
-                locationProperties.put(key.toLowerCase(), params.get(key));
+            String lowerKey = key.toLowerCase();
+            if (FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
+                fileParams.put(lowerKey, params.get(key));
+            } else if (LOCATION_PROPERTIES.contains(lowerKey)) {
+                locationProperties.put(lowerKey, params.get(key));
             } else {
                 throw new AnalysisException(key + " is invalid property");
             }
         }
 
-        if (!locationProperties.containsKey(FILE_PATH)) {
-            throw new AnalysisException(String.format("Configuration '%s' is required.", FILE_PATH));
-        }
-        if (!locationProperties.containsKey(BACKEND_ID)) {
-            throw new AnalysisException(String.format("Configuration '%s' is required.", BACKEND_ID));
+        for (String key : LOCATION_PROPERTIES) {
+            if (!locationProperties.containsKey(key)) {
+                throw new AnalysisException(String.format("Configuration '%s' is required.", key));
+            }
         }
 
         filePath = locationProperties.get(FILE_PATH);
         backendId = Long.parseLong(locationProperties.get(BACKEND_ID));
-        parseProperties(fileFormatParams);
+        super.parseProperties(fileParams);
 
         getFileListFromBackend();
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index 300c51c7ad..74c8ae5c4a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -57,7 +57,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
             ImmutableSet.of(S3Properties.SESSION_TOKEN, PropertyConverter.USE_PATH_STYLE, S3Properties.REGION,
                     PATH_PARTITION_KEYS);
 
-    private static final ImmutableSet<String> PROPERTIES_SET = ImmutableSet.<String>builder()
+    private static final ImmutableSet<String> LOCATION_PROPERTIES = ImmutableSet.<String>builder()
             .add(S3_URI)
             .add(S3Properties.ENDPOINT)
             .addAll(DEPRECATED_KEYS)
@@ -70,31 +70,56 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
     private String virtualBucket = "";
 
     public S3TableValuedFunction(Map<String, String> params) throws AnalysisException {
-        Map<String, String> tvfParams = getValidParams(params);
-        forceVirtualHosted = isVirtualHosted(tvfParams);
-        s3uri = getS3Uri(tvfParams);
+
+        Map<String, String> fileParams = new HashMap<>();
+        for (Map.Entry<String, String> entry : params.entrySet()) {
+            String key = entry.getKey();
+            String lowerKey = key.toLowerCase();
+            if (!LOCATION_PROPERTIES.contains(lowerKey) && !FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
+                throw new AnalysisException("Invalid property: " + key);
+            }
+            if (DEPRECATED_KEYS.contains(lowerKey)) {
+                lowerKey = S3Properties.S3_PREFIX + lowerKey;
+            }
+            fileParams.put(lowerKey, entry.getValue());
+        }
+
+        if (!fileParams.containsKey(S3_URI)) {
+            throw new AnalysisException("Missing required property: " + S3_URI);
+        }
+
+        forceVirtualHosted = isVirtualHosted(fileParams);
+        s3uri = getS3Uri(fileParams);
         final String endpoint = forceVirtualHosted
                 ? getEndpointAndSetVirtualBucket(params)
                 : s3uri.getBucketScheme();
-        if (!tvfParams.containsKey(S3Properties.REGION)) {
+        if (!fileParams.containsKey(S3Properties.REGION)) {
             String region = S3Properties.getRegionOfEndpoint(endpoint);
-            tvfParams.put(S3Properties.REGION, region);
+            fileParams.put(S3Properties.REGION, region);
         }
         CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(endpoint,
-                tvfParams.get(S3Properties.REGION),
-                tvfParams.get(S3Properties.ACCESS_KEY),
-                tvfParams.get(S3Properties.SECRET_KEY));
-        if (tvfParams.containsKey(S3Properties.SESSION_TOKEN)) {
-            credential.setSessionToken(tvfParams.get(S3Properties.SESSION_TOKEN));
+                fileParams.get(S3Properties.REGION),
+                fileParams.get(S3Properties.ACCESS_KEY),
+                fileParams.get(S3Properties.SECRET_KEY));
+        if (fileParams.containsKey(S3Properties.SESSION_TOKEN)) {
+            credential.setSessionToken(fileParams.get(S3Properties.SESSION_TOKEN));
         }
 
         // set S3 location properties
         // these five properties is necessary, no one can be lost.
         locationProperties = S3Properties.credentialToMap(credential);
-        String usePathStyle = tvfParams.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false");
+        String usePathStyle = fileParams.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false");
         locationProperties.put(PropertyConverter.USE_PATH_STYLE, usePathStyle);
 
-        parseProperties(tvfParams);
+        super.parseProperties(fileParams);
+
+        if (forceVirtualHosted) {
+            filePath = NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM
+                + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey();
+        } else {
+            filePath = NAME + S3URI.SCHEME_DELIM + s3uri.getKey();
+        }
+
         if (FeConstants.runningUnitTest) {
             // Just check
             FileSystemFactory.getS3FileSystem(locationProperties);
@@ -103,25 +128,6 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
         }
     }
 
-    private static Map<String, String> getValidParams(Map<String, String> params) throws AnalysisException {
-        Map<String, String> validParams = new HashMap<>();
-        for (Map.Entry<String, String> entry : params.entrySet()) {
-            String key = entry.getKey();
-            String lowerKey = key.toLowerCase();
-            if (!PROPERTIES_SET.contains(lowerKey) && !FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
-                throw new AnalysisException("Invalid property: " + key);
-            }
-            if (DEPRECATED_KEYS.contains(lowerKey)) {
-                lowerKey = S3Properties.S3_PREFIX + lowerKey;
-            }
-            validParams.put(lowerKey, entry.getValue());
-        }
-        if (!validParams.containsKey(S3_URI)) {
-            throw new AnalysisException("Missing required property: " + S3_URI);
-        }
-        return S3Properties.requiredS3TVFProperties(validParams);
-    }
-
    private String getEndpointAndSetVirtualBucket(Map<String, String> params) throws AnalysisException {
        Preconditions.checkState(forceVirtualHosted, "only invoked when force virtual hosted.");
         String[] fileds = s3uri.getVirtualBucket().split("\\.", 2);
@@ -167,11 +173,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
     @Override
     public String getFilePath() {
         // must be "s3://..."
-        if (forceVirtualHosted) {
-            return NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM
-                    + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey();
-        }
-        return NAME + S3URI.SCHEME_DELIM + s3uri.getKey();
+        return filePath;
     }
 
     @Override
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2
new file mode 100644
index 0000000000..b0bff9aa47
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate
new file mode 100644
index 0000000000..d47c707da0
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz
new file mode 100644
index 0000000000..1f35b6ba8f
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4
new file mode 100644
index 0000000000..8341cce4fd
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy
new file mode 100644
index 0000000000..9ac2b7ae29
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy differ
diff --git a/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out b/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out
new file mode 100644
index 0000000000..19699b0dc5
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out
@@ -0,0 +1,150 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !gz_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+
+-- !gz_2 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+
+-- !bz2_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+1009   757881  AjcSyYMIMzS     2024-05-04      7.5674012939461255
+101    326164  QWLnalYNmYDt    2024-01-07      3.8159876011523854
+1010   427079  AlRUfmxfAuoLnPqUTvQVMtrS        2024-06-04      3.8087069699523313
+
+-- !bz2_2 --
+1476   2023-09-07
+1521   2023-09-07
+259    2023-09-07
+50     2023-09-07
+71     2023-09-07
+785    2023-09-07
+869    2023-09-07
+1064   2023-09-08
+126    2023-09-08
+137    2023-09-08
+1425   2023-09-08
+804    2023-09-08
+1240   2023-09-09
+1565   2023-09-09
+1688   2023-09-09
+
+-- !lz4_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+1009   757881  AjcSyYMIMzS     2024-05-04      7.5674012939461255
+101    326164  QWLnalYNmYDt    2024-01-07      3.8159876011523854
+1010   427079  AlRUfmxfAuoLnPqUTvQVMtrS        2024-06-04      3.8087069699523313
+1011   252076  gHmFDhtytYzWETIxdpkpMUpnLd      2023-09-17      6.773606843056635
+1012   819615  rFfRHquexplDJvSeUK      2023-11-02      3.220639250504097
+1013   413456  uvNPelHXYjJKiOkwdNbmUkGzxiiqLo  2024-03-15      8.305048700108081
+1014   308042  vnzcsvHxnWFhvLwJkAtUqe  2024-06-15      1.5668867233009998
+1015   603837  VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU  2024-08-17      3.8287482122289007
+
+-- !lz4_2 --
+694832 buHDwfGeNHfpRFdNaogneddi
+950297 OulifcGqzIILdOGcHZlWaCiHlEB
+143630 jqtiiLUUvSGeTkxsHL
+664267 eeVExxxcioSmmX
+890760 DYwfhhbkWATuSr
+79734  hgXsiaeVOkXdWUQvNnNjLPsdiD
+855390 axGECHeiluHLBUKPEKqDheksZ
+276590 lVQfdliXrLiJOpjlWM
+585845 ztkLoqCHmOuanAdOUV
+218729 goZsLvvWFOIjlzSAitC
+303099 xRBcfDbimqmycPY
+353815 CTDIqGYPRei
+165056 NMqtBlPfByAWyMpLdp
+172440 GjCGMSYnDVp
+887563 CxqhRyCsNhLjfyV
+248229 rCbtJQHJifNyhTEVrwESIQDGBylUWG
+444180 imAEgaSWymXzsCjSZQpPSy
+453083 XJzGEouGptILvnSTmVbOt
+988672 RtONQThrfkeepz
+977907 HMIJjkgcmNZVxdQaKqpMsgJYws
+
+-- !deflate_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+
+-- !deflate_2 --
+2023-09-07     7
+2023-09-08     5
+2023-09-09     6
+2023-09-10     6
+2023-09-11     4
+2023-09-12     8
+2023-09-13     4
+2023-09-14     6
+2023-09-15     6
+2023-09-16     5
+2023-09-17     15
+2023-09-18     7
+
+-- !snappy_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+1009   757881  AjcSyYMIMzS     2024-05-04      7.5674012939461255
+101    326164  QWLnalYNmYDt    2024-01-07      3.8159876011523854
+1010   427079  AlRUfmxfAuoLnPqUTvQVMtrS        2024-06-04      3.8087069699523313
+1011   252076  gHmFDhtytYzWETIxdpkpMUpnLd      2023-09-17      6.773606843056635
+1012   819615  rFfRHquexplDJvSeUK      2023-11-02      3.220639250504097
+1013   413456  uvNPelHXYjJKiOkwdNbmUkGzxiiqLo  2024-03-15      8.305048700108081
+1014   308042  vnzcsvHxnWFhvLwJkAtUqe  2024-06-15      1.5668867233009998
+1015   603837  VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU  2024-08-17      3.8287482122289007
+1016   912679  eEjldPhxojSjTnE 2024-01-09      1.3717891874157961
+1017   630392  TcczYHXbwaCYzFSfXJlhsFjN        2023-10-07      4.733337480058437
+
+-- !snappy_2 --
+
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out b/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out
new file mode 100644
index 0000000000..6ac8589d90
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out
@@ -0,0 +1,68 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !hdfs_1 --
+1      hello   cyw
+2      aaaaaaa cyw
+3      1121399 cyw
+33     qqqqq   cyw
+44     tttttttttt      cyw
+55     qwr     cyw
+
+-- !hdfs_2 --
+
+-- !hdfs_3 --
+hello  1111    1
+hello  11111   1
+hello  33333   1
+
+-- !hdfs_4 --
+1111   mkdir   iiiiii  hello
+11111  8888888 hello   hello
+33333  helloworld      999999  hello
+
+-- !hdfs_5 --
+1      hello   0       two     cyw
+2      aaaaaaa 9       two     cyw
+3      1121399 1       two     cyw
+33     qqqqq   666     two     cyw
+44     tttttttttt      77      two     cyw
+55     qwr     91      two     cyw
+
+-- !local_1 --
+1      hello   cyw
+2      aaaaaaa cyw
+3      1121399 cyw
+
+-- !local_2 --
+1      hello   cyw
+2      aaaaaaa cyw
+
+-- !local_3 --
+1111   hello
+11111  hello
+33333  hello
+
+-- !local_4 --
+two    hello   1111    mkdir
+two    hello   11111   8888888
+two    hello   33333   helloworld
+
+-- !s3_1 --
+cyw
+cyw
+cyw
+
+-- !s3_2 --
+1111   hello
+11111  hello
+33333  hello
+
+-- !s3_3 --
+1111   mkdir   iiiiii  hello
+11111  8888888 hello   hello
+33333  helloworld      999999  hello
+
+-- !s3_4 --
+33     qqqqq   666     two     cyw
+44     tttttttttt      77      two     cyw
+55     qwr     91      two     cyw
+
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv
new file mode 100644
index 0000000000..b8537e591b
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv
@@ -0,0 +1,3 @@
+1,hello
+2,aaaaaaa
+3,1121399
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv
new file mode 100644
index 0000000000..0743633d2f
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv
@@ -0,0 +1,3 @@
+33,qqqqq
+44,tttttttttt
+55,qwr
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv
new file mode 100644
index 0000000000..b51cbf9041
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv
@@ -0,0 +1,3 @@
+11111,8888888
+33333,helloworld
+1111,mkdir
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv
new file mode 100644
index 0000000000..3b2ba1cf44
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv
@@ -0,0 +1,3 @@
+1,hello,0
+2,aaaaaaa,9
+3,1121399,1
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv
new file mode 100644
index 0000000000..e5573bf50c
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv
@@ -0,0 +1,3 @@
+33,qqqqq,666
+44,tttttttttt,77
+55,qwr,91
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv
new file mode 100644
index 0000000000..ff4b3f9ac6
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv
@@ -0,0 +1,3 @@
+11111,8888888,hello
+33333,helloworld,999999
+1111,mkdir,iiiiii
diff --git a/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out b/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out
new file mode 100644
index 0000000000..1308b7ffef
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out
@@ -0,0 +1,144 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !gz_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+1009   757881  AjcSyYMIMzS     2024-05-04      7.5674012939461255
+101    326164  QWLnalYNmYDt    2024-01-07      3.8159876011523854
+1010   427079  AlRUfmxfAuoLnPqUTvQVMtrS        2024-06-04      3.8087069699523313
+1011   252076  gHmFDhtytYzWETIxdpkpMUpnLd      2023-09-17      6.773606843056635
+1012   819615  rFfRHquexplDJvSeUK      2023-11-02      3.220639250504097
+1013   413456  uvNPelHXYjJKiOkwdNbmUkGzxiiqLo  2024-03-15      8.305048700108081
+1014   308042  vnzcsvHxnWFhvLwJkAtUqe  2024-06-15      1.5668867233009998
+1015   603837  VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU  2024-08-17      3.8287482122289007
+
+-- !gz_2 --
+1      2024-02-09
+2      2024-08-31
+3      2024-05-06
+4      2023-10-07
+5      2024-01-11
+6      2023-11-11
+7      2024-02-17
+8      2023-11-16
+9      2024-08-16
+10     2024-06-10
+11     2024-01-04
+12     2023-12-18
+13     2024-05-15
+14     2024-06-30
+15     2024-05-06
+16     2024-07-26
+17     2024-02-08
+18     2024-08-11
+19     2024-05-27
+20     2023-12-18
+
+-- !bz2_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+1009   757881  AjcSyYMIMzS     2024-05-04      7.5674012939461255
+101    326164  QWLnalYNmYDt    2024-01-07      3.8159876011523854
+1010   427079  AlRUfmxfAuoLnPqUTvQVMtrS        2024-06-04      3.8087069699523313
+
+-- !bz2_2 --
+1476   2023-09-07
+1521   2023-09-07
+259    2023-09-07
+50     2023-09-07
+71     2023-09-07
+785    2023-09-07
+869    2023-09-07
+1064   2023-09-08
+126    2023-09-08
+137    2023-09-08
+1425   2023-09-08
+804    2023-09-08
+1240   2023-09-09
+
+-- !lz4_1 --
+1      694832  buHDwfGeNHfpRFdNaogneddi        2024-02-09      4.899588807225554
+10     218729  goZsLvvWFOIjlzSAitC     2024-06-10      4.137732740231178
+100    813423  zICskqgcdPc     2024-03-23      8.486529018746493
+1000   612650  RzOXeYpKOmuJOogUyeIEDNDmvq      2023-12-05      7.8741752707933435
+1001   29486   WoUAFJFuJNnwyqMnoDhX    2024-03-11      9.758244908785949
+1002   445363  OdTEeeWtxfcRwx  2024-08-01      0.3934945460194128
+1003   707035  JAYnKxusVpGzYueACf      2023-11-14      5.377110182643222
+1004   227858  JIFyjKzmbjkt    2024-03-24      5.748037621519263
+1005   539305  PlruLkSUSXZgaHafFriklrhCi       2023-11-08      4.122635188836725
+1006   145518  KCwqEcSCGuXrHerwn       2024-06-22      8.482290064407216
+1007   939028  KzXhEMelsKVLbDMsEKh     2024-01-01      8.144449761594585
+1008   913569  CHlqPKqkIdqwBCBUHreXbFAkCt      2024-05-25      1.5683842369495904
+1009   757881  AjcSyYMIMzS     2024-05-04      7.5674012939461255
+101    326164  QWLnalYNmYDt    2024-01-07      3.8159876011523854
+
+-- !lz4_2 --
+1      buHDwfGeNHfpRFdNaogneddi
+
+-- !deflate_1 --
+4611713315956779722    0       [159]   
+4611737294102341731    1       [18,348,1010]   
+4611746138795773784    0       [18]    
+4611784761593342388    0       []      
+4611801970150944452    0       []      
+4611823514689510950    0       []      {"Превьюшки":{"doc
+4611838050999642253    0       [18]    
+4611870011201662970    0       [18,348,1010]   
+4611987206053671537    0       [18]    {"Превьюшки
+4612024970660173441    0       [18,868]        
+4612121739736542264    0       [18,348,1010]   
+4612128194949363638    0       []      
+4612152063486747092    0       [3]     
+4612190315946473296    1       [18,348,1010]   
+4612251026602549726    0       [32,62,45,48,120,194,159,348]   
+4612255738481454387    0       []      
+4612315312096080662    0       []      {"Правая колонка":
+
+-- !deflate_2 --
+4611713315956779722    0
+4611737294102341731    1
+4611746138795773784    1
+4611784761593342388    1
+4611801970150944452    1
+
+-- !snappy_1 --
+4611713315956779722    0       [159]   
+4611737294102341731    1       [18,348,1010]   
+4611746138795773784    0       [18]    
+4611784761593342388    0       []      
+4611801970150944452    0       []      
+4611823514689510950    0       []      {"Превьюшки":{"doc
+4611838050999642253    0       [18]    
+4611870011201662970    0       [18,348,1010]   
+4611987206053671537    0       [18]    {"Превьюшки
+4612024970660173441    0       [18,868]        
+4612121739736542264    0       [18,348,1010]   
+4612128194949363638    0       []      
+4612152063486747092    0       [3]     
+4612190315946473296    1       [18,348,1010]   
+4612251026602549726    0       [32,62,45,48,120,194,159,348]   
+4612255738481454387    0       []      
+4612315312096080662    0       []      {"Правая колонка":
+
+-- !snappy_2 --
+0
+
diff --git a/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy b/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy
new file mode 100644
index 0000000000..0f783900df
--- /dev/null
+++ b/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy
@@ -0,0 +1,127 @@
+import org.junit.Assert
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_local_tvf_compression", "p2,external,tvf,external_remote,external_remote_tvf") {
+    List<List<Object>> backends =  sql """ show backends """
+    assertTrue(backends.size() > 0)
+    def be_id = backends[0][0]
+    def dataFilePath = context.config.dataPath + "/external_table_p2/tvf/compress"
+
+    def outFilePath="/compress"
+
+    for (List<Object> backend : backends) {
+         def be_host = backend[1]
+         scpFiles ("root", be_host, dataFilePath, outFilePath, false);
+    }
+
+    String filename = "test_tvf.csv"
+
+
+    String compress_type = "gz" 
+    qt_gz_1 """
+    select * from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5  limit 12;
+    """
+    
+    qt_gz_2 """
+    select * from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}") where c1="1" order by c1,c2,c3,c4,c5  limit 12;
+    """
+
+
+
+    compress_type = "bz2" 
+    qt_bz2_1 """
+    select * from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 15;
+    """
+    qt_bz2_2 """
+    select c1,c4 from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}") order by cast(c4 as date),c1 limit 15;
+    """
+
+
+
+
+    compress_type = "lz4";
+    
+    qt_lz4_1 """
+    select * from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}FRAME") order by c1,c2,c3,c4,c5 limit 20;
+    """
+    qt_lz4_2 """
+    select c2,c3 from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}FRAME")  where c2!="abcsdasdsadsad"  order by cast(c1 as int),c2,c3  limit 20;
+    """
+
+
+
+    compress_type = "deflate";
+    qt_deflate_1 """ 
+        select * from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 12 ;
+    """
+    qt_deflate_2 """ 
+        select c4,count(*) from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}") group by c4 order by c4 limit 12 ;
+    """ 
+   
+
+    
+    compress_type = "snappy";
+    qt_snappy_1 """ 
+        select * from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}block") order by c1,c2,c3,c4,c5  limit 22 ;
+    """    
+    qt_snappy_2 """ 
+        select c2,c3 from local(
+        "file_path" = "${outFilePath}/${filename}.${compress_type}",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "compress_type" ="${compress_type}block") where c2="abcd" order by c3 limit 22 ;
+    """
+    
+}
diff --git a/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy b/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy
new file mode 100644
index 0000000000..0c4c74e924
--- /dev/null
+++ b/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_path_partition_keys", "p2,external,tvf,external_remote,external_remote_tvf") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String hdfsPort = context.config.otherConfigs.get("extHdfsPort")
+
+        String baseUri = "hdfs://${nameNodeHost}:${hdfsPort}/catalog/tvf/csv/test_path_partition_keys"
+        String baseFs = "hdfs://${nameNodeHost}:${hdfsPort}"
+
+        order_qt_hdfs_1 """
+        select * from HDFS(
+            "uri" = "${baseUri}/dt1=cyw/*",
+            "fs.defaultFS"= "${baseFs}",
+            "hadoop.username" = "hadoop",
+            "format" = "csv",
+            "path_partition_keys"="dt1" ) order by c1,c2 ;
+        """ 
+
+        order_qt_hdfs_2 """
+        select * from HDFS(
+            "uri" = "${baseUri}/dt1=cyw/*",
+            "fs.defaultFS"= "${baseFs}",
+            "hadoop.username" = "hadoop",
+            "format" = "csv",
+            "path_partition_keys"="dt1") where dt1!="cyw" order by c1,c2 limit 3;
+        """ 
+
+        order_qt_hdfs_3 """
+        select dt1,c1,count(*) from HDFS(
+            "uri" = "${baseUri}/dt1=hello/*",
+            "fs.defaultFS"= "${baseFs}",
+            "hadoop.username" = "hadoop",
+            "format" = "csv",
+            "path_partition_keys"="dt1") group by c1,dt1 order by c1;
+        """ 
+    
+        order_qt_hdfs_4 """
+        select * from HDFS(
+            "uri" = "${baseUri}/dt2=two/dt1=hello/*",
+            "fs.defaultFS"= "${baseFs}",
+            "hadoop.username" = "hadoop",
+            "format" = "csv",
+            "path_partition_keys"="dt1") order by c1;
+        """ 
+
+        order_qt_hdfs_5 """
+        select * from HDFS(
+            "uri" = "${baseUri}/dt2=two/dt1=cyw/*",
+            "fs.defaultFS"= "${baseFs}",
+            "hadoop.username" = "hadoop",
+            "format" = "csv",
+            "path_partition_keys"="dt2,dt1");
+        """
+
+    }
+    
+    List<List<Object>> backends =  sql """ show backends """
+    assertTrue(backends.size() > 0)
+    def be_id = backends[0][0]
+    def dataFilePath = context.config.dataPath + "/external_table_p2/test_path_partition_keys/"
+
+    def outFilePath="/test_path_partition_keys"
+
+    for (List<Object> backend : backends) {
+         def be_host = backend[1]
+         scpFiles ("root", be_host, dataFilePath, outFilePath, false);
+    }
+
+    order_qt_local_1 """
+    select * from local(
+        "file_path" = "${outFilePath}/dt1=cyw/a.csv",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "path_partition_keys"="dt1") order by c1,c2;
+    """
+    
+    order_qt_local_2 """
+    select * from local(
+        "file_path" = "${outFilePath}/dt1=cyw/*",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "path_partition_keys"="dt1") order by c1,c2  limit 2;
+    """
+    
+    order_qt_local_3 """
+    select c1,dt1 from local(
+        "file_path" = "${outFilePath}/dt1=hello/c.csv",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "path_partition_keys"="dt1") order by c1,c2  limit 7;
+    """
+
+    order_qt_local_4 """
+    select dt2,dt1,c1,c2 from local(
+        "file_path" = "${outFilePath}/dt2=two/dt1=hello/c.csv",
+        "backend_id" = "${be_id}",
+        "format" = "csv",
+        "path_partition_keys"="dt2,dt1") order by c1,c2  limit 9;
+    """
+    
+
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = context.config.otherConfigs.get("s3BucketName");
+    
+    sql """ set query_timeout=3600; """ 
+
+    order_qt_s3_1 """ 
+    select dt1 from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt1=cyw/b.csv",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "path_partition_keys"="dt1") 
+    """
+
+
+
+    order_qt_s3_2 """ 
+    select c1,dt1 from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt1=hello/c.csv",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "path_partition_keys"="dt1") limit 3;
+    """
+
+
+    order_qt_s3_3 """ 
+    select * from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "path_partition_keys"="dt1") limit 3;
+    """
+
+
+    order_qt_s3_4 """ 
+    select * from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "path_partition_keys"="dt2,dt1") limit 3;
+    """
+}
diff --git a/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy b/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy
new file mode 100644
index 0000000000..57cfdb136d
--- /dev/null
+++ b/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_s3_tvf_compression", "p2,external,tvf,external_remote,external_remote_tvf") {
+    
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = context.config.otherConfigs.get("s3BucketName");
+    
+    sql """ set query_timeout=3600; """ 
+
+    String compress_type = "gz" 
+    qt_gz_1 """ 
+    select * from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 20;
+    """
+
+
+    qt_gz_2 """ 
+    select c1,c4 from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}") order by cast(c1 as int),c4 limit 20;
+    """
+
+
+
+    compress_type = "bz2";
+    qt_bz2_1 """ 
+    select * from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 15;
+    """
+
+
+    qt_bz2_2 """ 
+    select c1,c4 from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}")  where c1!="100"  order by cast(c4 as date),c1 limit 13;
+    """
+
+
+
+    compress_type = "lz4";
+    qt_lz4_1 """ 
+    select * from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}FRAME") order by c1,c2,c3,c4,c5  limit 14;
+    """
+    
+
+    qt_lz4_2 """ 
+    select c1,c3 from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}FRAME")  where c3="buHDwfGeNHfpRFdNaogneddi" order by c3,c1  limit 14;
+    """
+
+
+    String select_field = "c1,c12,c23,c40";
+    String orderBy_limit = "order by c1,c12,c23,c40  limit 17 ";
+
+    compress_type = "deflate";
+    qt_deflate_1 """ 
+    select ${select_field} from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "column_separator" = '\001',
+        "compress_type" ="${compress_type}") ${orderBy_limit};
+    """
+
+    qt_deflate_2 """ 
+    select c1,c2 from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "column_separator" = '\001',
+        "use_path_style" = "true",
+        "compress_type" ="${compress_type}") group by c1,c2  order by c1,c2 limit 5;
+    """
+
+
+
+   
+    compress_type = "snappy";
+    qt_snappy_1 """ 
+    select ${select_field} from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "column_separator" = '\001',
+        "compress_type" ="${compress_type}block") ${orderBy_limit};
+    """
+
+
+    qt_snappy_2 """ 
+    select count(*) from 
+    s3(     
+        "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+        "s3.access_key" = "${ak}",     
+        "s3.secret_key" = "${sk}",     
+        "REGION" = "${region}",    
+        "FORMAT" = "csv",
+        "use_path_style" = "true",
+        "column_separator" = '\001',
+        "compress_type" ="${compress_type}block") where c2 ="abccc";
+    """
+}

