This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ebe3749996 [fix](tvf) support s3/local compress_type and add regression tests (#24055)
ebe3749996 is described below
commit ebe37499966f279423305c1a36b89b2bc917de3c
Author: daidai <[email protected]>
AuthorDate: Wed Sep 13 00:32:59 2023 +0800
[fix](tvf) support s3/local compress_type and add regression tests (#24055)
Support the compress_type property for the s3() and local() table-valued functions, and add regression tests covering compress_type and path_partition_keys.
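For reference, the new option as exercised by the regression tests added below looks like this (a minimal sketch; bucket, endpoint, and credentials are placeholders):

    select * from s3(
        "URI" = "https://bucket.endpoint/path/test_tvf.csv.gz",
        "s3.access_key" = "ak",
        "s3.secret_key" = "sk",
        "format" = "csv",
        "compress_type" = "gz");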
---
.../ExternalFileTableValuedFunction.java | 12 +-
.../tablefunction/HdfsTableValuedFunction.java | 24 ++-
.../HttpStreamTableValuedFunction.java | 19 ++-
.../tablefunction/LocalTableValuedFunction.java | 23 ++-
.../doris/tablefunction/S3TableValuedFunction.java | 76 ++++-----
.../tvf/compress/test_tvf.csv.bz2 | Bin 0 -> 60731 bytes
.../tvf/compress/test_tvf.csv.deflate | Bin 0 -> 74687 bytes
.../external_table_p2/tvf/compress/test_tvf.csv.gz | Bin 0 -> 74828 bytes
.../tvf/compress/test_tvf.csv.lz4 | Bin 0 -> 112626 bytes
.../tvf/compress/test_tvf.csv.snappy | Bin 0 -> 107203 bytes
.../tvf/test_local_tvf_compression.out | 150 +++++++++++++++++
.../tvf/test_path_partition_keys.out | 68 ++++++++
.../tvf/test_path_partition_keys/dt1=cyw/a.csv | 3 +
.../tvf/test_path_partition_keys/dt1=cyw/b.csv | 3 +
.../tvf/test_path_partition_keys/dt1=hello/c.csv | 3 +
.../test_path_partition_keys/dt2=two/dt1=cyw/a.csv | 3 +
.../test_path_partition_keys/dt2=two/dt1=cyw/b.csv | 3 +
.../dt2=two/dt1=hello/c.csv | 3 +
.../tvf/test_s3_tvf_compression.out | 144 +++++++++++++++++
.../tvf/test_local_tvf_compression.groovy | 127 +++++++++++++++
.../tvf/test_path_partition_keys.groovy | 178 +++++++++++++++++++++
.../tvf/test_s3_tvf_compression.groovy | 171 ++++++++++++++++++++
22 files changed, 937 insertions(+), 73 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index b866ac860e..7811808419 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -123,6 +123,8 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
.add(TRIM_DOUBLE_QUOTES)
.add(SKIP_LINES)
.add(CSV_SCHEMA)
+ .add(COMPRESS_TYPE)
+ .add(PATH_PARTITION_KEYS)
.build();
// Columns got from file and path(if has)
@@ -135,6 +137,8 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
protected Map<String, String> locationProperties;
+ protected String filePath;
+
private TFileFormatType fileFormatType;
private TFileCompressType compressionType;
@@ -198,8 +202,9 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
}
}
+ //The keys in the passed validParams map need to be lowercase.
protected void parseProperties(Map<String, String> validParams) throws AnalysisException {
- String formatString = validParams.getOrDefault(FORMAT, "").toLowerCase();
+ String formatString = validParams.getOrDefault(FORMAT, "");
switch (formatString) {
case "csv":
this.fileFormatType = TFileFormatType.FORMAT_CSV_PLAIN;
@@ -233,11 +238,6 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
throw new AnalysisException("format:" + formatString + " is not supported.");
}
- if (getTFileType() == TFileType.FILE_STREAM && (formatString.equals("parquet")
- || formatString.equals("avro")
- || formatString.equals("orc"))) {
- throw new AnalysisException("current http_stream does not yet support parquet, avro and orc");
- }
columnSeparator = validParams.getOrDefault(COLUMN_SEPARATOR, DEFAULT_COLUMN_SEPARATOR);
lineDelimiter = validParams.getOrDefault(LINE_DELIMITER, DEFAULT_LINE_DELIMITER);
jsonRoot = validParams.getOrDefault(JSON_ROOT, "");
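Note the contract change here: parseProperties() no longer lowercases the format value itself. As the comment added above says, callers must pass a map whose keys are already lower-cased, and each constructor in the diffs below is reworked to normalize keys (and reject unknown ones) before delegating via super.parseProperties().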
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
index 019a5cb739..385d9d11ad 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
@@ -56,26 +56,23 @@ public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction {
.build();
private URI hdfsUri;
- private String filePath;
public HdfsTableValuedFunction(Map<String, String> params) throws AnalysisException {
Map<String, String> fileParams = new CaseInsensitiveMap();
locationProperties = Maps.newHashMap();
for (String key : params.keySet()) {
- if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
- fileParams.put(key, params.get(key));
- } else {
+ String lowerKey = key.toLowerCase();
+ if (FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
+ fileParams.put(lowerKey, params.get(key));
+ } else if (LOCATION_PROPERTIES.contains(lowerKey)) {
+ locationProperties.put(lowerKey, params.get(key));
+ } else if (HdfsResource.HADOOP_FS_NAME.equalsIgnoreCase(key)) {
// because HADOOP_FS_NAME contains upper and lower case
- if (HdfsResource.HADOOP_FS_NAME.equalsIgnoreCase(key)) {
- locationProperties.put(HdfsResource.HADOOP_FS_NAME, params.get(key));
- } else {
- locationProperties.put(key, params.get(key));
- }
+ locationProperties.put(HdfsResource.HADOOP_FS_NAME, params.get(key));
+ } else {
+ throw new AnalysisException(key + " is invalid property");
}
}
- if (params.containsKey(PATH_PARTITION_KEYS)) {
- fileParams.put(PATH_PARTITION_KEYS, params.get(PATH_PARTITION_KEYS));
- }
if (!locationProperties.containsKey(HDFS_URI)) {
throw new AnalysisException(String.format("Configuration '%s' is required.", HDFS_URI));
@@ -84,7 +81,8 @@ public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction {
hdfsUri = URI.create(locationProperties.get(HDFS_URI));
filePath = locationProperties.get(HdfsResource.HADOOP_FS_NAME) + hdfsUri.getPath();
- parseProperties(fileParams);
+ super.parseProperties(fileParams);
+
parseFile();
}
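With PATH_PARTITION_KEYS now routed through the shared file-format properties, an hdfs() call from the new test suite looks roughly like this (host, port, and path are placeholders):

    select * from hdfs(
        "uri" = "hdfs://host:port/path/dt1=cyw/*",
        "fs.defaultFS" = "hdfs://host:port",
        "hadoop.username" = "hadoop",
        "format" = "csv",
        "path_partition_keys" = "dt1");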
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java
index 8a4c5cb2d8..bb32c82653 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java
@@ -22,10 +22,10 @@ import org.apache.doris.analysis.StorageBackend.StorageType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TFileType;
-import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.util.HashMap;
import java.util.Map;
/**
@@ -37,14 +37,23 @@ public class HttpStreamTableValuedFunction extends ExternalFileTableValuedFuncti
public static final String NAME = "http_stream";
public HttpStreamTableValuedFunction(Map<String, String> params) throws AnalysisException {
- Map<String, String> validParams = new CaseInsensitiveMap();
+ Map<String, String> fileParams = new HashMap<>();
for (String key : params.keySet()) {
- if (!FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
+ String lowerKey = key.toLowerCase();
+ if (!FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
throw new AnalysisException(key + " is invalid property");
}
- validParams.put(key, params.get(key));
+ fileParams.put(lowerKey, params.get(key));
}
- parseProperties(validParams);
+
+ String formatString = fileParams.getOrDefault(FORMAT, "");
+ if (formatString.equals("parquet")
+ || formatString.equals("avro")
+ || formatString.equals("orc")) {
+ throw new AnalysisException("current http_stream does not yet
support parquet, avro and orc");
+ }
+
+ super.parseProperties(fileParams);
}
// =========== implement abstract methods of ExternalFileTableValuedFunction =================
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java
index f6693317ba..129c3f930c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java
@@ -56,32 +56,31 @@ public class LocalTableValuedFunction extends ExternalFileTableValuedFunction {
.add(BACKEND_ID)
.build();
- private String filePath;
private long backendId;
public LocalTableValuedFunction(Map<String, String> params) throws AnalysisException {
- Map<String, String> fileFormatParams = new CaseInsensitiveMap();
+ Map<String, String> fileParams = new CaseInsensitiveMap();
locationProperties = Maps.newHashMap();
for (String key : params.keySet()) {
- if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
- fileFormatParams.put(key, params.get(key));
- } else if (LOCATION_PROPERTIES.contains(key.toLowerCase())) {
- locationProperties.put(key.toLowerCase(), params.get(key));
+ String lowerKey = key.toLowerCase();
+ if (FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
+ fileParams.put(lowerKey, params.get(key));
+ } else if (LOCATION_PROPERTIES.contains(lowerKey)) {
+ locationProperties.put(lowerKey, params.get(key));
} else {
throw new AnalysisException(key + " is invalid property");
}
}
- if (!locationProperties.containsKey(FILE_PATH)) {
- throw new AnalysisException(String.format("Configuration '%s' is required.", FILE_PATH));
- }
- if (!locationProperties.containsKey(BACKEND_ID)) {
- throw new AnalysisException(String.format("Configuration '%s' is required.", BACKEND_ID));
+ for (String key : LOCATION_PROPERTIES) {
+ if (!locationProperties.containsKey(key)) {
+ throw new AnalysisException(String.format("Configuration '%s' is required.", key));
+ }
}
filePath = locationProperties.get(FILE_PATH);
backendId = Long.parseLong(locationProperties.get(BACKEND_ID));
- parseProperties(fileFormatParams);
+ super.parseProperties(fileParams);
getFileListFromBackend();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index 300c51c7ad..74c8ae5c4a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -57,7 +57,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
ImmutableSet.of(S3Properties.SESSION_TOKEN, PropertyConverter.USE_PATH_STYLE, S3Properties.REGION, PATH_PARTITION_KEYS);
- private static final ImmutableSet<String> PROPERTIES_SET = ImmutableSet.<String>builder()
+ private static final ImmutableSet<String> LOCATION_PROPERTIES = ImmutableSet.<String>builder()
.add(S3_URI)
.add(S3Properties.ENDPOINT)
.addAll(DEPRECATED_KEYS)
@@ -70,31 +70,56 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
private String virtualBucket = "";
public S3TableValuedFunction(Map<String, String> params) throws AnalysisException {
- Map<String, String> tvfParams = getValidParams(params);
- forceVirtualHosted = isVirtualHosted(tvfParams);
- s3uri = getS3Uri(tvfParams);
+
+ Map<String, String> fileParams = new HashMap<>();
+ for (Map.Entry<String, String> entry : params.entrySet()) {
+ String key = entry.getKey();
+ String lowerKey = key.toLowerCase();
+ if (!LOCATION_PROPERTIES.contains(lowerKey) && !FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
+ throw new AnalysisException("Invalid property: " + key);
+ }
+ if (DEPRECATED_KEYS.contains(lowerKey)) {
+ lowerKey = S3Properties.S3_PREFIX + lowerKey;
+ }
+ fileParams.put(lowerKey, entry.getValue());
+ }
+
+ if (!fileParams.containsKey(S3_URI)) {
+ throw new AnalysisException("Missing required property: " +
S3_URI);
+ }
+
+ forceVirtualHosted = isVirtualHosted(fileParams);
+ s3uri = getS3Uri(fileParams);
final String endpoint = forceVirtualHosted
? getEndpointAndSetVirtualBucket(params)
: s3uri.getBucketScheme();
- if (!tvfParams.containsKey(S3Properties.REGION)) {
+ if (!fileParams.containsKey(S3Properties.REGION)) {
String region = S3Properties.getRegionOfEndpoint(endpoint);
- tvfParams.put(S3Properties.REGION, region);
+ fileParams.put(S3Properties.REGION, region);
}
CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(endpoint,
- tvfParams.get(S3Properties.REGION),
- tvfParams.get(S3Properties.ACCESS_KEY),
- tvfParams.get(S3Properties.SECRET_KEY));
- if (tvfParams.containsKey(S3Properties.SESSION_TOKEN)) {
- credential.setSessionToken(tvfParams.get(S3Properties.SESSION_TOKEN));
+ fileParams.get(S3Properties.REGION),
+ fileParams.get(S3Properties.ACCESS_KEY),
+ fileParams.get(S3Properties.SECRET_KEY));
+ if (fileParams.containsKey(S3Properties.SESSION_TOKEN)) {
+ credential.setSessionToken(fileParams.get(S3Properties.SESSION_TOKEN));
}
// set S3 location properties
// these five properties is necessary, no one can be lost.
locationProperties = S3Properties.credentialToMap(credential);
- String usePathStyle = tvfParams.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false");
+ String usePathStyle = fileParams.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false");
locationProperties.put(PropertyConverter.USE_PATH_STYLE, usePathStyle);
- parseProperties(tvfParams);
+ super.parseProperties(fileParams);
+
+ if (forceVirtualHosted) {
+ filePath = NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM
+ + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey();
+ } else {
+ filePath = NAME + S3URI.SCHEME_DELIM + s3uri.getKey();
+ }
+
if (FeConstants.runningUnitTest) {
// Just check
FileSystemFactory.getS3FileSystem(locationProperties);
@@ -103,25 +128,6 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
}
}
- private static Map<String, String> getValidParams(Map<String, String> params) throws AnalysisException {
- Map<String, String> validParams = new HashMap<>();
- for (Map.Entry<String, String> entry : params.entrySet()) {
- String key = entry.getKey();
- String lowerKey = key.toLowerCase();
- if (!PROPERTIES_SET.contains(lowerKey) && !FILE_FORMAT_PROPERTIES.contains(lowerKey)) {
- throw new AnalysisException("Invalid property: " + key);
- }
- if (DEPRECATED_KEYS.contains(lowerKey)) {
- lowerKey = S3Properties.S3_PREFIX + lowerKey;
- }
- validParams.put(lowerKey, entry.getValue());
- }
- if (!validParams.containsKey(S3_URI)) {
- throw new AnalysisException("Missing required property: " +
S3_URI);
- }
- return S3Properties.requiredS3TVFProperties(validParams);
- }
-
private String getEndpointAndSetVirtualBucket(Map<String, String> params) throws AnalysisException {
Preconditions.checkState(forceVirtualHosted, "only invoked when force virtual hosted.");
String[] fileds = s3uri.getVirtualBucket().split("\\.", 2);
@@ -167,11 +173,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
@Override
public String getFilePath() {
// must be "s3://..."
- if (forceVirtualHosted) {
- return NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM
- + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey();
- }
- return NAME + S3URI.SCHEME_DELIM + s3uri.getKey();
+ return filePath;
}
@Override
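Net effect on the S3 TVF: the validation that lived in the removed getValidParams() now happens inline in the constructor, and the path is computed once and cached in the inherited filePath field, so getFilePath() becomes a plain getter returning either "s3://<virtualBucket>/<bucket>/<key>" (virtual-hosted) or "s3://<key>".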
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2
new file mode 100644
index 0000000000..b0bff9aa47
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate
new file mode 100644
index 0000000000..d47c707da0
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz
new file mode 100644
index 0000000000..1f35b6ba8f
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4
new file mode 100644
index 0000000000..8341cce4fd
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 differ
diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy
new file mode 100644
index 0000000000..9ac2b7ae29
Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy differ
diff --git a/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out b/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out
new file mode 100644
index 0000000000..19699b0dc5
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out
@@ -0,0 +1,150 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !gz_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+
+-- !gz_2 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+
+-- !bz2_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255
+101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854
+1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313
+
+-- !bz2_2 --
+1476 2023-09-07
+1521 2023-09-07
+259 2023-09-07
+50 2023-09-07
+71 2023-09-07
+785 2023-09-07
+869 2023-09-07
+1064 2023-09-08
+126 2023-09-08
+137 2023-09-08
+1425 2023-09-08
+804 2023-09-08
+1240 2023-09-09
+1565 2023-09-09
+1688 2023-09-09
+
+-- !lz4_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255
+101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854
+1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313
+1011 252076 gHmFDhtytYzWETIxdpkpMUpnLd 2023-09-17 6.773606843056635
+1012 819615 rFfRHquexplDJvSeUK 2023-11-02 3.220639250504097
+1013 413456 uvNPelHXYjJKiOkwdNbmUkGzxiiqLo 2024-03-15 8.305048700108081
+1014 308042 vnzcsvHxnWFhvLwJkAtUqe 2024-06-15 1.5668867233009998
+1015 603837 VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU 2024-08-17 3.8287482122289007
+
+-- !lz4_2 --
+694832 buHDwfGeNHfpRFdNaogneddi
+950297 OulifcGqzIILdOGcHZlWaCiHlEB
+143630 jqtiiLUUvSGeTkxsHL
+664267 eeVExxxcioSmmX
+890760 DYwfhhbkWATuSr
+79734 hgXsiaeVOkXdWUQvNnNjLPsdiD
+855390 axGECHeiluHLBUKPEKqDheksZ
+276590 lVQfdliXrLiJOpjlWM
+585845 ztkLoqCHmOuanAdOUV
+218729 goZsLvvWFOIjlzSAitC
+303099 xRBcfDbimqmycPY
+353815 CTDIqGYPRei
+165056 NMqtBlPfByAWyMpLdp
+172440 GjCGMSYnDVp
+887563 CxqhRyCsNhLjfyV
+248229 rCbtJQHJifNyhTEVrwESIQDGBylUWG
+444180 imAEgaSWymXzsCjSZQpPSy
+453083 XJzGEouGptILvnSTmVbOt
+988672 RtONQThrfkeepz
+977907 HMIJjkgcmNZVxdQaKqpMsgJYws
+
+-- !deflate_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+
+-- !deflate_2 --
+2023-09-07 7
+2023-09-08 5
+2023-09-09 6
+2023-09-10 6
+2023-09-11 4
+2023-09-12 8
+2023-09-13 4
+2023-09-14 6
+2023-09-15 6
+2023-09-16 5
+2023-09-17 15
+2023-09-18 7
+
+-- !snappy_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255
+101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854
+1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313
+1011 252076 gHmFDhtytYzWETIxdpkpMUpnLd 2023-09-17 6.773606843056635
+1012 819615 rFfRHquexplDJvSeUK 2023-11-02 3.220639250504097
+1013 413456 uvNPelHXYjJKiOkwdNbmUkGzxiiqLo 2024-03-15 8.305048700108081
+1014 308042 vnzcsvHxnWFhvLwJkAtUqe 2024-06-15 1.5668867233009998
+1015 603837 VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU 2024-08-17 3.8287482122289007
+1016 912679 eEjldPhxojSjTnE 2024-01-09 1.3717891874157961
+1017 630392 TcczYHXbwaCYzFSfXJlhsFjN 2023-10-07 4.733337480058437
+
+-- !snappy_2 --
+
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out b/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out
new file mode 100644
index 0000000000..6ac8589d90
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out
@@ -0,0 +1,68 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !hdfs_1 --
+1 hello cyw
+2 aaaaaaa cyw
+3 1121399 cyw
+33 qqqqq cyw
+44 tttttttttt cyw
+55 qwr cyw
+
+-- !hdfs_2 --
+
+-- !hdfs_3 --
+hello 1111 1
+hello 11111 1
+hello 33333 1
+
+-- !hdfs_4 --
+1111 mkdir iiiiii hello
+11111 8888888 hello hello
+33333 helloworld 999999 hello
+
+-- !hdfs_5 --
+1 hello 0 two cyw
+2 aaaaaaa 9 two cyw
+3 1121399 1 two cyw
+33 qqqqq 666 two cyw
+44 tttttttttt 77 two cyw
+55 qwr 91 two cyw
+
+-- !local_1 --
+1 hello cyw
+2 aaaaaaa cyw
+3 1121399 cyw
+
+-- !local_2 --
+1 hello cyw
+2 aaaaaaa cyw
+
+-- !local_3 --
+1111 hello
+11111 hello
+33333 hello
+
+-- !local_4 --
+two hello 1111 mkdir
+two hello 11111 8888888
+two hello 33333 helloworld
+
+-- !s3_1 --
+cyw
+cyw
+cyw
+
+-- !s3_2 --
+1111 hello
+11111 hello
+33333 hello
+
+-- !s3_3 --
+1111 mkdir iiiiii hello
+11111 8888888 hello hello
+33333 helloworld 999999 hello
+
+-- !s3_4 --
+33 qqqqq 666 two cyw
+44 tttttttttt 77 two cyw
+55 qwr 91 two cyw
+
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv
new file mode 100644
index 0000000000..b8537e591b
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv
@@ -0,0 +1,3 @@
+1,hello
+2,aaaaaaa
+3,1121399
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv
new file mode 100644
index 0000000000..0743633d2f
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv
@@ -0,0 +1,3 @@
+33,qqqqq
+44,tttttttttt
+55,qwr
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv
new file mode 100644
index 0000000000..b51cbf9041
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv
@@ -0,0 +1,3 @@
+11111,8888888
+33333,helloworld
+1111,mkdir
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv
new file mode 100644
index 0000000000..3b2ba1cf44
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv
@@ -0,0 +1,3 @@
+1,hello,0
+2,aaaaaaa,9
+3,1121399,1
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv
new file mode 100644
index 0000000000..e5573bf50c
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv
@@ -0,0 +1,3 @@
+33,qqqqq,666
+44,tttttttttt,77
+55,qwr,91
diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv
new file mode 100644
index 0000000000..ff4b3f9ac6
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv
@@ -0,0 +1,3 @@
+11111,8888888,hello
+33333,helloworld,999999
+1111,mkdir,iiiiii
diff --git a/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out b/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out
new file mode 100644
index 0000000000..1308b7ffef
--- /dev/null
+++ b/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out
@@ -0,0 +1,144 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !gz_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255
+101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854
+1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313
+1011 252076 gHmFDhtytYzWETIxdpkpMUpnLd 2023-09-17 6.773606843056635
+1012 819615 rFfRHquexplDJvSeUK 2023-11-02 3.220639250504097
+1013 413456 uvNPelHXYjJKiOkwdNbmUkGzxiiqLo 2024-03-15 8.305048700108081
+1014 308042 vnzcsvHxnWFhvLwJkAtUqe 2024-06-15 1.5668867233009998
+1015 603837 VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU 2024-08-17 3.8287482122289007
+
+-- !gz_2 --
+1 2024-02-09
+2 2024-08-31
+3 2024-05-06
+4 2023-10-07
+5 2024-01-11
+6 2023-11-11
+7 2024-02-17
+8 2023-11-16
+9 2024-08-16
+10 2024-06-10
+11 2024-01-04
+12 2023-12-18
+13 2024-05-15
+14 2024-06-30
+15 2024-05-06
+16 2024-07-26
+17 2024-02-08
+18 2024-08-11
+19 2024-05-27
+20 2023-12-18
+
+-- !bz2_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255
+101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854
+1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313
+
+-- !bz2_2 --
+1476 2023-09-07
+1521 2023-09-07
+259 2023-09-07
+50 2023-09-07
+71 2023-09-07
+785 2023-09-07
+869 2023-09-07
+1064 2023-09-08
+126 2023-09-08
+137 2023-09-08
+1425 2023-09-08
+804 2023-09-08
+1240 2023-09-09
+
+-- !lz4_1 --
+1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554
+10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178
+100 813423 zICskqgcdPc 2024-03-23 8.486529018746493
+1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435
+1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949
+1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128
+1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222
+1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263
+1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725
+1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216
+1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585
+1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904
+1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255
+101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854
+
+-- !lz4_2 --
+1 buHDwfGeNHfpRFdNaogneddi
+
+-- !deflate_1 --
+4611713315956779722 0 [159]
+4611737294102341731 1 [18,348,1010]
+4611746138795773784 0 [18]
+4611784761593342388 0 []
+4611801970150944452 0 []
+4611823514689510950 0 [] {"Превьюшки":{"doc
+4611838050999642253 0 [18]
+4611870011201662970 0 [18,348,1010]
+4611987206053671537 0 [18] {"Превьюшки
+4612024970660173441 0 [18,868]
+4612121739736542264 0 [18,348,1010]
+4612128194949363638 0 []
+4612152063486747092 0 [3]
+4612190315946473296 1 [18,348,1010]
+4612251026602549726 0 [32,62,45,48,120,194,159,348]
+4612255738481454387 0 []
+4612315312096080662 0 [] {"Правая колонка":
+
+-- !deflate_2 --
+4611713315956779722 0
+4611737294102341731 1
+4611746138795773784 1
+4611784761593342388 1
+4611801970150944452 1
+
+-- !snappy_1 --
+4611713315956779722 0 [159]
+4611737294102341731 1 [18,348,1010]
+4611746138795773784 0 [18]
+4611784761593342388 0 []
+4611801970150944452 0 []
+4611823514689510950 0 [] {"Превьюшки":{"doc
+4611838050999642253 0 [18]
+4611870011201662970 0 [18,348,1010]
+4611987206053671537 0 [18] {"Превьюшки
+4612024970660173441 0 [18,868]
+4612121739736542264 0 [18,348,1010]
+4612128194949363638 0 []
+4612152063486747092 0 [3]
+4612190315946473296 1 [18,348,1010]
+4612251026602549726 0 [32,62,45,48,120,194,159,348]
+4612255738481454387 0 []
+4612315312096080662 0 [] {"Правая колонка":
+
+-- !snappy_2 --
+0
+
diff --git a/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy b/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy
new file mode 100644
index 0000000000..0f783900df
--- /dev/null
+++ b/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy
@@ -0,0 +1,127 @@
+import org.junit.Assert
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_local_tvf_compression",
"p2,external,tvf,external_remote,external_remote_tvf") {
+ List<List<Object>> backends = sql """ show backends """
+ assertTrue(backends.size() > 0)
+ def be_id = backends[0][0]
+ def dataFilePath = context.config.dataPath + "/external_table_p2/tvf/compress"
+
+ def outFilePath="/compress"
+
+ for (List<Object> backend : backends) {
+ def be_host = backend[1]
+ scpFiles ("root", be_host, dataFilePath, outFilePath, false);
+ }
+
+ String filename = "test_tvf.csv"
+
+
+ String compress_type = "gz"
+ qt_gz_1 """
+ select * from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit
12;
+ """
+
+ qt_gz_2 """
+ select * from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}") where c1="1" order by
c1,c2,c3,c4,c5 limit 12;
+ """
+
+
+
+ compress_type = "bz2"
+ qt_bz2_1 """
+ select * from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 15;
+ """
+ qt_bz2_2 """
+ select c1,c4 from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}") order by cast(c4 as date),c1
limit 15;
+ """
+
+
+
+
+ compress_type = "lz4";
+
+ qt_lz4_1 """
+ select * from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}FRAME") order by c1,c2,c3,c4,c5
limit 20;
+ """
+ qt_lz4_2 """
+ select c2,c3 from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}FRAME") where c2!="abcsdasdsadsad"
order by cast(c1 as int),c2,c3 limit 20;
+ """
+
+
+
+ compress_type = "deflate";
+ qt_deflate_1 """
+ select * from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 12
;
+ """
+ qt_deflate_2 """
+ select c4,count(*) from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}") group by c4 order by c4 limit 12
;
+ """
+
+
+
+ compress_type = "snappy";
+ qt_snappy_1 """
+ select * from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}block") order by c1,c2,c3,c4,c5
limit 22 ;
+ """
+ qt_snappy_2 """
+ select c2,c3 from local(
+ "file_path" = "${outFilePath}/${filename}.${compress_type}",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "compress_type" ="${compress_type}block") where c2="abcd" order by c3
limit 22 ;
+ """
+
+}
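One detail worth noting in the suite above: for lz4 and snappy the tests pass the framed/block variants ("lz4FRAME", "snappyblock") as compress_type rather than the bare codec names, for example (backend id and path are placeholders):

    select * from local(
        "file_path" = "/compress/test_tvf.csv.lz4",
        "backend_id" = "10001",
        "format" = "csv",
        "compress_type" = "lz4FRAME");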
diff --git a/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy b/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy
new file mode 100644
index 0000000000..0c4c74e924
--- /dev/null
+++ b/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_path_partition_keys",
"p2,external,tvf,external_remote,external_remote_tvf") {
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost")
+ String hdfsPort = context.config.otherConfigs.get("extHdfsPort")
+
+ String baseUri = "hdfs://${nameNodeHost}:${hdfsPort}/catalog/tvf/csv/test_path_partition_keys"
+ String baseFs = "hdfs://${nameNodeHost}:${hdfsPort}"
+
+ order_qt_hdfs_1 """
+ select * from HDFS(
+ "uri" = "${baseUri}/dt1=cyw/*",
+ "fs.defaultFS"= "${baseFs}",
+ "hadoop.username" = "hadoop",
+ "format" = "csv",
+ "path_partition_keys"="dt1" ) order by c1,c2 ;
+ """
+
+ order_qt_hdfs_2 """
+ select * from HDFS(
+ "uri" = "${baseUri}/dt1=cyw/*",
+ "fs.defaultFS"= "${baseFs}",
+ "hadoop.username" = "hadoop",
+ "format" = "csv",
+ "path_partition_keys"="dt1") where dt1!="cyw" order by c1,c2 limit
3;
+ """
+
+ order_qt_hdfs_3 """
+ select dt1,c1,count(*) from HDFS(
+ "uri" = "${baseUri}/dt1=hello/*",
+ "fs.defaultFS"= "${baseFs}",
+ "hadoop.username" = "hadoop",
+ "format" = "csv",
+ "path_partition_keys"="dt1") group by c1,dt1 order by c1;
+ """
+
+ order_qt_hdfs_4 """
+ select * from HDFS(
+ "uri" = "${baseUri}/dt2=two/dt1=hello/*",
+ "fs.defaultFS"= "${baseFs}",
+ "hadoop.username" = "hadoop",
+ "format" = "csv",
+ "path_partition_keys"="dt1") order by c1;
+ """
+
+ order_qt_hdfs_5 """
+ select * from HDFS(
+ "uri" = "${baseUri}/dt2=two/dt1=cyw/*",
+ "fs.defaultFS"= "${baseFs}",
+ "hadoop.username" = "hadoop",
+ "format" = "csv",
+ "path_partition_keys"="dt2,dt1");
+ """
+
+ }
+
+ List<List<Object>> backends = sql """ show backends """
+ assertTrue(backends.size() > 0)
+ def be_id = backends[0][0]
+ def dataFilePath = context.config.dataPath + "/external_table_p2/test_path_partition_keys/"
+
+ def outFilePath="/test_path_partition_keys"
+
+ for (List<Object> backend : backends) {
+ def be_host = backend[1]
+ scpFiles ("root", be_host, dataFilePath, outFilePath, false);
+ }
+
+ order_qt_local_1 """
+ select * from local(
+ "file_path" = "${outFilePath}/dt1=cyw/a.csv",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "path_partition_keys"="dt1") order by c1,c2;
+ """
+
+ order_qt_local_2 """
+ select * from local(
+ "file_path" = "${outFilePath}/dt1=cyw/*",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "path_partition_keys"="dt1") order by c1,c2 limit 2;
+ """
+
+ order_qt_local_3 """
+ select c1,dt1 from local(
+ "file_path" = "${outFilePath}/dt1=hello/c.csv",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "path_partition_keys"="dt1") order by c1,c2 limit 7;
+ """
+
+ order_qt_local_4 """
+ select dt2,dt1,c1,c2 from local(
+ "file_path" = "${outFilePath}/dt2=two/dt1=hello/c.csv",
+ "backend_id" = "${be_id}",
+ "format" = "csv",
+ "path_partition_keys"="dt2,dt1") order by c1,c2 limit 9;
+ """
+
+
+ String ak = getS3AK()
+ String sk = getS3SK()
+ String s3_endpoint = getS3Endpoint()
+ String region = getS3Region()
+ String bucket = context.config.otherConfigs.get("s3BucketName");
+
+ sql """ set query_timeout=3600; """
+
+ order_qt_s3_1 """
+ select dt1 from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt1=cyw/b.csv",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "path_partition_keys"="dt1")
+ """
+
+
+
+ order_qt_s3_2 """
+ select c1,dt1 from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt1=hello/c.csv",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "path_partition_keys"="dt1") limit 3;
+ """
+
+
+ order_qt_s3_3 """
+ select * from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "path_partition_keys"="dt1") limit 3;
+ """
+
+
+ order_qt_s3_4 """
+ select * from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "path_partition_keys"="dt2,dt1") limit 3;
+ """
+}
diff --git a/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy b/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy
new file mode 100644
index 0000000000..57cfdb136d
--- /dev/null
+++ b/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_s3_tvf_compression",
"p2,external,tvf,external_remote,external_remote_tvf") {
+
+ String ak = getS3AK()
+ String sk = getS3SK()
+ String s3_endpoint = getS3Endpoint()
+ String region = getS3Region()
+ String bucket = context.config.otherConfigs.get("s3BucketName");
+
+ sql """ set query_timeout=3600; """
+
+ String compress_type = "gz"
+ qt_gz_1 """
+ select * from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 20;
+ """
+
+
+ qt_gz_2 """
+ select c1,c4 from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}") order by cast(c1 as int),c4 limit
20;
+ """
+
+
+
+ compress_type = "bz2";
+ qt_bz2_1 """
+ select * from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 15;
+ """
+
+
+ qt_bz2_2 """
+ select c1,c4 from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}") where c1!="100" order by
cast(c4 as date),c1 limit 13;
+ """
+
+
+
+ compress_type = "lz4";
+ qt_lz4_1 """
+ select * from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}FRAME") order by c1,c2,c3,c4,c5
limit 14;
+ """
+
+
+ qt_lz4_2 """
+ select c1,c3 from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}FRAME") where
c3="buHDwfGeNHfpRFdNaogneddi" order by c3,c1 limit 14;
+ """
+
+
+ String select_field = "c1,c12,c23,c40";
+ String orderBy_limit = "order by c1,c12,c23,c40 limit 17 ";
+
+ compress_type = "deflate";
+ qt_deflate_1 """
+ select ${select_field} from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "column_separator" = '\001',
+ "compress_type" ="${compress_type}") ${orderBy_limit};
+ """
+
+ qt_deflate_2 """
+ select c1,c2 from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "column_separator" = '\001',
+ "use_path_style" = "true",
+ "compress_type" ="${compress_type}") group by c1,c2 order by c1,c2
limit 5;
+ """
+
+
+
+
+ compress_type = "snappy";
+ qt_snappy_1 """
+ select ${select_field} from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "column_separator" = '\001',
+ "compress_type" ="${compress_type}block") ${orderBy_limit};
+ """
+
+
+ qt_snappy_2 """
+ select count(*) from
+ s3(
+ "URI" =
"https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "REGION" = "${region}",
+ "FORMAT" = "csv",
+ "use_path_style" = "true",
+ "column_separator" = '\001',
+ "compress_type" ="${compress_type}block") where c2 ="abccc";
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]