This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b0c5250bf9 [Enhancement](tvf) support trim_double_quotes and
skip_lines for S3 and HDFS table valued function (#17224)
b0c5250bf9 is described below
commit b0c5250bf9624f0d6f5438e405462530157fdffb
Author: gitccl <[email protected]>
AuthorDate: Wed Mar 1 23:41:31 2023 +0800
[Enhancement](tvf) support trim_double_quotes and skip_lines for S3 and
HDFS table valued function (#17224)
support trim_double_quotes and skip_lines for S3 and HDFS table valued
function
---
docs/en/docs/sql-manual/sql-functions/table-functions/hdfs.md | 5 +++++
docs/en/docs/sql-manual/sql-functions/table-functions/s3.md | 5 +++++
.../docs/sql-manual/sql-functions/table-functions/hdfs.md | 5 +++++
.../zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md | 5 +++++
.../doris/tablefunction/ExternalFileTableValuedFunction.java | 11 ++++++++++-
5 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/hdfs.md
b/docs/en/docs/sql-manual/sql-functions/table-functions/hdfs.md
index 7742f676f4..8c671f7d14 100644
--- a/docs/en/docs/sql-manual/sql-functions/table-functions/hdfs.md
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/hdfs.md
@@ -76,6 +76,11 @@ File format parameters:
- `num_as_string`: (optional) default `false`
- `fuzzy_parse`: (optional) default `false`
+ <version since="dev">The following 2 parameters are used for loading in
csv format</version>
+
+- `trim_double_quotes`: Boolean type (optional), the default value is `false`.
`true` means that the outermost double quotes of each field in the csv file are
trimmed.
+- `skip_lines`: Integer type (optional), the default value is 0. The number of
lines to skip at the beginning of the csv file. It is ignored when the format is
`csv_with_names` or `csv_with_names_and_types`.
+
### Examples
Read and access csv format files on hdfs storage.
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
index 488a91020c..5e716d6980 100644
--- a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -78,6 +78,11 @@ file format parameter:
- `num_as_string`: (optional) default `"false"`
- `fuzzy_parse`: (optional) default `"false"`
+ <version since="dev">The following 2 parameters are used for loading in
csv format</version>
+
+- `trim_double_quotes`: Boolean type (optional), the default value is `false`.
`true` means that the outermost double quotes of each field in the csv file are
trimmed.
+- `skip_lines`: Integer type (optional), the default value is 0. The number of
lines to skip at the beginning of the csv file. It is ignored when the format is
`csv_with_names` or `csv_with_names_and_types`.
+
### Example
Read and access csv format files on S3-compatible object storage.
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/hdfs.md
b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/hdfs.md
index 5ac5061240..3929579e42 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/hdfs.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/hdfs.md
@@ -78,6 +78,11 @@ hdfs(
- `num_as_string`: (选填) 默认为 `false`
- `fuzzy_parse`: (选填) 默认为 `false`
+ <version since="dev">下面2个参数是用于csv格式的导入</version>
+
+- `trim_double_quotes`: 布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 csv 文件每个字段最外层的双引号
+- `skip_lines`: 整数类型,选填,默认值为0,含义为跳过csv文件的前几行。当format设置为 `csv_with_names` 或
`csv_with_names_and_types` 时,该参数会失效
+
### Examples
读取并访问 HDFS 存储上的csv格式文件
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
index 542bfeb8b4..6854c92f6a 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -80,6 +80,11 @@ S3 tvf中的每一个参数都是一个 `"key"="value"` 对。
- `num_as_string`: (选填) 默认为 `false`
- `fuzzy_parse`: (选填) 默认为 `false`
+ <version since="dev">下面2个参数是用于csv格式的导入</version>
+
+- `trim_double_quotes`: 布尔类型,选填,默认值为 `false`,为 `true` 时表示裁剪掉 csv 文件每个字段最外层的双引号
+- `skip_lines`: 整数类型,选填,默认值为0,含义为跳过csv文件的前几行。当format设置为 `csv_with_names` 或
`csv_with_names_and_types` 时,该参数会失效
+
### Example
读取并访问 S3 兼容的对象存储上的csv格式文件
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index adfbeceafb..3cc34f34e7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -80,6 +80,8 @@ public abstract class ExternalFileTableValuedFunction extends
TableValuedFunctio
protected static final String READ_JSON_BY_LINE = "read_json_by_line";
protected static final String NUM_AS_STRING = "num_as_string";
protected static final String FUZZY_PARSE = "fuzzy_parse";
+ protected static final String TRIM_DOUBLE_QUOTES = "trim_double_quotes";
+ protected static final String SKIP_LINES = "skip_lines";
protected static final ImmutableSet<String> FILE_FORMAT_PROPERTIES = new
ImmutableSet.Builder<String>()
.add(FORMAT)
@@ -91,6 +93,8 @@ public abstract class ExternalFileTableValuedFunction extends
TableValuedFunctio
.add(FUZZY_PARSE)
.add(COLUMN_SEPARATOR)
.add(LINE_DELIMITER)
+ .add(TRIM_DOUBLE_QUOTES)
+ .add(SKIP_LINES)
.build();
@@ -109,7 +113,8 @@ public abstract class ExternalFileTableValuedFunction
extends TableValuedFunctio
private boolean readJsonByLine;
private boolean numAsString;
private boolean fuzzyParse;
-
+ private boolean trimDoubleQuotes;
+ private int skipLines;
public abstract TFileType getTFileType();
@@ -180,6 +185,8 @@ public abstract class ExternalFileTableValuedFunction
extends TableValuedFunctio
stripOuterArray =
Boolean.valueOf(validParams.get(STRIP_OUTER_ARRAY)).booleanValue();
numAsString =
Boolean.valueOf(validParams.get(NUM_AS_STRING)).booleanValue();
fuzzyParse =
Boolean.valueOf(validParams.get(FUZZY_PARSE)).booleanValue();
+ trimDoubleQuotes =
Boolean.valueOf(validParams.get(TRIM_DOUBLE_QUOTES)).booleanValue();
+ skipLines = Integer.valueOf(validParams.getOrDefault(SKIP_LINES,
"0")).intValue();
}
public List<TBrokerFileStatus> getFileStatuses() {
@@ -194,6 +201,8 @@ public abstract class ExternalFileTableValuedFunction
extends TableValuedFunctio
fileAttributes.setTextParams(fileTextScanRangeParams);
if (this.fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN) {
fileAttributes.setHeaderType(this.headerType);
+ fileAttributes.setTrimDoubleQuotes(trimDoubleQuotes);
+ fileAttributes.setSkipLines(skipLines);
} else if (this.fileFormatType == TFileFormatType.FORMAT_JSON) {
fileAttributes.setJsonRoot(jsonRoot);
fileAttributes.setJsonpaths(jsonPaths);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]