This is an automated email from the ASF dual-hosted git repository.
wanghailin pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new 08f969eb03 [Improve][format][text] add dateTimeFormatter to parse ISO8601 (#5974)
08f969eb03 is described below
commit 08f969eb0344bfa481632a7e58eb0bb0bd998f61
Author: halo.kim <[email protected]>
AuthorDate: Thu Dec 14 14:41:30 2023 +0900
[Improve][format][text] add dateTimeFormatter to parse ISO8601 (#5974)
---
.../seatunnel/common/utils/DateTimeUtils.java | 6 +-
.../e2e/connector/file/local/LocalFileIT.java | 6 ++
.../src/test/resources/text/e2e_time_format.txt | 5 ++
.../text/local_file_time_format_assert.conf | 95 ++++++++++++++++++++++
4 files changed, 111 insertions(+), 1 deletion(-)
diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateTimeUtils.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateTimeUtils.java
index 5801654cf5..bf81ed626c 100644
--- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateTimeUtils.java
+++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/DateTimeUtils.java
@@ -40,6 +40,9 @@ public class DateTimeUtils {
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT.value));
+ FORMATTER_MAP.put(
+ Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601,
+ DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));
}
public static LocalDateTime parse(String dateTime, Formatter formatter) {
@@ -54,7 +57,8 @@ public class DateTimeUtils {
YYYY_MM_DD_HH_MM_SS("yyyy-MM-dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SPOT("yyyy.MM.dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SLASH("yyyy/MM/dd HH:mm:ss"),
- YYYY_MM_DD_HH_MM_SS_NO_SPLIT("yyyyMMddHHmmss");
+ YYYY_MM_DD_HH_MM_SS_NO_SPLIT("yyyyMMddHHmmss"),
+ YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss");
private final String value;
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
index bb80160f14..09a4ae52eb 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
@@ -66,6 +66,11 @@ public class LocalFileIT extends TestSuiteBase {
"/seatunnel/read/text_delimiter/e2e.txt",
container);
+ ContainerUtil.copyFileIntoContainers(
+ "/text/e2e_time_format.txt",
+ "/seatunnel/read/text_time_format/e2e.txt",
+ container);
+
Path txtLzo =
convertToLzoFile(ContainerUtil.getResourcesFile("/text/e2e.txt"));
ContainerUtil.copyFileIntoContainers(
txtLzo, "/seatunnel/read/lzo_text/e2e.txt", container);
@@ -103,6 +108,7 @@ public class LocalFileIT extends TestSuiteBase {
helper.execute("/text/fake_to_local_file_text.conf");
helper.execute("/text/local_file_text_lzo_to_assert.conf");
helper.execute("/text/local_file_delimiter_assert.conf");
+ helper.execute("/text/local_file_time_format_assert.conf");
// test read skip header
helper.execute("/text/local_file_text_skip_headers.conf");
// test read local text file
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/e2e_time_format.txt b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/e2e_time_format.txt
new file mode 100644
index 0000000000..b53cbf2e99
--- /dev/null
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/e2e_time_format.txt
@@ -0,0 +1,5 @@
+PgxkWerPquZxADbwRoyZXWZYjOZGvPkcRgcvBHHlSezTHszfCM9312330451016879764123434177920993828271669125781ofduatrue02080228995824162301554221532246172.5053808E386.563348638622289E306KjJjD2023-03-2048955630047163560901.2848899427908584092023-01-11T06:25:29OxqxAMLLAWTMzSvpVKDBXwVuLuVMdhJAbNYRqEmrsQBARdHLAo98774360215016679841391554731369111688804353367rsgcotrue1212280190712202440694969264535828985.9302515E371.2125301856008725E308tVuZI2023-08-032
[...]
+zxMhGtbuHzxGFwmfFHIUAFvvTgUvQqetaDxOzAavJELHDSdPEV101244983376266331014538704017395315171492457270otcMntrue1024860139917168158893375714893248001.4333913E381.4334353544948444E308VdcYj2023-05-1991883965802194963022.6890574501331289452023-04-26T00:46:03PRIEJkcMnYJRsURrfhCbSgtGebklCfMXxzhZOZMudVetgtUCXc7736457412116475204164682112718260472701764785855oCRKRtrue851925389193674632146772472708622432.5017376E371.4791889801142986E308KIZKN2023-09-1
[...]
+EIYLFVjmjZXKcbLQtzXKMzIqLccyubcQygIssDqfcwotNQDdfH1836526392121945431313063532901700703821233811949qIlEotrue92987344051191848244308123217417652.6358307E379.12573038650651E307wrQCE2023-06-1169873404793136392100.0758355471497874132023-02-25T07:13:57IRAHziGvkRHEaUmcameBKDUCNFEjmKaafwSGblGdJGGyzQivvd12711189912021715577886030065553480147504046565RpOswfalse122122444030768933777305146193430843.3350248E381.2526133143299848E308kzyBq2023-07-1557
[...]
+tfaoRtCwuXCoiKkBcvPOoixYBZnaUlPQMFaRjxhigVLzmBrskw190529529814451211117678789994558371211783348ccGkzfalse1112829829981778213199660821898045981.2857434E383.343575138440927E307SsSaC2023-10-2658282015679301802224.6155516408553745142023-01-26T13:15:35IETWTtUXEMkdNCiBvZPKghKHXjQUvSMaMsKYCmzsLRjFhEQXyv76798692084328842150475226014007534741586287890wbzKKtrue6625604192054124846725009551245517061.307359E381.6429413197552776E308QdOjL2023-02-22576
[...]
+hdTngggfdRvAAMngAsZUYTEQuTFQEHdIzjOIEGIoYrTYZLIvey760974310142710026829414079475220181644270624MLIllfalse3622155133605466673524332669773532608.235333E379.308989713025347E307nrzoy2023-05-2418552644397825116718.5869443937920164442023-01-08T10:11:24bzXNzJVCPXkxeiQSpYXaVJoHWTJnKJbeIiuknfLOtQAGrKUoFr5144561031691489776106356671519647880411104465196MwxgFtrue501558654910648188780747761689955443.637149E371.4784398529023391E308cZRyO2023-06-04192
[...]
\ No newline at end of file
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_time_format_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_time_format_assert.conf
new file mode 100644
index 0000000000..ef69e2f4d0
--- /dev/null
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/text/local_file_time_format_assert.conf
@@ -0,0 +1,95 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+env {
+ execution.parallelism = 1
+ spark.app.name = "SeaTunnel"
+ spark.executor.instances = 2
+ spark.executor.cores = 1
+ spark.executor.memory = "1g"
+ spark.master = local
+ job.mode = "BATCH"
+}
+
+source {
+ LocalFile {
+ path = "/seatunnel/read/text_time_format"
+ file_format_type = "text"
+ schema = {
+ fields {
+ c_map = "map<string, string>"
+ c_array = "array<int>"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_bytes = bytes
+ c_date = date
+ c_decimal = "decimal(38, 18)"
+ c_timestamp = timestamp
+ c_row = {
+ c_map = "map<string, string>"
+ c_array = "array<int>"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_bytes = bytes
+ c_date = date
+ c_decimal = "decimal(38, 18)"
+ c_timestamp = timestamp
+ }
+ }
+ }
+ read_columns = [c_timestamp]
+ datetime_format = "yyyy-MM-dd'T'HH:mm:ss"
+ result_table_name = "fake"
+ }
+}
+
+sink {
+ Assert {
+ rules {
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 5
+ }
+ ],
+ field_rules = [
+ {
+ field_name = c_timestamp
+ field_type = timestamp
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+