This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new ec86e023c98 [Fix](Csv-Reader)Fix the issue of BE core dump caused by
improper configuration of column_separator and line_delimiter. (#33693) (#34027)
ec86e023c98 is described below
commit ec86e023c9898f37c252ee3a65dcbb8d79555f89
Author: Tiewei Fang <[email protected]>
AuthorDate: Wed Apr 24 21:44:40 2024 +0800
[Fix](Csv-Reader)Fix the issue of BE core dump caused by improper
configuration of column_separator and line_delimiter. (#33693) (#34027)
backport: #33693
---
be/src/vec/exec/format/csv/csv_reader.cpp | 8 ++-
.../data/load_p0/stream_load/special_seperator.csv | 2 +
.../stream_load/test_csv_special_seperator.out | 5 ++
.../stream_load/test_csv_special_seperator.groovy | 69 ++++++++++++++++++++++
4 files changed, 81 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index d9b166509f8..51fce61c9bc 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -80,9 +80,11 @@ void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>
splitted_values);
value_start_offset = idx + value_sep_len;
}
- // process the last column
- process_value_func(data, value_start_offset, line.size - value_start_offset, trimming_char,
- splitted_values);
+ if (line.size >= value_start_offset) {
+ // process the last column
+ process_value_func(data, value_start_offset, line.size - value_start_offset, trimming_char,
+ splitted_values);
+ }
}
void PlainCsvTextFieldSplitter::_split_field_single_char(const Slice& line,
diff --git a/regression-test/data/load_p0/stream_load/special_seperator.csv b/regression-test/data/load_p0/stream_load/special_seperator.csv
new file mode 100644
index 00000000000..85cfeba67e9
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/special_seperator.csv
@@ -0,0 +1,2 @@
+1|@|"100115"|@|"5501391"|@|"{\"avgSendTime\":2500,\"backupRecipientPhone\":\"[\\\"11345671255_4561\\\",\\\"14536625234_5370\\\"]\",\"caution\":\"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
14536625234_8204\\uff0c\\u624b\\u673a\\u53f7 123****1234
\\u987e\\u5ba2\\u9700\\u8981\\u9910\\u5177\",\"cityId\":351600,\"ctime\":1142353434,\"daySeq\":\"16\",\"deliveryTime\":0,\"detail\":\"[{\\\"actual_price\\\":1.1,\\\"app_food_code\\\":\\\"\\\\u9ec4\\\\u7116\\\\u9e21+\\\\u5343\\\\u5f20+\\\\u706b\\\\
[...]
+2|@|"100115"|@|"4442066"|@|"{\"avgSendTime\":3636,\"backupRecipientPhone\":\"[\\\"11342355223_6672\\\"]\",\"caution\":\"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
14536625234_3939\\uff0c\\u624b\\u673a\\u53f7 135****5187
\\u987e\\u5ba2\\u9700\\u89812\\u4efd\\u9910\\u5177\",\"cityId\":510725,\"ctime\":1124567897,\"daySeq\":\"1\",\"deliveryTime\":0,\"detail\":\"[{\\\"actual_price\\\":16,\\\"app_food_code\\\":\\\"\\\\u4e2d\\\\u9ebb\\\\u6284\\\\u624b\\\\u4e09\\\\u4e24\\\",\\\"attr_counts\\\":
[...]
diff --git a/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out b/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out
new file mode 100644
index 00000000000..c7dc0937b28
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select1 --
+1 100115 5501391
{"avgSendTime":2500,"backupRecipientPhone":"[\\"11345671255_4561\\",\\"14536625234_5370\\"]","caution":"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
14536625234_8204\\uff0c\\u624b\\u673a\\u53f7 123****1234
\\u987e\\u5ba2\\u9700\\u8981\\u9910\\u5177","cityId":351600,"ctime":1142353434,"daySeq":"16","deliveryTime":0,"detail":"[{\\"actual_price\\":1.1,\\"app_food_code\\":\\"\\\\u9ec4\\\\u7116\\\\u9e21+\\\\u5343\\\\u5f20+\\\\u706b\\\\u817f\\\\u80a0+\\\\u7c73\\\\u996d\\",\\"att
[...]
+2 100115 4442066
{"avgSendTime":3636,"backupRecipientPhone":"[\\"11342355223_6672\\"]","caution":"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
14536625234_3939\\uff0c\\u624b\\u673a\\u53f7 135****5187
\\u987e\\u5ba2\\u9700\\u89812\\u4efd\\u9910\\u5177","cityId":510725,"ctime":1124567897,"daySeq":"1","deliveryTime":0,"detail":"[{\\"actual_price\\":16,\\"app_food_code\\":\\"\\\\u4e2d\\\\u9ebb\\\\u6284\\\\u624b\\\\u4e09\\\\u4e24\\",\\"attr_counts\\":\\"\\",\\"attr_names\\":\\"\\",\\"box_num\\"
[...]
+
diff --git a/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy b/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy
new file mode 100644
index 00000000000..671db175dde
--- /dev/null
+++ b/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_csv_special_seperator", "p0") {
+ def tableName = "test_csv_special_seperator"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE if not exists `${tableName}` (
+ `id` bigint(20) NOT NULL,
+ `developerid` varchar(64) DEFAULT NULL COMMENT '',
+ `epoiid` varchar(64) DEFAULT NULL COMMENT '',
+ `orderjson` string COMMENT '',
+ `addtime` datetime NOT NULL,
+ `syn` tinyint(1) DEFAULT '0' COMMENT '',
+ `shopid` varchar(16) DEFAULT NULL COMMENT '',
+ `shopname` varchar(255) DEFAULT NULL COMMENT '',
+ `orderid` varchar(32) DEFAULT NULL COMMENT '',
+ `orderindex` varchar(16) DEFAULT NULL COMMENT '',
+ `ordervid` varchar(32) DEFAULT NULL COMMENT '',
+ `totalprice` varchar(8) DEFAULT NULL COMMENT '',
+ `sn` string COMMENT '打印机',
+ `printtype` int(1) DEFAULT NULL COMMENT '',
+ `is_print` int(1) DEFAULT '0' COMMENT '',
+ `is_cancel` tinyint(1) DEFAULT '0' COMMENT '',
+ `p_data` string COMMENT '',
+ `c_code` varchar(5) DEFAULT NULL COMMENT '',
+ `c_data` string COMMENT '',
+ `c_confirmtimes` int(2) DEFAULT '0' COMMENT ''
+ )
+ ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT ''
+ DISTRIBUTED BY HASH(`orderid`) BUCKETS 100
+ PROPERTIES (
+ "replication_num" = "1",
+ "compression" = "ZSTD"
+ );
+ """
+
+ streamLoad {
+ table "${tableName}"
+ set 'column_separator', "|@|"
+ set 'line_delimiter', "|@|\\n"
+ set 'trim_double_quotes', 'true'
+ set 'enclose', "\""
+ set 'escape', '\\'
+ set 'max_filter_ratio', '0'
+
+ file "special_seperator.csv"
+ }
+
+ sql "sync"
+ order_qt_select1 """ SELECT * FROM ${tableName} ORDER BY id;"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]