This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 419e922a69 [fix](json)Fix the bug that does not stop when reading json
files (#23062)
419e922a69 is described below
commit 419e922a6925bb02bf44c4d5e806c0fbcfa1fef7
Author: daidai <[email protected]>
AuthorDate: Fri Aug 18 18:23:19 2023 +0800
[fix](json)Fix the bug that does not stop when reading json files (#23062)
* [fix](json) Fix the bug where the reader does not stop when reading JSON files
---
be/src/vec/exec/format/json/new_json_reader.cpp | 1 +
.../data/external_table_p0/tvf/test_hdfs_tvf.out | 37 ++++++++++++++++++++
.../external_table_p0/tvf/test_hdfs_tvf.groovy | 40 ++++++++++++++++++++++
3 files changed, 78 insertions(+)
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index eb5c602238..b02c30807d 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1054,6 +1054,7 @@ Status NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, si
file_buf->reset(new uint8_t[file_size]);
Slice result(file_buf->get(), file_size);
RETURN_IF_ERROR(_file_reader->read_at(_current_offset, result, read_size, _io_ctx));
+ _current_offset += *read_size;
break;
}
case TFileType::FILE_STREAM: {
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
index 9fb4acdf35..9ae369b977 100644
--- a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
@@ -257,6 +257,43 @@
8 chengdu 2345678
9 xian 2345679
+-- !json_limit1 --
+1 beijing 2345671
+10 hefei 23456710
+11 \N 23456711
+12 hefei \N
+2 shanghai 2345672
+3 guangzhou 2345673
+4 shenzhen 2345674
+5 hangzhou 2345675
+6 nanjing 2345676
+7 wuhan 2345677
+8 chengdu 2345678
+9 xian 2345679
+
+-- !json_limit2 --
+1 beijing 1454547
+10 hefei 2345676
+2 shanghai 1244264
+3 guangzhou 528369
+4 shenzhen 594201
+5 hangzhou 594201
+6 nanjing 2345672
+7 wuhan 2345673
+8 chengdu 2345674
+9 xian 2345675
+
+-- !json_limit3 --
+1 {"id": 1, "city": "beijing", "code": 2345671}
+2 {"id": 2, "city": "shanghai", "code": 2345672}
+3 {"id": 3, "city": "hangzhou", "code": 2345673}
+4 {"id": 4, "city": "shenzhen", "code": 2345674}
+5 {"id": 5, "city": "guangzhou", "code": 2345675}
+
+-- !json_limit4 --
+1 {"id": 1, "city": "beijing", "code": 2345671}
+2 {"id": 2, "city": "shanghai", "code": 2345672}
+
-- !json_root --
1 beijing 2345671
2 shanghai 2345672
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
index 06d6c12c14..32315f60e6 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
@@ -132,6 +132,46 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") {
"strip_outer_array" = "false",
"read_json_by_line" = "true") order by id; """
+
+ uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
+ format = "json"
+ qt_json_limit1 """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true") order by id limit 100; """
+
+ uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/one_array_json.json"
+ format = "json"
+ qt_json_limit2 """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "true",
+ "read_json_by_line" = "false") order by id limit 100; """
+ uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
+ format = "json"
+ qt_json_limit3 """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true") order by no limit 100; """
+ uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
+ format = "json"
+ qt_json_limit4 """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true") order by no limit 2; """
+
+
// test json root
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
format = "json"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]