This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new c342cc52153 branch-3.0: [fix](json) fix parsing double in jsonb #46977
(#47064)
c342cc52153 is described below
commit c342cc52153f2012ba1510c8c6a69a30fd156447
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 17 19:44:41 2025 +0800
branch-3.0: [fix](json) fix parsing double in jsonb #46977 (#47064)
Cherry-picked from #46977
Co-authored-by: Sun Chenyang <[email protected]>
---
be/src/util/jsonb_parser_simd.h | 22 +++++-
regression-test/data/json_p0/test_json_double.csv | 2 +
.../data/json_p0/test_json_load_double.out | 11 +++
.../suites/json_p0/test_json_load_double.groovy | 87 ++++++++++++++++++++++
4 files changed, 118 insertions(+), 4 deletions(-)
diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h
index 6621912a9d0..07e2ab370f7 100644
--- a/be/src/util/jsonb_parser_simd.h
+++ b/be/src/util/jsonb_parser_simd.h
@@ -136,7 +136,7 @@ public:
break;
}
case simdjson::ondemand::json_type::number: {
- write_number(doc.get_number());
+ write_number(doc.get_number(), doc.raw_json_token());
break;
}
}
@@ -172,7 +172,7 @@ public:
break;
}
case simdjson::ondemand::json_type::number: {
- write_number(value.get_number());
+ write_number(value.get_number(), value.raw_json_token());
break;
}
case simdjson::ondemand::json_type::object: {
@@ -290,9 +290,23 @@ public:
}
}
- void write_number(simdjson::ondemand::number num) {
+ void write_number(simdjson::ondemand::number num, std::string_view
raw_string) {
if (num.is_double()) {
- if (writer_.writeDouble(num.get_double()) == 0) {
+ double number = num.get_double();
+ // When a double exceeds the precision that can be represented by
a double type in simdjson, it gets converted to 0.
+ // The correct approach is to truncate the double value
instead.
+ if (number == 0) {
+ StringParser::ParseResult result;
+ number =
StringParser::string_to_float<double>(raw_string.data(), raw_string.size(),
+ &result);
+ if (result != StringParser::PARSE_SUCCESS) {
+ err_ = JsonbErrType::E_INVALID_NUMBER;
+ LOG(WARNING) << "invalid number, raw string is: " <<
raw_string;
+ return;
+ }
+ }
+
+ if (writer_.writeDouble(number) == 0) {
err_ = JsonbErrType::E_OUTPUT_FAIL;
LOG(WARNING) << "writeDouble failed";
return;
diff --git a/regression-test/data/json_p0/test_json_double.csv
b/regression-test/data/json_p0/test_json_double.csv
new file mode 100644
index 00000000000..e928633659b
--- /dev/null
+++ b/regression-test/data/json_p0/test_json_double.csv
@@ -0,0 +1,2 @@
+2
{"rebookProfit":3.729672759600005773616970827788463793694972991943359375}
+3 3.729672759600005773616970827788463793694972991943359375
\ No newline at end of file
diff --git a/regression-test/data/json_p0/test_json_load_double.out
b/regression-test/data/json_p0/test_json_load_double.out
new file mode 100644
index 00000000000..621c3a0910e
--- /dev/null
+++ b/regression-test/data/json_p0/test_json_load_double.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql_select_src --
+3.72967275960001
+\N
+
+-- !sql_select_dst --
+1 3.72967275960001
+1 {"rebookProfit":3.72967275960001}
+2 {"rebookProfit":3.72967275960001}
+3 3.72967275960001
+
diff --git a/regression-test/suites/json_p0/test_json_load_double.groovy
b/regression-test/suites/json_p0/test_json_load_double.groovy
new file mode 100644
index 00000000000..8c692e3e71d
--- /dev/null
+++ b/regression-test/suites/json_p0/test_json_load_double.groovy
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_json_load_double", "p0") {
+
+ def srcTable = "stringTable"
+ def dstTable = "jsonTable"
+ def dataFile = "test_json_double.csv"
+
+ sql """ DROP TABLE IF EXISTS ${srcTable} """
+ sql """ DROP TABLE IF EXISTS ${dstTable} """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${srcTable} (
+ id INT not null,
+ v STRING not null
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${dstTable} (
+ id INT not null,
+ j JSON not null
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ insert into ${srcTable}
values(1,'{"rebookProfit":3.729672759600005773616970827788463793694972991943359375}');
+ """
+
+ sql """
+ insert into ${srcTable}
values(1,'3.729672759600005773616970827788463793694972991943359375');
+ """
+
+ sql """ insert into ${dstTable} select * from ${srcTable} """
+
+ // load the json data from csv file
+ streamLoad {
+ table dstTable
+
+ file dataFile // import csv file
+ time 10000 // limit inflight 10s
+ set 'strict_mode', 'true'
+
+ // If a check callback is declared, the default check conditions are
ignored,
+ // so you must check all conditions yourself.
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(2, json.NumberTotalRows)
+ assertEquals(2, json.NumberLoadedRows)
+ assertTrue(json.LoadBytes > 0)
+ log.info("url: " + json.ErrorURL)
+ }
+ }
+
+ qt_sql_select_src """ select jsonb_extract(v, '\$.rebookProfit') from
${srcTable} """
+ qt_sql_select_dst """ select * from ${dstTable} """
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]