This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 675b5f17525 [fix](load)fix load json format boolean type to int 
column. (#54397)
675b5f17525 is described below

commit 675b5f175253d412182612e37fcc07c699509674
Author: daidai <[email protected]>
AuthorDate: Wed Aug 13 06:51:04 2025 +0800

    [fix](load)fix load json format boolean type to int column. (#54397)
    
    ### What problem does this PR solve?
    
    Related PR: #43469
    Problem Summary:
    PR #43469 accidentally removed the logic for reading boolean
    "true"/"false" values in the simdjson reader. Before PR #43469,
    "true"/"false" were treated as "1"/"0", allowing a bool column in a JSON
    file to be imported into a Doris int column. This PR restores this
    logic.
---
 be/src/vec/exec/format/json/new_json_reader.cpp    |  11 ++++++++
 .../data/load_p0/stream_load/test_json_load.out    | Bin 5107 -> 5185 bytes
 .../stream_load/test_read_boolean_to_int.json      |   6 +++++
 .../load_p0/stream_load/test_json_load.groovy      |  29 +++++++++++++++++++++
 4 files changed, 46 insertions(+)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index fc2e23eb685..e49100f82a1 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1057,6 +1057,17 @@ Status 
NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
             
RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, 
slice,
                                                                        
_serde_options));
 
+        } else if (value.type() == simdjson::ondemand::json_type::boolean) {
+            const char* str_value = nullptr;
+            // insert "1"/"0" , not "true"/"false".
+            if (value.get_bool()) {
+                str_value = (char*)"1";
+            } else {
+                str_value = (char*)"0";
+            }
+            Slice slice {str_value, 1};
+            
RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, 
slice,
+                                                                       
_serde_options));
         } else {
             // Maybe we can `switch (value->GetType()) case: kNumberType`.
             // Note that `if (value->IsInt())`, but column is FloatColumn.
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out 
b/regression-test/data/load_p0/stream_load/test_json_load.out
index 3ef9ecb5be9..b2cf149e12b 100644
Binary files a/regression-test/data/load_p0/stream_load/test_json_load.out and 
b/regression-test/data/load_p0/stream_load/test_json_load.out differ
diff --git 
a/regression-test/data/load_p0/stream_load/test_read_boolean_to_int.json 
b/regression-test/data/load_p0/stream_load/test_read_boolean_to_int.json
new file mode 100644
index 00000000000..62ff27be0bf
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_read_boolean_to_int.json
@@ -0,0 +1,6 @@
+[
+{"id":1,"k1":true,"k2":true,"k3":true,"k4":true},
+{"id":2,"k1":false,"k2":false,"k3":false,"k4":false},
+{"id":3,"k1":100,"k2":100,"k3":100,"k4":100},
+{"id":4,"k1":null,"k2":null,"k3":null,"k4":null}
+]
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy 
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index 128d8fb6b0f..3f66745d941 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -943,4 +943,33 @@ suite("test_json_load", "p0,nonConcurrent") {
     } finally {
         // try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
+
+    // try to load  `boolean` => `tinyint, int , string, decimal`
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+        sql """CREATE TABLE IF NOT EXISTS ${testTable} 
+            (
+                `id` int,
+                `k1` tinyint NULL,
+                `k2` int NULL,
+                `k3` string NULL,
+                `k4` decimal(10,2) NULL
+            )
+            DUPLICATE KEY(`id`)
+            COMMENT ''
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );"""
+
+
+        load_json_data.call("${testTable}", "${testTable}_case31", 'true', '', 
'json', '', '',
+                             '', '', '', 'test_read_boolean_to_int.json')
+        
+        sql "sync"
+        qt_select31 "select * from ${testTable} order by id"
+
+    } finally {
+        // try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to