This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 853033615ad [Bug](bits) fix wrong result of count_zero_num with nullmap (#56113)
853033615ad is described below

commit 853033615adad33ba2916c3844316e0ab6964b91
Author: Pxl <[email protected]>
AuthorDate: Wed Sep 17 11:17:32 2025 +0800

    [Bug](bits) fix wrong result of count_zero_num with nullmap (#56113)
    
    fix wrong result of count_zero_num with nullmap
---
 be/src/util/simd/bits.h                            |   2 +-
 be/test/util/bit_util_test.cpp                     |  93 +++++++++++++++
 .../test_conditional_function.out                  | Bin 3048 -> 3303 bytes
 .../sql_functions/conditional_functions/data.txt   |   1 +
 .../test_conditional_function.groovy               | 128 +++++++++++++++++++++
 5 files changed, 223 insertions(+), 1 deletion(-)

diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h
index 6c0d456254b..186e43746d2 100644
--- a/be/src/util/simd/bits.h
+++ b/be/src/util/simd/bits.h
@@ -164,7 +164,7 @@ inline T count_zero_num(const int8_t* __restrict data, 
const uint8_t* __restrict
     const __m128i zero16 = _mm_setzero_si128();
     const int8_t* end64 = data + (size / 64 * 64);
 
-    for (; data < end64; data += 64) {
+    for (; data < end64; data += 64, null_map += 64) {
         num += __builtin_popcountll(
                 static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128(
                         _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const 
__m128i*>(data)),
diff --git a/be/test/util/bit_util_test.cpp b/be/test/util/bit_util_test.cpp
index f355b6a1c04..12345d03417 100644
--- a/be/test/util/bit_util_test.cpp
+++ b/be/test/util/bit_util_test.cpp
@@ -19,11 +19,16 @@
 
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
 
 #include <bit>
 #include <boost/utility/binary.hpp>
+#include <random>
 
 #include "gtest/gtest_pred_impl.h"
+#include "runtime/primitive_type.h"
+#include "util/simd/bits.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/common/endian.h"
 
 namespace doris {
@@ -56,4 +61,92 @@ TEST(BitUtil, BigEndianToHost) {
                       wide::UInt256(0xf0debc9a78563412) << 64 | 
wide::UInt256(0xf0debc9a78563412));
 }
 
+void insert_true(vectorized::ColumnNullable* column, size_t num = 1) {
+    for (int i = 0; i < num; i++) {
+        
assert_cast<vectorized::ColumnUInt8*>(column->get_nested_column_ptr().get())
+                ->insert_value(1);
+        column->push_false_to_nullmap(1);
+    }
+}
+
+void insert_false(vectorized::ColumnNullable* column, size_t num = 1) {
+    for (int i = 0; i < num; i++) {
+        
assert_cast<vectorized::ColumnUInt8*>(column->get_nested_column_ptr().get())
+                ->insert_value(0);
+        column->push_false_to_nullmap(1);
+    }
+}
+
+void insert_null(vectorized::ColumnNullable* column, size_t num = 1) {
+    for (int i = 0; i < num; i++) {
+        column->insert_default();
+    }
+}
+
+size_t brute_force_count_zero_num(const uint8_t* __restrict data,
+                                  const uint8_t* __restrict null_map, size_t 
size) {
+    size_t num = 0;
+    for (size_t i = 0; i < size; ++i) {
+        if (data[i] == 0 || null_map[i]) {
+            num++;
+        }
+    }
+    return num;
+}
+
+TEST(BitUtil, CountZero) {
+    {
+        auto column = 
vectorized::ColumnNullable::create(vectorized::ColumnUInt8::create(),
+                                                         
vectorized::ColumnUInt8::create());
+        insert_false(column.get(), 5);
+        insert_null(column.get(), 1);
+        insert_false(column.get(), 8);
+        insert_null(column.get(), 1);
+        insert_false(column.get(), 54);
+        insert_true(column.get(), 1);
+        insert_false(column.get(), 14);
+        ASSERT_EQ(
+                brute_force_count_zero_num(assert_cast<const 
vectorized::ColumnUInt8*>(
+                                                   
column->get_nested_column_ptr().get())
+                                                   ->get_data()
+                                                   .data(),
+                                           column->get_null_map_data().data(), 
column->size()),
+                simd::count_zero_num((int8_t*)assert_cast<const 
vectorized::ColumnUInt8*>(
+                                             
column->get_nested_column_ptr().get())
+                                             ->get_data()
+                                             .data(),
+                                     column->get_null_map_data().data(), 
(uint32_t)column->size()));
+    }
+
+    {
+        auto column = 
vectorized::ColumnNullable::create(vectorized::ColumnUInt8::create(),
+                                                         
vectorized::ColumnUInt8::create());
+        std::mt19937 rng(12345);
+        std::uniform_int_distribution<int> val_dist(0, 1);
+        std::uniform_int_distribution<int> null_dist(0, 5);
+        for (int i = 0; i < 10000; ++i) {
+            if (null_dist(rng) == 0) {
+                insert_null(column.get(), 1);
+            } else {
+                if (val_dist(rng) == 0) {
+                    insert_false(column.get(), 1);
+                } else {
+                    insert_true(column.get(), 1);
+                }
+            }
+        }
+        ASSERT_EQ(
+                brute_force_count_zero_num(assert_cast<const 
vectorized::ColumnUInt8*>(
+                                                   
column->get_nested_column_ptr().get())
+                                                   ->get_data()
+                                                   .data(),
+                                           column->get_null_map_data().data(), 
column->size()),
+                simd::count_zero_num((int8_t*)assert_cast<const 
vectorized::ColumnUInt8*>(
+                                             
column->get_nested_column_ptr().get())
+                                             ->get_data()
+                                             .data(),
+                                     column->get_null_map_data().data(), 
(uint32_t)column->size()));
+    }
+}
+
 } // namespace doris
diff --git a/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out b/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out
index 417f851a067..83a491bf27e 100644
Binary files a/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out and b/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out differ
diff --git a/regression-test/suites/query_p0/sql_functions/conditional_functions/data.txt b/regression-test/suites/query_p0/sql_functions/conditional_functions/data.txt
new file mode 100644
index 00000000000..1501e0a3165
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/conditional_functions/data.txt
@@ -0,0 +1 @@
+insert into 
table_800_undef_partitions2_keys3_properties4_distributed_by524(pk,col_boolean_undef_signed,col_boolean_undef_signed_not_null,col_tinyint_undef_signed,col_tinyint_undef_signed_index_inverted,col_tinyint_undef_signed_not_null,col_tinyint_undef_signed_not_null_index_inverted,col_smallint_undef_signed,col_smallint_undef_signed_index_inverted,col_smallint_undef_signed_not_null,col_smallint_undef_signed_not_null_index_inverted,col_int_undef_signed,col_int_undef_signed_index_invert
 [...]
diff --git a/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy b/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
index 8f1b659588c..bee3b53f95f 100644
--- a/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
@@ -15,6 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
+import groovy.io.FileType
+import java.nio.file.Files
+import java.nio.file.Paths
+
 suite("test_conditional_function") {
     sql "set batch_size = 4096;"
 
@@ -207,4 +211,128 @@ insert into 
table_50_undef_partitions2_keys3_properties4_distributed_by54(pk,col
     qt_test """
 SELECT TO_DATE ( table1 . `col_date_undef_signed_not_null` ) AS field1, MAX( 
distinct table1 . `col_int_undef_signed_not_null` ) AS field2, ( TO_DATE (CASE 
table1 . col_date_undef_signed_not_null WHEN table1 . 
col_date_undef_signed_not_null THEN DATE_ADD( table1 . 
`col_date_undef_signed_not_null` , INTERVAL 3 YEAR ) WHEN table1 . 
col_date_undef_signed THEN '2024-01-31' WHEN '2025-02-18' THEN '2024-02-18' 
WHEN '2008-09-25' THEN DATE_SUB( table1 . `col_date_undef_signed` , INTERVAL 7 
DAY ) [...]
     """
+
+    sql "drop table if exists 
table_800_undef_partitions2_keys3_properties4_distributed_by524;"
+    sql """
+create table table_800_undef_partitions2_keys3_properties4_distributed_by524 (
+pk int,
+col_int_undef_signed_index_inverted int  null  ,
+col_date_undef_signed_not_null date  not null  ,
+col_varchar_1024__undef_signed varchar(1024)  null  ,
+col_boolean_undef_signed boolean  null  ,
+col_boolean_undef_signed_not_null boolean  not null  ,
+col_tinyint_undef_signed tinyint  null  ,
+col_tinyint_undef_signed_index_inverted tinyint  null  ,
+col_tinyint_undef_signed_not_null tinyint  not null  ,
+col_tinyint_undef_signed_not_null_index_inverted tinyint  not null  ,
+col_smallint_undef_signed smallint  null  ,
+col_smallint_undef_signed_index_inverted smallint  null  ,
+col_smallint_undef_signed_not_null smallint  not null  ,
+col_smallint_undef_signed_not_null_index_inverted smallint  not null  ,
+col_int_undef_signed int  null  ,
+col_int_undef_signed_not_null int  not null  ,
+col_int_undef_signed_not_null_index_inverted int  not null  ,
+col_bigint_undef_signed bigint  null  ,
+col_bigint_undef_signed_index_inverted bigint  null  ,
+col_bigint_undef_signed_not_null bigint  not null  ,
+col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+col_decimal_16__8__undef_signed decimal(16, 8)  null  ,
+col_decimal_16__8__undef_signed_index_inverted decimal(16, 8)  null  ,
+col_decimal_16__8__undef_signed_not_null decimal(16, 8)  not null  ,
+col_decimal_16__8__undef_signed_not_null_index_inverted decimal(16, 8)  not 
null  ,
+col_decimal_38__9__undef_signed decimal(38, 9)  null  ,
+col_decimal_38__9__undef_signed_index_inverted decimal(38, 9)  null  ,
+col_decimal_38__9__undef_signed_not_null decimal(38, 9)  not null  ,
+col_decimal_38__9__undef_signed_not_null_index_inverted decimal(38, 9)  not 
null  ,
+col_decimal_38__30__undef_signed decimal(38, 30)  null  ,
+col_decimal_38__30__undef_signed_index_inverted decimal(38, 30)  null  ,
+col_decimal_38__30__undef_signed_not_null decimal(38, 30)  not null  ,
+col_decimal_38__30__undef_signed_not_null_index_inverted decimal(38, 30)  not 
null  ,
+col_date_undef_signed date  null  ,
+col_date_undef_signed_index_inverted date  null  ,
+col_date_undef_signed_not_null_index_inverted date  not null  ,
+col_datetime_undef_signed datetime  null  ,
+col_datetime_undef_signed_index_inverted datetime  null  ,
+col_datetime_undef_signed_not_null datetime  not null  ,
+col_datetime_undef_signed_not_null_index_inverted datetime  not null  ,
+col_datetime_3__undef_signed datetime(3)  null  ,
+col_datetime_3__undef_signed_index_inverted datetime(3)  null  ,
+col_datetime_3__undef_signed_not_null datetime(3)  not null  ,
+col_datetime_3__undef_signed_not_null_index_inverted datetime(3)  not null  ,
+col_datetime_6__undef_signed datetime(6)  null  ,
+col_datetime_6__undef_signed_index_inverted datetime(6)  null  ,
+col_datetime_6__undef_signed_not_null datetime(6)  not null  ,
+col_datetime_6__undef_signed_not_null_index_inverted datetime(6)  not null  ,
+col_char_255__undef_signed char(255)  null  ,
+col_char_255__undef_signed_index_inverted char(255)  null  ,
+col_char_255__undef_signed_index_inverted_p_e char(255)  null  ,
+col_char_255__undef_signed_index_inverted_p_u char(255)  null  ,
+col_char_255__undef_signed_not_null char(255)  not null  ,
+col_char_255__undef_signed_not_null_index_inverted char(255)  not null  ,
+col_char_255__undef_signed_not_null_index_inverted_p_e char(255)  not null  ,
+col_char_255__undef_signed_not_null_index_inverted_p_u char(255)  not null  ,
+col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+col_varchar_1024__undef_signed_index_inverted_p_e varchar(1024)  null  ,
+col_varchar_1024__undef_signed_index_inverted_p_u varchar(1024)  null  ,
+col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  not null 
 ,
+col_varchar_1024__undef_signed_not_null_index_inverted_p_e varchar(1024)  not 
null  ,
+col_varchar_1024__undef_signed_not_null_index_inverted_p_u varchar(1024)  not 
null  ,
+INDEX col_tinyint_undef_signed_index_inverted_idx 
(`col_tinyint_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_tinyint_undef_signed_not_null_index_inverted_idx 
(`col_tinyint_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_smallint_undef_signed_index_inverted_idx 
(`col_smallint_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_smallint_undef_signed_not_null_index_inverted_idx 
(`col_smallint_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_int_undef_signed_index_inverted_idx 
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_int_undef_signed_not_null_index_inverted_idx 
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_bigint_undef_signed_index_inverted_idx 
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_bigint_undef_signed_not_null_index_inverted_idx 
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_decimal_16__8__undef_signed_index_inverted_idx 
(`col_decimal_16__8__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_decimal_16__8__undef_signed_not_null_index_inverted_idx 
(`col_decimal_16__8__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__9__undef_signed_index_inverted_idx 
(`col_decimal_38__9__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__9__undef_signed_not_null_index_inverted_idx 
(`col_decimal_38__9__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__30__undef_signed_index_inverted_idx 
(`col_decimal_38__30__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__30__undef_signed_not_null_index_inverted_idx 
(`col_decimal_38__30__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_date_undef_signed_index_inverted_idx 
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_date_undef_signed_not_null_index_inverted_idx 
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_datetime_undef_signed_index_inverted_idx 
(`col_datetime_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_datetime_undef_signed_not_null_index_inverted_idx 
(`col_datetime_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_datetime_3__undef_signed_index_inverted_idx 
(`col_datetime_3__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_datetime_3__undef_signed_not_null_index_inverted_idx 
(`col_datetime_3__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_datetime_6__undef_signed_index_inverted_idx 
(`col_datetime_6__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_datetime_6__undef_signed_not_null_index_inverted_idx 
(`col_datetime_6__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_char_255__undef_signed_index_inverted_idx 
(`col_char_255__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_char_255__undef_signed_index_inverted_p_e_idx 
(`col_char_255__undef_signed_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english"),
+INDEX col_char_255__undef_signed_index_inverted_p_u_idx 
(`col_char_255__undef_signed_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode"),
+INDEX col_char_255__undef_signed_not_null_index_inverted_idx 
(`col_char_255__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_char_255__undef_signed_not_null_index_inverted_p_e_idx 
(`col_char_255__undef_signed_not_null_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english"),
+INDEX col_char_255__undef_signed_not_null_index_inverted_p_u_idx 
(`col_char_255__undef_signed_not_null_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode"),
+INDEX col_varchar_1024__undef_signed_index_inverted_idx 
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_varchar_1024__undef_signed_index_inverted_p_e_idx 
(`col_varchar_1024__undef_signed_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english"),
+INDEX col_varchar_1024__undef_signed_index_inverted_p_u_idx 
(`col_varchar_1024__undef_signed_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode"),
+INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_varchar_1024__undef_signed_not_null_index_inverted_p_e_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english"),
+INDEX col_varchar_1024__undef_signed_not_null_index_inverted_p_u_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode")
+) engine=olap
+UNIQUE KEY(pk, col_int_undef_signed_index_inverted, 
col_date_undef_signed_not_null, col_varchar_1024__undef_signed)
+distributed by hash(pk) buckets 10
+properties("bloom_filter_columns" = "col_int_undef_signed, 
col_int_undef_signed_not_null, col_date_undef_signed_not_null, 
col_varchar_1024__undef_signed, col_varchar_1024__undef_signed_not_null", 
"replication_num" = "1");
+    """
+    def sqlFile = new File(context.file.parent+'/data.txt')
+    sql """$sqlFile.text"""
+
+    qt_test """
+SELECT
+    col_date_undef_signed
+FROM
+    table_800_undef_partitions2_keys3_properties4_distributed_by524
+where
+    (
+        case
+            col_date_undef_signed
+            when "2024-01-09" then 1
+            when "2023-12-10" then 2
+            else 0
+        end
+    ) = 1;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to