This is an automated email from the ASF dual-hosted git repository.

hellostephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e603dde58c [check](column) Check that the values in ColumnBool are only 0 or 1. (#59707)
5e603dde58c is described below

commit 5e603dde58ccf5fb662788fc93a7e538ff76e63c
Author: Mryange <[email protected]>
AuthorDate: Tue Jan 13 20:28:15 2026 +0800

    [check](column) Check that the values in ColumnBool are only 0 or 1. (#59707)
---
 be/src/vec/columns/column.cpp             | 47 +++++++++++++++++++++++++++++++
 be/src/vec/columns/column.h               |  3 ++
 be/test/vec/columns/column_self_check.cpp | 27 ++++++++++++++++++
 3 files changed, 77 insertions(+)

diff --git a/be/src/vec/columns/column.cpp b/be/src/vec/columns/column.cpp
index f6f595fc6cd..b206fc17034 100644
--- a/be/src/vec/columns/column.cpp
+++ b/be/src/vec/columns/column.cpp
@@ -62,6 +62,49 @@ bool IColumn::const_nested_check() const {
     return const_cnt == 1 && is_column_const(*this);
 }
 
+bool IColumn::column_boolean_check() const {
+    if (const auto* col_nullable = check_and_get_column<ColumnNullable>(*this)) {
+        // for column nullable, we need to skip null values check
+        const auto& nested_col = col_nullable->get_nested_column();
+        const auto& null_map = col_nullable->get_null_map_data();
+        Filter not_null_filter;
+        not_null_filter.reserve(nested_col.size());
+        size_t result_size_hint = 0;
+        for (size_t i = 0; i < null_map.size(); ++i) {
+            not_null_filter.push_back(null_map[i] == 0);
+            if (null_map[i] == 0) {
+                ++result_size_hint;
+            }
+        }
+        auto nested_col_skip_null = nested_col.filter(not_null_filter, result_size_hint);
+        return nested_col_skip_null->column_boolean_check();
+    }
+
+    auto check_boolean_is_zero_or_one = [&](const IColumn& subcolumn) {
+        if (const auto* column_boolean = check_and_get_column<ColumnBool>(subcolumn)) {
+            for (size_t i = 0; i < column_boolean->size(); ++i) {
+                auto val = column_boolean->get_element(i);
+                if (val != 0 && val != 1) {
+                    LOG_WARNING("column boolean check failed at index {} with value {}", i, val)
+                            .tag("column structure", subcolumn.dump_structure());
+                    return false;
+                }
+            }
+        }
+        return true;
+    };
+
+    bool is_valid = check_boolean_is_zero_or_one(*this);
+    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
+        if (!subcolumn->column_boolean_check()) {
+            is_valid = false;
+        }
+    };
+    // simply read using for_each_subcolumn without modification; const_cast can be used.
+    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
+    return is_valid;
+}
+
 bool IColumn::null_map_check() const {
     auto check_null_map_is_zero_or_one = [&](const IColumn& subcolumn) {
         if (is_column_nullable(subcolumn)) {
@@ -100,6 +143,10 @@ Status IColumn::column_self_check() const {
     if (!null_map_check()) {
         return Status::InternalError("null map check failed for column: {}", get_name());
     }
+    // check boolean column
+    if (!column_boolean_check()) {
+        return Status::InternalError("boolean column check failed for column: {}", get_name());
+    }
 #endif
     return Status::OK();
 }
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 99653bd4acf..e162ee49516 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -679,6 +679,9 @@ public:
     //  const(array(const(...))) is not allowed
     bool const_nested_check() const;
 
+    // column boolean check, only allow 0 and 1
+    bool column_boolean_check() const;
+
     Status column_self_check() const;
 
     // only used in agg value replace for column which is not variable length, eg.BlockReader::_copy_value_data
diff --git a/be/test/vec/columns/column_self_check.cpp b/be/test/vec/columns/column_self_check.cpp
index 13fcfcbf5f0..4a4de32c3f1 100644
--- a/be/test/vec/columns/column_self_check.cpp
+++ b/be/test/vec/columns/column_self_check.cpp
@@ -120,4 +120,31 @@ TEST(ColumnSelfCheckTest, nullmap_check_test) {
     }
 }
 
+TEST(ColumnSelfCheckTest, boolean_check) {
+    {
+        auto column_bool = ColumnHelper::create_column<DataTypeUInt8>({0, 1, 0, 1, 1});
+        EXPECT_EQ(column_bool->column_boolean_check(), true);
+    }
+    {
+        auto column_bool = ColumnHelper::create_column<DataTypeUInt8>({0, 1, 2, 1, 1});
+        EXPECT_EQ(column_bool->column_boolean_check(), false);
+    }
+
+    {
+        auto column_nullable_bool = ColumnHelper::create_nullable_column<DataTypeUInt8>(
+                {0, 1, 0, 1, 1}, {0, 0, 0, 0, 0});
+        EXPECT_EQ(column_nullable_bool->column_boolean_check(), true);
+    }
+    {
+        auto column_nullable_bool = ColumnHelper::create_nullable_column<DataTypeUInt8>(
+                {0, 1, 2, 1, 1}, {0, 0, 0, 0, 0});
+        EXPECT_EQ(column_nullable_bool->column_boolean_check(), false);
+    }
+
+    {
+        auto column_nullable_bool = ColumnHelper::create_nullable_column<DataTypeUInt8>(
+                {0, 1, 2, 1, 1}, {0, 0, 1, 0, 0});
+        EXPECT_EQ(column_nullable_bool->column_boolean_check(), true);
+    }
+}
 } // namespace doris::vectorized
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to