This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8db0cd995fdc9fa615e094a68f2a6c20668b8e2a
Author: bobhan1 <[email protected]>
AuthorDate: Wed Aug 23 14:21:22 2023 +0800

    [enhancement](bitmap)support bitmap type for non-key column in unique table (#23228)
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  4 +-
 .../sql-manual/sql-reference/Data-Types/BITMAP.md  |  4 +-
 .../sql-manual/sql-reference/Data-Types/BITMAP.md  | 17 ++++---
 .../org/apache/doris/analysis/CreateTableStmt.java | 18 +++----
 .../org/apache/doris/catalog/AggregateType.java    |  1 -
 .../apache/doris/analysis/CreateTableStmtTest.java | 17 ++-----
 .../data_model_p0/unique/test_unique_bitmap.out    | 21 +++++++++
 .../data_model_p0/unique/test_unique_bitmap.groovy | 55 ++++++++++++++++++++++
 8 files changed, 106 insertions(+), 31 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 270eb98995..681934b8ad 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -154,7 +154,9 @@ Status SegmentWriter::init(const std::vector<uint32_t>& 
col_ids, bool has_key,
 
         // now we create zone map for key columns in AGG_KEYS or all column in 
UNIQUE_KEYS or DUP_KEYS
         // and not support zone map for array type and jsonb type.
-        opts.need_zone_map = column.is_key() || _tablet_schema->keys_type() != 
KeysType::AGG_KEYS;
+        opts.need_zone_map =
+                (column.is_key() || _tablet_schema->keys_type() != 
KeysType::AGG_KEYS) &&
+                column.type() != FieldType::OLAP_FIELD_TYPE_OBJECT;
         opts.need_bloom_filter = column.is_bf_column();
         auto* tablet_index = 
_tablet_schema->get_ngram_bf_index(column.unique_id());
         if (tablet_index) {
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md 
b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
index a96026ac60..81ec1bae79 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
@@ -28,7 +28,9 @@ under the License.
 ### Description
 BITMAP
 
-BITMAP cannot be used as a key column, and the aggregation type is 
BITMAP_UNION when building the table.
+The columns of the BITMAP type can be used in Aggregate tables or Unique 
tables.
+When used in a Unique table, they must be used as non-key columns.
+When used in an Aggregate table, they must be used as non-key columns, and the 
aggregation type is BITMAP_UNION when building the table.
 The user does not need to specify the length and default value. The length is 
controlled within the system according to the degree of data aggregation.
 And the BITMAP column can only be queried or used by supporting functions such 
as bitmap_union_count, bitmap_union, bitmap_hash and bitmap_hash64.
     
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md 
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
index 02604de472..dee7f760cc 100644
--- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
@@ -26,13 +26,16 @@ under the License.
 
 ## BITMAP
 ### description
-    BITMAP
-    BITMAP不能作为key列使用,建表时配合聚合类型为BITMAP_UNION。
-    用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。
-    并且BITMAP列只能通过配套的bitmap_union_count、bitmap_union、bitmap_hash、bitmap_hash64等函数进行查询或使用。
-    
-    离线场景下使用BITMAP会影响导入速度,在数据量大的情况下查询速度会慢于HLL,并优于Count Distinct。
-    注意:实时场景下BITMAP如果不使用全局字典,使用了bitmap_hash()可能会导致有千分之一左右的误差。如果这个误差不可接受,可以使用bitmap_hash64。
+BITMAP
+
+BITMAP类型的列可以在Aggregate表或Unique表中使用。
+在Unique表中使用时,其必须作为非key列使用。
+在Aggregate表中使用时,其必须作为非key列使用,且建表时配合的聚合类型为BITMAP_UNION。
+用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。
+并且BITMAP列只能通过配套的bitmap_union_count、bitmap_union、bitmap_hash、bitmap_hash64等函数进行查询或使用。
+
+离线场景下使用BITMAP会影响导入速度,在数据量大的情况下查询速度会慢于HLL,并优于Count Distinct。
+注意:实时场景下BITMAP如果不使用全局字典,使用了bitmap_hash()可能会导致有千分之一左右的误差。如果这个误差不可接受,可以使用bitmap_hash64。
 
 ### example
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
index 3192b37038..b1df8498b8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
@@ -491,8 +491,6 @@ public class CreateTableStmt extends DdlStmt {
                 
columnDefs.add(ColumnDef.newVersionColumnDef(AggregateType.REPLACE));
             }
         }
-        boolean hasObjectStored = false;
-        String objectStoredColumn = "";
         Set<String> columnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
         for (ColumnDef columnDef : columnDefs) {
             columnDef.analyze(engineName.equals("olap"));
@@ -519,8 +517,16 @@ public class CreateTableStmt extends DdlStmt {
             }
 
             if (columnDef.getType().isObjectStored()) {
-                hasObjectStored = true;
-                objectStoredColumn = columnDef.getName();
+                if (columnDef.getType().isBitmapType()) {
+                    if (keysDesc.getKeysType() == KeysType.DUP_KEYS) {
+                        throw new AnalysisException("column:" + 
columnDef.getName()
+                                + " must be used in AGG_KEYS or UNIQUE_KEYS.");
+                    }
+                } else {
+                    if (keysDesc.getKeysType() != KeysType.AGG_KEYS) {
+                        throw new AnalysisException("column:" + 
columnDef.getName() + " must be used in AGG_KEYS.");
+                    }
+                }
             }
 
             if (!columnSet.add(columnDef.getName())) {
@@ -528,10 +534,6 @@ public class CreateTableStmt extends DdlStmt {
             }
         }
 
-        if (hasObjectStored && keysDesc.getKeysType() != KeysType.AGG_KEYS) {
-            throw new AnalysisException("column:" + objectStoredColumn + " 
must be used in AGG_KEYS.");
-        }
-
         if (engineName.equals("olap")) {
             // before analyzing partition, handle the replication allocation 
info
             properties = rewriteReplicaAllocationProperties(properties);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java
index d58330b599..1d6b862869 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateType.java
@@ -102,7 +102,6 @@ public enum AggregateType {
         // all types except object stored column type, such as bitmap hll
         // quantile_state.
         EnumSet<PrimitiveType> excObjectStored = 
EnumSet.allOf(PrimitiveType.class);
-        excObjectStored.remove(PrimitiveType.BITMAP);
         excObjectStored.remove(PrimitiveType.HLL);
         excObjectStored.remove(PrimitiveType.QUANTILE_STATE);
         excObjectStored.remove(PrimitiveType.AGG_STATE);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java
index 0c0eec2c53..b7bdfed3ff 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableStmtTest.java
@@ -329,21 +329,12 @@ public class CreateTableStmtTest {
     }
 
     @Test
-    public void testBmpHllNoAggTab() throws Exception {
-        ColumnDef bitmap = new ColumnDef("col3", new 
TypeDef(ScalarType.createType(PrimitiveType.BITMAP)));
-        cols.add(bitmap);
-        CreateTableStmt stmt = new CreateTableStmt(false, false, tblNameNoDb, 
cols, "olap",
-                new KeysDesc(KeysType.DUP_KEYS, colsName), null, new 
RandomDistributionDesc(10), null, null, "");
-        expectedEx.expect(AnalysisException.class);
-        expectedEx.expectMessage(
-                "Aggregate type `col3` bitmap NONE NOT NULL COMMENT \"\" is 
not compatible with primitive type bitmap");
-        stmt.analyze(analyzer);
-
-        cols.remove(bitmap);
+    public void testHllNoAggTab() throws Exception {
         ColumnDef hll = new ColumnDef("col3", new 
TypeDef(ScalarType.createType(PrimitiveType.HLL)));
         cols.add(hll);
-        stmt = new CreateTableStmt(false, false, tblNameNoDb, cols, "olap", 
new KeysDesc(KeysType.DUP_KEYS, colsName),
-                null, new RandomDistributionDesc(10), null, null, "");
+        CreateTableStmt stmt = new CreateTableStmt(false, false, tblNameNoDb, 
cols, "olap",
+                        new KeysDesc(KeysType.DUP_KEYS, colsName), null, new 
RandomDistributionDesc(10),
+                                        null, null, "");
         expectedEx.expect(AnalysisException.class);
         expectedEx.expectMessage(
                 "Aggregate type `col3` hll NONE NOT NULL COMMENT \"\" is not 
compatible with primitive type hll");
diff --git a/regression-test/data/data_model_p0/unique/test_unique_bitmap.out 
b/regression-test/data/data_model_p0/unique/test_unique_bitmap.out
new file mode 100644
index 0000000000..85f559e238
--- /dev/null
+++ b/regression-test/data/data_model_p0/unique/test_unique_bitmap.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+1      1       1
+2      2       3,1000
+3      3       999,1000,888888
+
+-- !sql --
+1      4       5,90,876,1000
+2      2       3,1000
+3      8       0,1,2,3,5,99,876,2445
+
+-- !sql --
+1      1       1
+2      2       3,1000
+3      3       999,1000,888888
+
+-- !sql --
+1      4       5,90,876,1000
+2      2       3,1000
+3      8       0,1,2,3,5,99,876,2445
+
diff --git 
a/regression-test/suites/data_model_p0/unique/test_unique_bitmap.groovy 
b/regression-test/suites/data_model_p0/unique/test_unique_bitmap.groovy
new file mode 100644
index 0000000000..64cf809eea
--- /dev/null
+++ b/regression-test/suites/data_model_p0/unique/test_unique_bitmap.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_unique_table_bitmap") {
+    def tbName = "test_uniq_table_bitmap1"
+    sql "DROP TABLE IF EXISTS ${tbName}"
+    sql """
+            CREATE TABLE IF NOT EXISTS ${tbName} (
+                k int,
+                id_bitmap bitmap
+            ) UNIQUE KEY(k)
+            DISTRIBUTED BY HASH(k) BUCKETS 1 properties("replication_num" = 
"1");
+        """
+    sql "insert into ${tbName} values(1,to_bitmap(1));"
+    sql "insert into ${tbName} 
values(2,bitmap_or(to_bitmap(3),to_bitmap(1000)));"
+    sql "insert into ${tbName} 
values(3,bitmap_or(to_bitmap(999),to_bitmap(1000),to_bitmap(888888)));"
+    qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from 
${tbName} order by k;"
+    sql "insert into ${tbName} 
values(3,bitmap_from_string('1,0,1,2,3,1,5,99,876,2445'));"
+    sql "insert into ${tbName} 
values(1,bitmap_or(bitmap_from_string('90,5,876'),to_bitmap(1000)));"
+    qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from 
${tbName} order by k;"
+    sql "DROP TABLE ${tbName};"
+
+    def tbName2 = "test_uniq_table_bitmap2"
+    sql "DROP TABLE IF EXISTS ${tbName2}"
+    sql """
+            CREATE TABLE IF NOT EXISTS ${tbName2} (
+                k int,
+                id_bitmap bitmap
+            ) UNIQUE KEY(k)
+            DISTRIBUTED BY HASH(k) BUCKETS 1 
+            properties("replication_num" = "1", 
"enable_unique_key_merge_on_write" = "true");
+        """
+    sql "insert into ${tbName2} values(1,to_bitmap(1));"
+    sql "insert into ${tbName2} 
values(2,bitmap_or(to_bitmap(3),to_bitmap(1000)));"
+    sql "insert into ${tbName2} 
values(3,bitmap_or(to_bitmap(999),to_bitmap(1000),to_bitmap(888888)));"
+    qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from 
${tbName2} order by k;"
+    sql "insert into ${tbName2} 
values(3,bitmap_from_string('1,0,1,2,3,1,5,99,876,2445'));"
+    sql "insert into ${tbName2} 
values(1,bitmap_or(bitmap_from_string('90,5,876'),to_bitmap(1000)));"
+    qt_sql "select k,bitmap_count(id_bitmap),bitmap_to_string(id_bitmap) from 
${tbName2} order by k;"
+    sql "DROP TABLE ${tbName2};"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to