xiaokang commented on code in PR #34089:
URL: https://github.com/apache/doris/pull/34089#discussion_r1630710541
##########
fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java:
##########
@@ -84,6 +85,8 @@ public class PropertyAnalyzer {
public static final String PROPERTIES_BF_COLUMNS = "bloom_filter_columns";
public static final String PROPERTIES_BF_FPP = "bloom_filter_fpp";
+ public static final String PROPERTIES_COLUMN_GROUPS = "column_groups";
Review Comment:
This constant is unused.
##########
fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java:
##########
@@ -446,7 +447,17 @@ public void analyze(Analyzer analyzer) throws UserException {
}
}
// add a hidden column as row store
- if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties))) {
+ List<String> rsColumns = Lists.newArrayList();
+ if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties), rsColumns, true)) {
+ // check columns in column def
+ Optional<String> invalidColumn = rsColumns.stream()
Review Comment:
Collect all invalid columns and report them to the user in a single error.
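For example, something along these lines (a sketch; `columnSet` stands for the set of defined column names and is an assumption, not the actual variable in the PR):

```java
// Sketch: collect every unknown column and raise one error, instead of
// failing on the first unknown column. `columnSet` is assumed to hold
// the names defined in the CREATE TABLE statement.
List<String> invalidColumns = rsColumns.stream()
        .filter(col -> !columnSet.contains(col))
        .collect(Collectors.toList());
if (!invalidColumns.isEmpty()) {
    throw new AnalysisException("Invalid row store columns: "
            + String.join(",", invalidColumns));
}
```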
##########
fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java:
##########
@@ -777,14 +781,25 @@ public static Boolean analyzeStoreRowColumn(Map<String, String> properties) thro
if (null == value) {
return false;
}
- properties.remove(PROPERTIES_STORE_ROW_COLUMN);
+ if (stripProperty) {
+ properties.remove(PROPERTIES_STORE_ROW_COLUMN);
+ }
if (value.equalsIgnoreCase("true")) {
return true;
} else if (value.equalsIgnoreCase("false")) {
return false;
}
- throw new AnalysisException(PROPERTIES_STORE_ROW_COLUMN
- + " must be `true` or `false`");
+ if (Strings.isNullOrEmpty(value)) {
+ return false;
+ }
+ if (rsColumns != null) {
+ String[] rsColumnArr = value.split(COMMA_SEPARATOR);
+ rsColumns.addAll(Arrays.asList(rsColumnArr));
+ if (!rsColumns.isEmpty()) {
+ return true;
+ }
+ }
+ throw new AnalysisException(PROPERTIES_STORE_ROW_COLUMN + "must be `true`, `false` or set containing columns");
Review Comment:
The original config name `store_row_column` exposes the implementation to the user. I suggest a new solution:
1. deprecate `store_row_column` but keep it for compatibility
2. add a new config `enable_row_store` whose value can be `true` or `false`
3. add a new config `row_store_columns` to set the columns. If it's not set and `enable_row_store` is `true`, use all columns. If it is set, use the specified columns and set `enable_row_store = true` implicitly.
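A minimal sketch of how the two proposed properties could be analyzed (the method name and property strings follow the suggestion above and are illustrative, not existing Doris code):

```java
// Hypothetical analysis of the proposed properties; illustrative only.
public static boolean analyzeRowStore(Map<String, String> properties,
        List<String> rowStoreColumns) throws AnalysisException {
    String enable = properties.remove("enable_row_store");
    String columns = properties.remove("row_store_columns");
    if (!Strings.isNullOrEmpty(columns)) {
        // "row_store_columns" = "c1,c2" implies enable_row_store = true
        rowStoreColumns.addAll(Arrays.asList(columns.split(",")));
        return true;
    }
    if (enable == null) {
        return false; // neither property set
    }
    if (enable.equalsIgnoreCase("true")) {
        return true; // row store enabled for all columns
    }
    if (enable.equalsIgnoreCase("false")) {
        return false;
    }
    throw new AnalysisException("enable_row_store must be `true` or `false`");
}
```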
##########
gensrc/proto/olap_file.proto:
##########
@@ -314,6 +314,8 @@ message ColumnPB {
optional bool is_auto_increment = 22;
// only reference by variant sparse columns
optional int32 parent_unique_id = 23;
+ // column unique ids for row store columns
+ repeated int32 row_store_column_cids = 24;
Review Comment:
Why are the column ids stored in the column meta rather than the table meta?
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java:
##########
@@ -385,6 +390,25 @@ public void removeInvalidProperties() {
properties.remove(PropertyAnalyzer.PROPERTIES_COLOCATE_WITH);
}
+ public List<String> getCopiedRowStoreColumns() {
+ if (rowStoreColumns == null) {
+ return null;
+ }
+ return Lists.newArrayList(rowStoreColumns);
+ }
+
+ public List<Integer> getRowStoreColumnsUniqueIds(Map<String, Column> nameToColumn) {
Review Comment:
This duplicates the method in OlapTable.
##########
fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java:
##########
@@ -172,7 +175,13 @@ public TAlterTabletReqV2 toThrift() {
if (value == null) {
List<TColumn> columns = new ArrayList<TColumn>();
for (Column column : baseSchemaColumns) {
- columns.add(column.toThrift());
+ TColumn tColumn = column.toThrift();
+ // set original row store columns
+ if (column.getName().equalsIgnoreCase(Column.ROW_STORE_COL)
+ && rowStoreColumnUniqueIds != null) {
+ tColumn.setRowStoreColCids(rowStoreColumnUniqueIds);
Review Comment:
Same question as above: why are the column ids stored in the column meta rather than the table meta?
##########
fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java:
##########
@@ -1920,6 +1951,19 @@ public int getAsInt() {
}
enableLightSchemaChange(db, olapTable);
return;
+ } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN)) {
+ String value = properties.get(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN);
+ if (value.equalsIgnoreCase("false")) {
+ throw new DdlException("Can not alter store_row_column from true to false currently");
+ }
+ if (!olapTable.storeRowColumn()) {
+ Column rowColumn = ColumnDefinition
+ .newRowStoreColumnDefinition(null).translateToCatalogStyle();
+ int maxColUniqueId = olapTable
+ .getIndexMetaByIndexId(olapTable.getBaseIndexId()).getMaxColUniqueId();
+ rowColumn.setUniqueId(maxColUniqueId + 1);
Review Comment:
If the user adds a new column after enabling row storage, will it still be OK?
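To illustrate the concern, a self-contained toy model (the allocation logic here is an assumption made for the sketch, not the actual Doris IndexMeta code):

```java
import java.util.ArrayList;
import java.util.List;

// Toy model of unique-id allocation; an assumption for illustration,
// not the actual Doris IndexMeta implementation.
class ToyIndexMeta {
    final List<Integer> uniqueIds = new ArrayList<>();

    int getMaxColUniqueId() {
        return uniqueIds.stream().max(Integer::compare).orElse(-1);
    }
}

public class RowStoreUniqueIdDemo {
    public static void main(String[] args) {
        ToyIndexMeta meta = new ToyIndexMeta();
        meta.uniqueIds.add(5); // existing columns end at unique id 5

        // The hidden row store column is assigned maxColUniqueId + 1 = 6,
        // but suppose the index meta is never updated to record it.
        int rowStoreId = meta.getMaxColUniqueId() + 1;

        // A later ADD COLUMN computes the same value and would collide.
        int newColumnId = meta.getMaxColUniqueId() + 1;
        System.out.println(rowStoreId == newColumnId); // true -> collision
    }
}
```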
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java:
##########
@@ -340,7 +340,7 @@ private void getColumns(Plan plan) {
if (properties != null) {
try {
boolean storeRowColumn =
- PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties));
+ PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), null, true);
Review Comment:
Why not collect the columns here as well?
##########
fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java:
##########
@@ -129,6 +129,13 @@ public class SchemaChangeJobV2 extends AlterJobV2 {
@SerializedName(value = "storageFormat")
private TStorageFormat storageFormat = TStorageFormat.DEFAULT;
+ @SerializedName(value = "rsColumns")
Review Comment:
Use the full name `rowStoreColumns`.
##########
regression-test/suites/point_query_p0/test_rowstore.groovy:
##########
@@ -15,7 +15,295 @@
// specific language governing permissions and limitations
// under the License.
-suite("test_rowstore", "p0") {
+suite("test_rowstore", "p0,nonConcurrent") {
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+
+ def set_be_param = { paramName, paramValue ->
+ // for each BE node, set paramName=paramValue
+ for (String id in backendId_to_backendIP.keySet()) {
+ def beIp = backendId_to_backendIP.get(id)
+ def bePort = backendId_to_backendHttpPort.get(id)
+ def (code, out, err) = curl("POST", String.format("http://%s:%s/api/update_config?%s=%s", beIp, bePort, paramName, paramValue))
+ assertTrue(out.contains("OK"))
+ }
+ }
+ set_be_param("enable_short_circuit_query_access_column_store", "false");
+ // Parse url
+ String jdbcUrl = context.config.jdbcUrl
+ def user = context.config.jdbcUser
+ def password = context.config.jdbcPassword
+ String urlWithoutSchema = jdbcUrl.substring(jdbcUrl.indexOf("://") + 3)
+ def sql_ip = urlWithoutSchema.substring(0, urlWithoutSchema.indexOf(":"))
+ def realDb = "regression_test_point_query_p0"
+ def sql_port
+ if (urlWithoutSchema.indexOf("/") >= 0) {
+ // e.g: jdbc:mysql://localhost:8080/?a=b
+ sql_port = urlWithoutSchema.substring(urlWithoutSchema.indexOf(":") + 1, urlWithoutSchema.indexOf("/"))
+ } else {
+ // e.g: jdbc:mysql://localhost:8080
+ sql_port = urlWithoutSchema.substring(urlWithoutSchema.indexOf(":") + 1)
+ }
+ def prepare_url = "jdbc:mysql://" + sql_ip + ":" + sql_port + "/" + realDb + "?&useServerPrepStmts=true"
+
+ sql "DROP TABLE IF EXISTS table_with_column_group"
+ sql """
+ CREATE TABLE IF NOT EXISTS table_with_column_group (
+ `k1` int(11) NULL COMMENT "",
+ `v1` text NULL COMMENT "",
+ `v2` bigint NULL COMMENT "",
+ `v3` double NULL COMMENT "",
+ `v4` datetime NULL COMMENT ""
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "store_row_column" = "v1,v2",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "storage_format" = "V2"
+ )
+ """
+ sql """
+ insert into table_with_column_group values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11');
+ """
+ sql "set show_hidden_columns = true"
+ qt_sql """
+ select length(__DORIS_ROW_STORE_COL__) from table_with_column_group order by k1;
+ """
+
+ sql "DROP TABLE IF EXISTS table_with_column_group1"
+ sql """
+ CREATE TABLE IF NOT EXISTS table_with_column_group1 (
+ `k1` int(11) NULL COMMENT "",
+ `v1` text NULL COMMENT "",
+ `v2` bigint NULL COMMENT "",
+ `v3` double NULL COMMENT "",
+ `v4` datetime NULL COMMENT ""
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "store_row_column" = "v2,v4",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "storage_format" = "V2"
+ )
+ """
+ sql """
+ insert into table_with_column_group1 values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11');
+ """
+ sql "set show_hidden_columns = true"
+ qt_sql """
+ select length(__DORIS_ROW_STORE_COL__) from table_with_column_group1 order by k1;
+ """
+
+ sql "DROP TABLE IF EXISTS table_with_column_group2"
+ sql """
+ CREATE TABLE IF NOT EXISTS table_with_column_group2 (
+ `k1` int(11) NULL COMMENT "",
+ `v1` text NULL COMMENT "",
+ `v2` bigint NULL COMMENT "",
+ `v3` double NULL COMMENT "",
+ `v4` datetime NULL COMMENT ""
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "store_row_column" = "v2",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "storage_format" = "V2"
+ )
+ """
+ sql """
+ insert into table_with_column_group2 values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11');
+ """
+ qt_sql """
+ select length(__DORIS_ROW_STORE_COL__) from table_with_column_group2 order by k1;
+ """
+
+ sql "DROP TABLE IF EXISTS table_with_column_group3"
+ sql """
+ CREATE TABLE IF NOT EXISTS table_with_column_group3 (
+ `k1` int(11) NULL COMMENT "",
+ `v1` text NULL COMMENT "",
+ `v2` bigint NULL COMMENT "",
+ `v3` double NULL COMMENT "",
+ `v4` datetime NULL COMMENT ""
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "store_row_column" = "v2,v4",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "storage_format" = "V2"
+ )
+ """
+ sql """
+ insert into table_with_column_group3 values (1, "11111111111111111111111111111111111111", 3, 4.0, '2021-02-01 11:11:11'), (2, "222222222222222222222222222222222", 3, 4, '2022-02-01 11:11:11'), (3, "33333333333333333333333333333333", 3, 4, '2023-02-01 11:11:11');
+ """
+ qt_sql """
+ select length(__DORIS_ROW_STORE_COL__) from table_with_column_group3 order by k1;
+ """
+ sql "set show_hidden_columns = false"
+
+ sql """DROP TABLE IF EXISTS table_with_column_group_xxx"""
+ sql """
+ CREATE TABLE IF NOT EXISTS table_with_column_group_xxx (
+ `user_id` int NOT NULL COMMENT "user id",
+ `date` DATE NOT NULL COMMENT "data load datetime",
+ `datev2` DATEV2 NOT NULL COMMENT "data load datetime",
+ `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load datetime",
+ `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load datetime",
+ `city` VARCHAR(20) COMMENT "user city",
+ `age` SMALLINT COMMENT "user age",
+ `sex` TINYINT COMMENT "user gender",
+ `last_visit_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user last visit time",
+ `last_update_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user last update time",
+ `datetime_val1` DATETIMEV2(3) DEFAULT "1970-01-01 00:00:00.111" COMMENT "user last visit time",
+ `datetime_val2` DATETIME(6) DEFAULT "1970-01-01 00:00:00" COMMENT "user last update time",
+ `last_visit_date_not_null` DATETIME NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user last visit time",
+ `cost` BIGINT DEFAULT "0" COMMENT "user total cost",
+ `max_dwell_time` INT DEFAULT "0" COMMENT "user max dwell time",
+ `min_dwell_time` INT DEFAULT "99999" COMMENT "user min dwell time")
+ UNIQUE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`)
+ PROPERTIES ( "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "store_row_column" = "datetimev2_1,datetime_val1,datetime_val2,max_dwell_time"
+ );
+ """
+ sql """ INSERT INTO table_with_column_group_xxx values
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.021',
'2017-10-01 11:11:11.011', 'Beijing', 10, 1, '2020-01-01', '2020-01-01',
'2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1,
30, 20)
+ """
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.022',
'2017-10-01 11:11:11.012', 'Beijing', 10, 1, '2020-01-02', '2020-01-02',
'2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1,
31, 19)
+ """
+
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.023',
'2017-10-01 11:11:11.013', 'Beijing', 10, 1, '2020-01-02', '2020-01-02',
'2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1,
31, 21)
+ """
+
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.024',
'2017-10-01 11:11:11.014', 'Beijing', 10, 1, '2020-01-03', '2020-01-03',
'2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1,
32, 20)
+ """
+
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.025',
'2017-10-01 11:11:11.015', 'Beijing', 10, 1, '2020-01-03', '2020-01-03',
'2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1,
32, 22)
+ """
+
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.026',
'2017-10-01 11:11:11.016', 'Beijing', 10, 1, '2020-01-04', '2020-01-04',
'2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1,
33, 21)
+ """
+
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.027',
'2017-10-01 11:11:11.017', 'Beijing', 10, 1, NULL, NULL, NULL, NULL,
'2020-01-05', 1, 34, 20)
+ """
+
+ sql """ INSERT INTO table_with_column_group_xxx VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.028',
'2017-10-01 11:11:11.018', 'Beijing', 10, 1, NULL, NULL, NULL, NULL,
'2020-01-05', 1, 34, 20)
+ """
+
+ // set server side prepared statement url
+ connect(user = user, password = password, url = prepare_url) {
+ def prep_sql = { sql_str, k ->
+ def stmt = prepareStatement sql_str
+ stmt.setInt(1, k)
+ assertEquals(stmt.class, com.mysql.cj.jdbc.ServerPreparedStatement);
+ qe_point_select stmt
+ }
+ def sql_str = "select v1, v2 from table_with_column_group where k1 = ?"
+ prep_sql sql_str, 1
+ prep_sql sql_str, 2
+ prep_sql sql_str, 3
+ sql_str = "select v2 from table_with_column_group where k1 = ?"
+ prep_sql sql_str, 1
+ prep_sql sql_str, 2
+ prep_sql sql_str, 3
+ sql_str = "select v1 from table_with_column_group where k1 = ?"
+ prep_sql sql_str, 3
+ sql_str = "select v2, v1 from table_with_column_group where k1 = ?"
+ prep_sql sql_str, 3
+
+
+ sql_str = "select v2 from table_with_column_group where k1 = ?"
+ prep_sql sql_str, 1
+
+ sql_str = "select v2 from table_with_column_group2 where k1 = ?"
+ prep_sql sql_str, 1
+ prep_sql sql_str, 2
+ prep_sql sql_str, 3
+
+ sql_str = "select v4 from table_with_column_group3 where k1 = ?"
+ prep_sql sql_str, 1
+ prep_sql sql_str, 2
+ prep_sql sql_str, 3
+
+ def setPrepareStmtArgs = {stmt, user_id, date, datev2, datetimev2_1, datetimev2_2, city, age, sex ->
+ java.text.SimpleDateFormat formater = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SS");
+ stmt.setInt(1, user_id)
+ stmt.setDate(2, java.sql.Date.valueOf(date))
+ stmt.setDate(3, java.sql.Date.valueOf(datev2))
+ stmt.setTimestamp(4, new java.sql.Timestamp(formater.parse(datetimev2_1).getTime()))
+ stmt.setTimestamp(5, new java.sql.Timestamp(formater.parse(datetimev2_2).getTime()))
+ stmt.setString(6, city)
+ stmt.setInt(7, age)
+ stmt.setInt(8, sex)
+ }
+
+ def stmt = prepareStatement """ SELECT datetimev2_1,datetime_val1,datetime_val2,max_dwell_time FROM table_with_column_group_xxx t where user_id = ? and date = ? and datev2 = ? and datetimev2_1 = ? and datetimev2_2 = ? and city = ? and age = ? and sex = ?; """
+ setPrepareStmtArgs stmt, 1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.21', '2017-10-01 11:11:11.11', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.22', '2017-10-01 11:11:11.12', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.23', '2017-10-01 11:11:11.13', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.24', '2017-10-01 11:11:11.14', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.25', '2017-10-01 11:11:11.15', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.26', '2017-10-01 11:11:11.16', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.27', '2017-10-01 11:11:11.17', 'Beijing', 10, 1
+ qe_point_select stmt
+ setPrepareStmtArgs stmt, 4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.28', '2017-10-01 11:11:11.18', 'Beijing', 10, 1
+ qe_point_select stmt
+ }
+
+ test {
+ sql "select /*+ SET_VAR(enable_nereids_planner=false)*/ * from
table_with_column_group where k1 = 1"
+ exception("Not support column store")
+ }
+
+ sql "DROP TABLE IF EXISTS table_with_column_group4"
+ sql """
+ CREATE TABLE IF NOT EXISTS table_with_column_group4 (
+ `k1` int(11) NULL COMMENT "",
+ `v1` text NULL COMMENT "",
+ `v2` bigint NULL COMMENT "",
+ `v3` double NULL COMMENT "",
+ `v4` datetime NULL COMMENT ""
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "store_row_column" = "v4",
Review Comment:
I suggest a new config name: `row_store_columns`.
##########
fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java:
##########
@@ -1317,6 +1318,33 @@ private void createJob(String rawSql, long dbId, OlapTable olapTable, Map<Long,
TInvertedIndexStorageFormat invertedIndexStorageFormat = PropertyAnalyzer.analyzeInvertedIndexStorageFormat(propertyMap);
+ // property store_row_column
+ // eg. "store_row_column" = "k1,k2"
+ List<String> rsColumns = Lists.newArrayList();
+ boolean storeRowColumn = false;
+ try {
+ storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(propertyMap, rsColumns, true);
+ // check columns in column def
+ Optional<String> invalidColumn = rsColumns.stream()
+ .filter(colName -> olapTable.getBaseColumn(colName) == null)
+ .findFirst();
+ if (invalidColumn.isPresent()) {
Review Comment:
There is a lot of duplicated code for checking invalid columns. It can be encapsulated inside analyzeStoreRowColumn, since that method can already throw AnalysisException.
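A minimal sketch of the encapsulated version, assuming a column-existence predicate is passed in (the `Predicate` parameter and the message wording are illustrative, not the actual PR signature):

```java
// Illustrative sketch: validate the row store columns inside
// analyzeStoreRowColumn itself, so call sites no longer repeat the check.
// The Predicate parameter is an assumption for the sketch.
public static boolean analyzeStoreRowColumn(Map<String, String> properties,
        List<String> rsColumns, Predicate<String> columnExists) throws AnalysisException {
    String value = properties.remove(PROPERTIES_STORE_ROW_COLUMN);
    if (Strings.isNullOrEmpty(value)) {
        return false;
    }
    if (value.equalsIgnoreCase("true")) {
        return true;
    }
    if (value.equalsIgnoreCase("false")) {
        return false;
    }
    rsColumns.addAll(Arrays.asList(value.split(",")));
    // report all invalid columns at once
    List<String> invalid = rsColumns.stream()
            .filter(columnExists.negate())
            .collect(Collectors.toList());
    if (!invalid.isEmpty()) {
        throw new AnalysisException("Invalid row store columns: " + String.join(",", invalid));
    }
    return true;
}
```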
##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -277,27 +278,23 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& bl
}
MonotonicStopWatch watch;
watch.start();
- // find row column id
int row_column_id = 0;
for (int i = 0; i < _tablet_schema->num_columns(); ++i) {
if (_tablet_schema->column(i).is_row_store_column()) {
- row_column_id = i;
- break;
+ auto* row_store_column = static_cast<vectorized::ColumnString*>(
+ block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get());
+ row_store_column->clear();
+ vectorized::DataTypeSerDeSPtrs serdes =
+ vectorized::create_data_type_serdes(block.get_data_types());
+ std::unordered_set<int> group_cids_set(
+ _tablet_schema->column(i).row_columns_cids().begin(),
+ _tablet_schema->column(i).row_columns_cids().end());
+ vectorized::JsonbSerializeUtil::block_to_jsonb(
+ *_tablet_schema, block, *row_store_column, _tablet_schema->num_columns(),
+ serdes, group_cids_set);
}
Review Comment:
Add a `break` here.
##########
be/src/service/point_query_executor.cpp:
##########
@@ -80,6 +131,23 @@ Status Reusable::init(const TDescriptorTable& t_desc_tbl, const std::vector<TExp
_col_uid_to_idx[slot->col_unique_id()] = i;
_col_default_values[i] = slot->col_default_value();
}
+
+ // Get the output slot descriptors
+ std::vector<SlotDescriptor*> output_slot_descs;
+ std::vector<vectorized::VExprSPtr> slot_exprs;
+ for (const auto& expr : _output_exprs_ctxs) {
+ extract_slot_ref(expr->root(), slot_exprs);
+ }
+ for (const auto& expr : slot_exprs) {
+ int column_id = static_cast<const vectorized::VSlotRef*>(expr.get())->column_id();
+ auto* slot_desc = tuple_desc()->slots()[column_id];
+ output_slot_descs.push_back(slot_desc);
Review Comment:
You can change `extract_slot_ref` to `extract_slot_desc` and extract `output_slot_descs` in it directly.
##########
be/src/service/point_query_executor.cpp:
##########
@@ -335,23 +407,68 @@ Status PointQueryExecutor::_lookup_row_data() {
_reusable->get_data_type_serdes(),
_row_read_ctxs[i]._cached_row_data.data().data,
_row_read_ctxs[i]._cached_row_data.data().size,
_reusable->get_col_uid_to_idx(),
- *_result_block, _reusable->get_col_default_values());
+ *_result_block, _reusable->get_col_default_values(),
+ _reusable->include_col_uids());
continue;
Review Comment:
Do you need to consider missing columns?
##########
be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:
##########
@@ -264,7 +265,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con
// Just invalid row cache for simplicity, since the rowset is not visible at present.
// If we update/insert cache, if load failed rowset will not be visible but cached data
// will be visible, and lead to inconsistency.
- if (!config::disable_storage_row_cache && _tablet_schema->store_row_column() &&
+ if (!config::disable_storage_row_cache && _tablet_schema->has_full_row_store_column() &&
Review Comment:
Why is this only for the all-columns row store?
##########
be/src/vec/jsonb/serialize.cpp:
##########
@@ -55,12 +57,15 @@ void JsonbSerializeUtil::block_to_jsonb(const TabletSchema& schema, const Block&
for (int j = 0; j < num_cols; ++j) {
const auto& column = block.get_by_position(j).column;
const auto& tablet_column = *schema.columns()[j];
+ // ignore row store columns
if (tablet_column.is_row_store_column()) {
- // ignore dst row store column
continue;
}
- serdes[j]->write_one_cell_to_jsonb(*column, jsonb_writer, &pool,
- tablet_column.unique_id(), i);
+ // TODO improve performance for checking column in group
Review Comment:
Can you loop over group_cids instead of [0, num_cols)?
##########
fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java:
##########
@@ -777,14 +781,25 @@ public static Boolean analyzeStoreRowColumn(Map<String, String> properties) thro
if (null == value) {
return false;
}
- properties.remove(PROPERTIES_STORE_ROW_COLUMN);
+ if (stripProperty) {
+ properties.remove(PROPERTIES_STORE_ROW_COLUMN);
+ }
if (value.equalsIgnoreCase("true")) {
return true;
} else if (value.equalsIgnoreCase("false")) {
return false;
}
- throw new AnalysisException(PROPERTIES_STORE_ROW_COLUMN
- + " must be `true` or `false`");
+ if (Strings.isNullOrEmpty(value)) {
+ return false;
+ }
+ if (rsColumns != null) {
+ String[] rsColumnArr = value.split(COMMA_SEPARATOR);
+ rsColumns.addAll(Arrays.asList(rsColumnArr));
+ if (!rsColumns.isEmpty()) {
+ return true;
+ }
+ }
+ throw new AnalysisException(PROPERTIES_STORE_ROW_COLUMN + "must be `true`, `false` or set containing columns");
Review Comment:
Another solution:
1. deprecate `store_row_column` but keep it for compatibility
2. add a single config `row_store_columns`: value `__all__` means all columns, `c1,c2,c3` means the listed columns, and `` `__all__` `` escapes a single column actually named `__all__`
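A sketch of how parsing the proposed single config could look (the method and the escaping rule follow the proposal above and are illustrative, not existing Doris code):

```java
// Hypothetical parsing of the proposed `row_store_columns` property.
// "__all__" selects all columns; "`__all__`" escapes a real column
// literally named __all__.
public static boolean analyzeRowStoreColumns(String value, List<String> rsColumns) {
    if (Strings.isNullOrEmpty(value)) {
        return false; // property not set: row store disabled
    }
    if (value.equals("__all__")) {
        return true; // all columns; leave rsColumns empty to mean "all"
    }
    for (String col : value.split(",")) {
        col = col.trim();
        // strip backtick escaping, so `__all__` names a real column
        if (col.startsWith("`") && col.endsWith("`") && col.length() > 1) {
            col = col.substring(1, col.length() - 1);
        }
        rsColumns.add(col);
    }
    return !rsColumns.isEmpty();
}
```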
##########
be/src/olap/tablet_schema.cpp:
##########
@@ -652,7 +659,7 @@ void TabletColumn::add_sub_column(TabletColumn& sub_column) {
}
bool TabletColumn::is_row_store_column() const {
- return _col_name == BeConsts::ROW_STORE_COL;
+ return _col_name.starts_with(BeConsts::ROW_STORE_COL);
Review Comment:
Change this back to `==`.
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -1223,6 +1223,18 @@ public void setBloomFilterInfo(Set<String> bfColumns, double bfFpp) {
this.bfFpp = bfFpp;
}
+ public List<Integer> getRowStoreColumnsUniqueIds(List<String> rsColumnNames) {
Review Comment:
Now `getRowStoreColumnsUniqueIds` is always and only used with `getTableProperty().getCopiedRowStoreColumns()`, so it can call `getCopiedRowStoreColumns` internally.
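i.e. something like the following sketch, assuming the existing getters (`getBaseColumn` appears elsewhere in this PR; `getUniqueId()` on Column is an assumption here):

```java
// Sketch: fold the lookup into OlapTable so callers no longer pass the
// names in. getUniqueId() on Column is assumed for illustration.
public List<Integer> getRowStoreColumnsUniqueIds() {
    List<String> rsColumnNames = getTableProperty().getCopiedRowStoreColumns();
    if (rsColumnNames == null) {
        return null;
    }
    List<Integer> uniqueIds = Lists.newArrayList();
    for (String name : rsColumnNames) {
        uniqueIds.add(getBaseColumn(name).getUniqueId());
    }
    return uniqueIds;
}
```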
##########
be/src/olap/schema_change.cpp:
##########
@@ -1324,11 +1331,15 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params,
} else if (column_mapping->ref_column >= 0) {
const auto& column_new = new_tablet_schema->column(i);
const auto& column_old = base_tablet_schema->column(column_mapping->ref_column);
- // index changed
+ // check index changed or row store columns changed
if (column_new.is_bf_column() != column_old.is_bf_column() ||
column_new.has_bitmap_index() != column_old.has_bitmap_index() ||
new_tablet_schema->has_inverted_index(column_new) !=
- base_tablet_schema->has_inverted_index(column_old)) {
+ base_tablet_schema->has_inverted_index(column_old) ||
+ !std::equal(column_new.row_columns_cids().begin(),
Review Comment:
Does std::equal check element order?
##########
be/src/common/config.h:
##########
@@ -1370,6 +1370,9 @@ DECLARE_Bool(enable_jvm_monitor);
// Skip loading stale rowset meta when initializing `TabletMeta` from protobuf
DECLARE_mBool(skip_loading_stale_rowset_meta);
+ // Whether to allow access to the column store in short circuit queries, otherwise report an error
+DECLARE_mBool(enable_short_circuit_query_access_column_store);
Review Comment:
Is it necessary? If yes, a session variable would be better.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]