eldenmoon commented on code in PR #47541:
URL: https://github.com/apache/doris/pull/47541#discussion_r1944154431


##########
be/src/vec/columns/column_object.h:
##########
@@ -268,18 +268,26 @@ class ColumnObject final : public COWHelper<IColumn, 
ColumnObject> {
     WrappedPtr serialized_sparse_column = ColumnMap::create(
             ColumnString::create(), ColumnString::create(), 
ColumnArray::ColumnOffsets::create());
 
+    int32_t _max_subcolumns_count = 0;
+
 public:
     static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
 
+    // Nullable(Array(Nullable(Object)))
+    const DataTypePtr NESTED_TYPE = std::make_shared<DataTypeNullable>(

Review Comment:
   use static variable



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -209,6 +209,14 @@ Status Segment::_open() {
     // 0.01 comes from PrimaryKeyIndexBuilder::init
     _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;
 
+    for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
+        const auto& column = _tablet_schema->column(ordinal);
+        if (column.is_variant_type()) {
+            _variant_max_subcolumns_count = 
column.variant_max_subcolumns_count();

Review Comment:
   _variant_max_subcolumns_count is useless in segment, we should ignore it, 
and use 0 should be ok, if the type is not same, cast should be performed



##########
be/src/vec/columns/column_object.h:
##########
@@ -342,10 +350,15 @@ class ColumnObject final : public COWHelper<IColumn, 
ColumnObject> {
 
     void incr_num_rows(size_t n) { num_rows += n; }
 
-    void set_num_rows(size_t n);
+    // Sets the number of rows and aligns all subcolumns and the serialized 
sparse column accordingly.
+    // During serialization and reading, each subcolumn is processed 
separately and then added to the column object,
+    // ultimately aligning all columns through this method.
+    void set_num_rows_and_align(size_t n);

Review Comment:
   i think we should not do align, align should be done in some where else, it 
may lead to data polluted when encounter bug



##########
be/src/vec/data_types/data_type_object.h:
##########
@@ -50,21 +49,20 @@ class IColumn;
 namespace doris::vectorized {
 class DataTypeObject : public IDataType {
 private:
-    String schema_format;
-    bool is_nullable;
+    int32_t _max_subcolumns_count = -6;
 
 public:
-    DataTypeObject(const String& schema_format_ = "json", bool is_nullable_ = 
true);
+    DataTypeObject(int32_t max_subcolumns_count);
     const char* get_family_name() const override { return "Variant"; }
     TypeIndex get_type_id() const override { return TypeIndex::VARIANT; }
     TypeDescriptor get_type_as_type_descriptor() const override {
-        return TypeDescriptor(TYPE_VARIANT);
+        return TypeDescriptor(TYPE_VARIANT, _max_subcolumns_count);
     }
 
     doris::FieldType get_storage_field_type() const override {
         return doris::FieldType::OLAP_FIELD_TYPE_VARIANT;
     }
-    MutableColumnPtr create_column() const override { return 
ColumnObject::create(is_nullable); }
+    MutableColumnPtr create_column() const override;
     bool equals(const IDataType& rhs) const override;

Review Comment:
   this function should be modified according to `_max_subcolumns_count`



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java:
##########
@@ -154,6 +154,9 @@ public class Column implements GsonPostProcessable {
     @SerializedName(value = "gctt")
     private Set<String> generatedColumnsThatReferToThis = new HashSet<>();
 
+    @SerializedName(value = "varaintMaxSubcolumnsCount")
+    private int variantMaxSubcolumnsCount = -21;

Review Comment:
   why -21, is it necessary add this field in Column, since it's already in 
type?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -2527,6 +2527,25 @@ public Boolean variantEnableFlattenNested() {
         return false;
     }
 
+    public void setVariantMaxSubcolumnsCount(int maxSubcoumnsCount) {
+        
getOrCreatTableProperty().setVariantMaxSubcolumnsCount(maxSubcoumnsCount);
+        List<Column> columns = getBaseSchema(true);
+        for (Column column : columns) {
+            Type type = column.getType();
+            if (type.isVariantType()) {
+                ScalarType scType = (ScalarType) type;
+                if (scType.getVariantMaxSubcolumnsCount() < 0) {
+                    LOG.info("error count: ", 
scType.getVariantMaxSubcolumnsCount());

Review Comment:
   throw exception



##########
gensrc/thrift/AgentService.thrift:
##########
@@ -50,6 +50,7 @@ struct TTabletSchema {
     21: optional i64 row_store_page_size = 16384
     22: optional bool variant_enable_flatten_nested = false 
     23: optional i64 storage_page_size = 65536
+    24: optional i32 variant_max_subcolumns_count = -17

Review Comment:
   -17



##########
be/src/vec/data_types/serde/data_type_object_serde.cpp:
##########
@@ -48,23 +48,24 @@ Status DataTypeObjectSerDe::_write_column_to_mysql(const 
IColumn& column,
                                                    int64_t row_idx, bool 
col_const,
                                                    const FormatOptions& 
options) const {
     const auto& variant = assert_cast<const ColumnObject&>(column);
-    if (variant.is_scalar_variant()) {
-        // Serialize scalar types, like int, string, array, faster path
-        const auto& root = variant.get_subcolumn({});
-        
RETURN_IF_ERROR(root->get_least_common_type_serde()->write_column_to_mysql(
-                root->get_finalized_column(), row_buffer, row_idx, col_const, 
options));
+    // if (variant.is_scalar_variant()) {

Review Comment:
   remove useless code



##########
be/src/vec/functions/function_fake.cpp:
##########
@@ -187,8 +187,6 @@ void register_function_fake(SimpleFunctionFactory& factory) 
{
     register_table_function_with_impl<FunctionPoseExplode<false>>(factory, 
"posexplode");
     register_table_function_with_impl<FunctionPoseExplode<true>>(factory, 
"posexplode",
                                                                  
COMBINATOR_SUFFIX_OUTER);
-    register_table_function_expand_outer_default<DataTypeObject, 
false>(factory,

Review Comment:
   why remove



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -594,8 +602,10 @@ vectorized::DataTypePtr Segment::get_data_type_of(const 
ColumnIdentifier& identi
         }
         // it contains children or column missing in storage, so treat it as 
variant
         return identifier.is_nullable
-                       ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>())
-                       : std::make_shared<vectorized::DataTypeObject>();
+                       ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
+                                 _variant_max_subcolumns_count))

Review Comment:
   use default value instead



##########
be/src/olap/rowset/segment_v2/segment.h:
##########
@@ -288,6 +288,7 @@ class Segment : public 
std::enable_shared_from_this<Segment>, public MetadataAdd
     InvertedIndexFileInfo _idx_file_info;
 
     int _be_exec_version = BeExecVersionManager::get_newest_version();
+    int32_t _variant_max_subcolumns_count = 0;

Review Comment:
   remove



##########
fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java:
##########
@@ -652,6 +654,14 @@ private List<Column> 
checkAndPrepareMaterializedView(CreateMaterializedViewStmt
         if (LOG.isDebugEnabled()) {
             LOG.debug("lightSchemaChange:{}, newMVColumns:{}", 
olapTable.getEnableLightSchemaChange(), newMVColumns);
         }
+        for (Column column : newMVColumns) {

Review Comment:
   add comment



##########
be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp:
##########
@@ -502,6 +507,7 @@ void 
VariantColumnWriterImpl::_init_column_meta(ColumnMetaPB* meta, uint32_t col
     for (uint32_t i = 0; i < column.get_subtype_count(); ++i) {
         _init_column_meta(meta->add_children_columns(), column_id, 
column.get_sub_column(i));
     }
+    
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());

Review Comment:
   maybe persit it is useless?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java:
##########
@@ -821,19 +828,20 @@ public OlapFile.ColumnPB toPb(Set<String> bfColumns, 
List<Index> indexes) throws
 
         if (this.type.isArrayType()) {
             Column child = this.getChildren().get(0);
-            builder.addChildrenColumns(child.toPb(Sets.newHashSet(), 
Lists.newArrayList()));
+            builder.addChildrenColumns(child.toPb(Sets.newHashSet(), 
Lists.newArrayList(), variantMaxSubcolumnsCount));
         } else if (this.type.isMapType()) {
             Column k = this.getChildren().get(0);
-            builder.addChildrenColumns(k.toPb(Sets.newHashSet(), 
Lists.newArrayList()));
+            builder.addChildrenColumns(k.toPb(Sets.newHashSet(), 
Lists.newArrayList(), variantMaxSubcolumnsCount));
             Column v = this.getChildren().get(1);
-            builder.addChildrenColumns(v.toPb(Sets.newHashSet(), 
Lists.newArrayList()));
+            builder.addChildrenColumns(v.toPb(Sets.newHashSet(), 
Lists.newArrayList(), variantMaxSubcolumnsCount));
         } else if (this.type.isStructType()) {
             List<Column> childrenColumns = this.getChildren();
             for (Column c : childrenColumns) {
-                builder.addChildrenColumns(c.toPb(Sets.newHashSet(), 
Lists.newArrayList()));
+                builder.addChildrenColumns(c.toPb(Sets.newHashSet(), 
Lists.newArrayList(), variantMaxSubcolumnsCount));
             }
         }
 
+        builder.setVariantMaxSubcolumnsCount(variantMaxSubcolumnsCount);

Review Comment:
   only set for variant type



##########
fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java:
##########
@@ -727,11 +730,17 @@ public void toThrift(TTypeDesc container) {
             case CHAR:
             case HLL:
             case STRING:
-            case JSONB:
-            case VARIANT: {
+            case JSONB: {
                 scalarType.setLen(getLength());
                 break;
             }
+            case VARIANT: {
+                
scalarType.setVariantMaxSubcolumnsCount(variantMaxSubcolumnsCount);
+                if (variantMaxSubcolumnsCount == -33) {

Review Comment:
   variantMaxSubcolumnsCount == -33  ? debug code? should remove?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeVariantArray.java:
##########
@@ -36,7 +36,7 @@
 public class ExplodeVariantArray extends TableGeneratingFunction implements 
UnaryExpression, AlwaysNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
-            FunctionSignature.ret(new VariantType()).args(new VariantType())
+            FunctionSignature.ret(new VariantType(-12)).args(new 
VariantType(-12))

Review Comment:
   -12?



##########
be/src/vec/data_types/serde/data_type_object_serde.cpp:
##########
@@ -48,23 +48,24 @@ Status DataTypeObjectSerDe::_write_column_to_mysql(const 
IColumn& column,
                                                    int64_t row_idx, bool 
col_const,
                                                    const FormatOptions& 
options) const {
     const auto& variant = assert_cast<const ColumnObject&>(column);
-    if (variant.is_scalar_variant()) {
-        // Serialize scalar types, like int, string, array, faster path
-        const auto& root = variant.get_subcolumn({});
-        
RETURN_IF_ERROR(root->get_least_common_type_serde()->write_column_to_mysql(
-                root->get_finalized_column(), row_buffer, row_idx, col_const, 
options));
+    // if (variant.is_scalar_variant()) {
+    //     // Serialize scalar types, like int, string, array, faster path
+    //     const auto& root = variant.get_subcolumn({});
+    //     
RETURN_IF_ERROR(root->get_least_common_type_serde()->write_column_to_mysql(
+    //             root->get_finalized_column(), row_buffer, row_idx, 
col_const, options));
+    // } else {
+
+    // }
+    // Serialize hierarchy types to json format
+    std::string buffer;
+    bool is_null = false;
+    if (!variant.serialize_one_row_to_string(row_idx, &buffer)) {
+        return Status::InternalError("Invalid json format");
+    }
+    if (is_null) {

Review Comment:
   @is_null not set, maybe remove



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java:
##########
@@ -31,13 +32,25 @@
 @Developing
 public class VariantType extends PrimitiveType {
 
-    public static final VariantType INSTANCE = new VariantType();
+    public static final VariantType INSTANCE = new VariantType(1000000);

Review Comment:
   why 1000000



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java:
##########
@@ -45,8 +45,8 @@ public class ElementAt extends ScalarFunction
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             FunctionSignature.ret(new FollowToAnyDataType(0))
                     .args(ArrayType.of(new AnyDataType(0)), 
BigIntType.INSTANCE),
-            FunctionSignature.ret(new VariantType())
-                    .args(new VariantType(), VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(new VariantType(-36))
+                    .args(new VariantType(-35), VarcharType.SYSTEM_DEFAULT),

Review Comment:
   -36?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelper.java:
##########
@@ -428,6 +429,34 @@ private static FunctionSignature 
defaultDateTimeV2PrecisionPromotion(
         return signature;
     }
 
+    /** dynamicComputeVariantArgs */
+    public static FunctionSignature dynamicComputeVariantArgs(

Review Comment:
   why modify this, add comment



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java:
##########
@@ -31,13 +32,25 @@
 @Developing
 public class VariantType extends PrimitiveType {
 
-    public static final VariantType INSTANCE = new VariantType();
+    public static final VariantType INSTANCE = new VariantType(1000000);
 
     public static final int WIDTH = 24;
 
+    private int variantMaxSubcolumnsCount = -10;

Review Comment:
   -10?



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -209,6 +209,14 @@ Status Segment::_open() {
     // 0.01 comes from PrimaryKeyIndexBuilder::init
     _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;
 
+    for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
+        const auto& column = _tablet_schema->column(ordinal);
+        if (column.is_variant_type()) {
+            _variant_max_subcolumns_count = 
column.variant_max_subcolumns_count();

Review Comment:
   see _convert_to_expected_type



##########
fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java:
##########
@@ -131,6 +131,7 @@ public class CreateReplicaTask extends AgentTask {
     private List<Integer> rowStoreColumnUniqueIds;
 
     private boolean variantEnableFlattenNested;
+    private int variantMaxSubcolumnsCount = -19;

Review Comment:
   -19



##########
gensrc/thrift/Descriptors.thrift:
##########
@@ -43,6 +43,7 @@ struct TColumn {
     18: optional bool is_auto_increment = false;
     19: optional i32 cluster_key_id = -1
     20: optional i32 be_exec_version = -1
+    21: optional i32 variant_max_subcolumns_count = -15

Review Comment:
   -15



##########
fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java:
##########
@@ -124,6 +124,9 @@ public long getByteSize() {
     @SerializedName(value = "lenStr")
     private String lenStr;
 
+    @SerializedName(value = "variantMaxSubcolumnsCount")
+    private int variantMaxSubcolumnsCount = -9;

Review Comment:
   why -9



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to