chaoyli closed pull request #432: Support add new key column for 
LinkedSchemaChange
URL: https://github.com/apache/incubator-doris/pull/432
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/be/src/olap/column_mapping.h b/be/src/olap/column_mapping.h
new file mode 100644
index 00000000..b5ec1876
--- /dev/null
+++ b/be/src/olap/column_mapping.h
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H
+#define DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H
+
+#include "olap/wrapper_field.h"
+
+namespace doris {
+
+struct ColumnMapping {
+    ColumnMapping() : ref_column(-1), default_value(NULL) {}
+    virtual ~ColumnMapping() {}
+
+    // <0: use default value
+    // >=0: use origin column
+    int32_t ref_column;
+    // normally for default value. stores values for filters
+    WrapperField* default_value;
+};
+
+}  // namespace doris
+#endif // DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H
\ No newline at end of file
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index ad36cdba..a5c2a2c1 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -644,9 +644,11 @@ bool RowBlockMerger::_pop_heap() {
 }
 
 LinkedSchemaChange::LinkedSchemaChange(
-        OLAPTablePtr base_olap_table, OLAPTablePtr new_olap_table) :
+        OLAPTablePtr base_olap_table, OLAPTablePtr new_olap_table,
+        const RowBlockChanger& row_block_changer) :
         _base_olap_table(base_olap_table),
-        _new_olap_table(new_olap_table) {}
+        _new_olap_table(new_olap_table),
+        _row_block_changer(row_block_changer) {}
 
 SchemaChangeDirectly::SchemaChangeDirectly(
         OLAPTablePtr olap_table,
@@ -709,7 +711,8 @@ bool LinkedSchemaChange::process(ColumnData* olap_data, 
SegmentGroup* new_segmen
 
     new_segment_group->set_empty(olap_data->empty());
     
new_segment_group->set_num_segments(olap_data->segment_group()->num_segments());
-    
new_segment_group->add_column_statistics_for_linked_schema_change(olap_data->segment_group()->get_column_statistics());
+    
new_segment_group->add_column_statistics_for_linked_schema_change(olap_data->segment_group()->get_column_statistics(),
+                                                                      
_row_block_changer.get__schema_mapping() );
 
     if (OLAP_SUCCESS != new_segment_group->load()) {
         OLAP_LOG_WARNING("fail to reload index. [table='%s' version='%d-%d']",
@@ -1780,7 +1783,8 @@ OLAPStatus SchemaChangeHandler::schema_version_convert(
         LOG(INFO) << "doing linked schema change.";
         sc_procedure = new(nothrow) LinkedSchemaChange(
                                 src_olap_table,
-                                dest_olap_table);
+                                dest_olap_table,
+                                rb_changer);
     }
 
     if (NULL == sc_procedure) {
@@ -1998,7 +2002,8 @@ OLAPStatus 
SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) {
         LOG(INFO) << "doing linked schema change.";
         sc_procedure = new(nothrow) LinkedSchemaChange(
                                 sc_params->ref_olap_table,
-                                sc_params->new_olap_table);
+                                sc_params->new_olap_table,
+                                rb_changer);
     }
 
     if (NULL == sc_procedure) {
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index 84cafa6f..87b15fb7 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -41,17 +41,6 @@ class RowCursor;
 // defined in 'writer.h'
 class ColumnDataWriter;
 
-struct ColumnMapping {
-    ColumnMapping() : ref_column(-1), default_value(NULL) {}
-    virtual ~ColumnMapping() {}
-
-    // <0: use default value
-    // >=0: use origin column
-    int32_t ref_column;
-    // normally for default value. stores values for filters
-    WrapperField* default_value;
-};
-
 class RowBlockChanger {
 public:
     typedef std::vector<ColumnMapping> SchemaMapping;
@@ -66,6 +55,10 @@ class RowBlockChanger {
     virtual ~RowBlockChanger();
 
     ColumnMapping* get_mutable_column_mapping(size_t column_index);
+
+    SchemaMapping get__schema_mapping() const {
+        return _schema_mapping;
+    }
     
     bool change_row_block(
             const DataFileType df_type,
@@ -192,13 +185,15 @@ class LinkedSchemaChange : public SchemaChange {
 public:
     explicit LinkedSchemaChange(
                 OLAPTablePtr base_olap_table, 
-                OLAPTablePtr new_olap_table);
+                OLAPTablePtr new_olap_table,
+                const RowBlockChanger& row_block_changer);
     ~LinkedSchemaChange() {}
 
     bool process(ColumnData* olap_data, SegmentGroup* new_segment_group);
 private:
     OLAPTablePtr _base_olap_table;
     OLAPTablePtr _new_olap_table;
+    const RowBlockChanger& _row_block_changer;
     DISALLOW_COPY_AND_ASSIGN(LinkedSchemaChange);
 };
 
diff --git a/be/src/olap/segment_group.cpp b/be/src/olap/segment_group.cpp
index a5c24c0f..fdabc9bc 100644
--- a/be/src/olap/segment_group.cpp
+++ b/be/src/olap/segment_group.cpp
@@ -27,7 +27,7 @@
 #include "olap/row_block.h"
 #include "olap/row_cursor.h"
 #include "olap/utils.h"
-#include "olap/wrapper_field.h"
+#include "olap/column_mapping.h"
 
 using std::ifstream;
 using std::string;
@@ -198,24 +198,38 @@ void SegmentGroup::delete_all_files() {
     }
 }
 
+
 OLAPStatus SegmentGroup::add_column_statistics_for_linked_schema_change(
-        const std::vector<std::pair<WrapperField*, WrapperField*>>& 
column_statistic_fields) {
+        const std::vector<std::pair<WrapperField*, WrapperField*>>& 
column_statistic_fields,
+        const SchemaMapping& schema_mapping) {
     //When add rollup table, the base table index maybe empty
     if (column_statistic_fields.size() == 0) {
         return OLAP_SUCCESS;
     }
 
-    //Should use _table->num_key_fields(), not column_statistic_fields.size()
-    //as rollup table num_key_fields will less than base table 
column_statistic_fields.size().
-    //For LinkedSchemaChange, the rollup table keys order is the same as base 
table
+    // 1. For LinkedSchemaChange, the rollup table's key order is the same as the base table's.
+    // 2. When a user adds a new key column to the base table, _table->num_key_fields()
+    //    will be greater than the number of entries in column_statistic_fields.
+    int num_new_keys = 0;
     for (size_t i = 0; i < _table->num_key_fields(); ++i) {
-        WrapperField* first = WrapperField::create(_table->tablet_schema()[i]);
+        const FieldInfo& column_schema = _table->tablet_schema()[i];
+
+        WrapperField* first = WrapperField::create(column_schema);
         DCHECK(first != NULL) << "failed to allocate memory for field: " << i;
-        first->copy(column_statistic_fields[i].first);
 
-        WrapperField* second = 
WrapperField::create(_table->tablet_schema()[i]);
+        WrapperField* second = WrapperField::create(column_schema);
         DCHECK(second != NULL) << "failed to allocate memory for field: " << i;
-        second->copy(column_statistic_fields[i].second);
+
+        //for new key column, use default value to fill into column_statistics
+        if (schema_mapping[i].ref_column == -1) {
+            num_new_keys++;
+
+            first->copy(schema_mapping[i].default_value);
+            second->copy(schema_mapping[i].default_value);
+        } else {
+            first->copy(column_statistic_fields[i - num_new_keys].first);
+            second->copy(column_statistic_fields[i - num_new_keys].second);
+        }
 
         _column_statistics.push_back(std::make_pair(first, second));
     }
diff --git a/be/src/olap/segment_group.h b/be/src/olap/segment_group.h
index 19d17af4..ef9f6414 100644
--- a/be/src/olap/segment_group.h
+++ b/be/src/olap/segment_group.h
@@ -36,6 +36,7 @@
 #include "olap/row_cursor.h"
 #include "olap/olap_index.h"
 #include "olap/utils.h"
+#include "olap/column_mapping.h"
 
 namespace doris {
 
@@ -47,6 +48,8 @@ namespace doris {
 class SegmentGroup {
     friend class MemIndex;
 public:
+    typedef std::vector<ColumnMapping> SchemaMapping;
+
     SegmentGroup(OLAPTable* table, Version version, VersionHash version_hash,
               bool delete_flag, int segment_group_id, int32_t num_segments);
 
@@ -66,7 +69,8 @@ class SegmentGroup {
     }
 
     OLAPStatus add_column_statistics_for_linked_schema_change(
-        const std::vector<std::pair<WrapperField*, WrapperField*>>& 
column_statistic_fields);
+        const std::vector<std::pair<WrapperField*, WrapperField*>>& 
column_statistic_fields,
+        const SchemaMapping& schema_mapping);
 
     OLAPStatus add_column_statistics(
         const std::vector<std::pair<WrapperField*, WrapperField*>>& 
column_statistic_fields);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to