[doris] branch branch-2.0 updated: [Fix](multi-catalog) Fix load string dict issue for transactional hive tables. (#23306) (#23524)

kxiao Sat, 26 Aug 2023 05:14:24 -0700

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new e011dbda46 [Fix](multi-catalog) Fix load string dict issue for 
transactional hive tables. (#23306) (#23524)
e011dbda46 is described below

commit e011dbda46a38a00b637518e2c6381f569dcf443
Author: Mingyu Chen <[email protected]>
AuthorDate: Sat Aug 26 20:14:12 2023 +0800

    [Fix](multi-catalog) Fix load string dict issue for transactional hive 
tables. (#23306) (#23524)
    
    Fix load string dict issue for transactional hive tables. The column name 
needs to be passed as 'row.column_name'.
    
    apache/doris-thirdparty#112
    
    Co-authored-by: Qi Chen <[email protected]>
---
 be/src/apache-orc                                       |  2 +-
 be/src/vec/exec/format/orc/vorc_reader.cpp              | 12 +++++++-----
 be/src/vec/exec/format/orc/vorc_reader.h                |  4 ++--
 regression-test/pipeline/p0/conf/regression-conf.groovy |  2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/be/src/apache-orc b/be/src/apache-orc
index a4e67d732e..78bbe2e41f 160000
--- a/be/src/apache-orc
+++ b/be/src/apache-orc
@@ -1 +1 @@
-Subproject commit a4e67d732e9acf3acb45e85c4cfe84d630e71ec1
+Subproject commit 78bbe2e41f2140b803855d683fae5e1a4b734a37
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index d6b258d39a..fc68a57d21 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -317,22 +317,24 @@ Status OrcReader::_init_read_columns() {
             _missing_cols.emplace_back(col_name);
         } else {
             int pos = std::distance(orc_cols_lower_case.begin(), iter);
+            std::string read_col;
             if (_is_acid && i < _column_names->size() - 
TransactionalHive::READ_PARAMS.size()) {
-                auto read_col = fmt::format(
+                read_col = fmt::format(
                         "{}.{}",
                         
TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
                         orc_cols[pos]);
                 _read_cols.emplace_back(read_col);
             } else {
-                _read_cols.emplace_back(orc_cols[pos]);
+                read_col = orc_cols[pos];
+                _read_cols.emplace_back(read_col);
             }
             _read_cols_lower_case.emplace_back(col_name);
             // For hive engine, store the orc column name to schema column 
name map.
             // This is for Hive 1.x orc file with internal column name _col0, 
_col1...
             if (_is_hive) {
-                _file_col_to_schema_col[orc_cols[pos]] = col_name;
+                _removed_acid_file_col_name_to_schema_col[orc_cols[pos]] = 
col_name;
             }
-            _col_name_to_file_col_name[col_name] = orc_cols[pos];
+            _col_name_to_file_col_name[col_name] = read_col;
         }
     }
     return Status::OK();
@@ -811,7 +813,7 @@ Status OrcReader::_init_select_types(const orc::Type& type, 
int idx) {
         // For hive engine, translate the column name in orc file to schema 
column name.
         // This is for Hive 1.x which use internal column name _col0, _col1...
         if (_is_hive) {
-            name = _file_col_to_schema_col[type.getFieldName(i)];
+            name = 
_removed_acid_file_col_name_to_schema_col[type.getFieldName(i)];
         } else {
             name = _get_field_name_lower_case(&type, i);
         }
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index 9b5c1fe576..4f11eb4de1 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -510,10 +510,10 @@ private:
     std::list<std::string> _read_cols_lower_case;
     std::list<std::string> _missing_cols;
     std::unordered_map<std::string, int> _colname_to_idx;
-    // Column name in Orc file to column name to schema.
+    // Column name in Orc file after removed acid(remove row.) to column name 
to schema.
     // This is used for Hive 1.x which use internal column name in Orc file.
     // _col0, _col1...
-    std::unordered_map<std::string, std::string> _file_col_to_schema_col;
+    std::unordered_map<std::string, std::string> 
_removed_acid_file_col_name_to_schema_col;
     // Flag for hive engine. True if the external table engine is Hive.
     bool _is_hive = false;
     std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy 
b/regression-test/pipeline/p0/conf/regression-conf.groovy
index ad7edda61c..3ff7428dab 100644
--- a/regression-test/pipeline/p0/conf/regression-conf.groovy
+++ b/regression-test/pipeline/p0/conf/regression-conf.groovy
@@ -54,7 +54,7 @@ testDirectories = ""
 // this groups will not be executed
 excludeGroups = ""
 // this suites will not be executed
-excludeSuites = 
"test_pk_uk_index_change,test_pk_uk_case,window_function,test_profile,test_broker_load_p2,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv,test_bitmap_filter,test_export_parquet,test_doris_jdbc_catalog,test_transactional_hive,test_hdfs_tvf"
+excludeSuites = 
"test_pk_uk_index_change,test_pk_uk_case,window_function,test_profile,test_broker_load_p2,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv,test_bitmap_filter,test_export_parquet,test_doris_jdbc_catalog,test_hdfs_tvf"
 
 // this directories will not be executed
 excludeDirectories = 
"nereids_tpcds_shape_sf100_p0,nereids_tpch_shape_sf1000_p0,nereids_tpch_shape_sf500_p0,workload_manager_p1"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch branch-2.0 updated: [Fix](multi-catalog) Fix load string dict issue for transactional hive tables. (#23306) (#23524)

Reply via email to