This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new e011dbda46 [Fix](multi-catalog) Fix load string dict issue for
transactional hive tables. (#23306) (#23524)
e011dbda46 is described below
commit e011dbda46a38a00b637518e2c6381f569dcf443
Author: Mingyu Chen <[email protected]>
AuthorDate: Sat Aug 26 20:14:12 2023 +0800
[Fix](multi-catalog) Fix load string dict issue for transactional hive
tables. (#23306) (#23524)
Fix the string dictionary loading issue for transactional Hive tables. The
column name needs to be passed in the form 'row.column_name'.
apache/doris-thirdparty#112
Co-authored-by: Qi Chen <[email protected]>
---
be/src/apache-orc | 2 +-
be/src/vec/exec/format/orc/vorc_reader.cpp | 12 +++++++-----
be/src/vec/exec/format/orc/vorc_reader.h | 4 ++--
regression-test/pipeline/p0/conf/regression-conf.groovy | 2 +-
4 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/be/src/apache-orc b/be/src/apache-orc
index a4e67d732e..78bbe2e41f 160000
--- a/be/src/apache-orc
+++ b/be/src/apache-orc
@@ -1 +1 @@
-Subproject commit a4e67d732e9acf3acb45e85c4cfe84d630e71ec1
+Subproject commit 78bbe2e41f2140b803855d683fae5e1a4b734a37
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index d6b258d39a..fc68a57d21 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -317,22 +317,24 @@ Status OrcReader::_init_read_columns() {
_missing_cols.emplace_back(col_name);
} else {
int pos = std::distance(orc_cols_lower_case.begin(), iter);
+ std::string read_col;
if (_is_acid && i < _column_names->size() -
TransactionalHive::READ_PARAMS.size()) {
- auto read_col = fmt::format(
+ read_col = fmt::format(
"{}.{}",
TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
orc_cols[pos]);
_read_cols.emplace_back(read_col);
} else {
- _read_cols.emplace_back(orc_cols[pos]);
+ read_col = orc_cols[pos];
+ _read_cols.emplace_back(read_col);
}
_read_cols_lower_case.emplace_back(col_name);
// For hive engine, store the orc column name to schema column
name map.
// This is for Hive 1.x orc file with internal column name _col0,
_col1...
if (_is_hive) {
- _file_col_to_schema_col[orc_cols[pos]] = col_name;
+ _removed_acid_file_col_name_to_schema_col[orc_cols[pos]] =
col_name;
}
- _col_name_to_file_col_name[col_name] = orc_cols[pos];
+ _col_name_to_file_col_name[col_name] = read_col;
}
}
return Status::OK();
@@ -811,7 +813,7 @@ Status OrcReader::_init_select_types(const orc::Type& type,
int idx) {
// For hive engine, translate the column name in orc file to schema
column name.
// This is for Hive 1.x which use internal column name _col0, _col1...
if (_is_hive) {
- name = _file_col_to_schema_col[type.getFieldName(i)];
+ name =
_removed_acid_file_col_name_to_schema_col[type.getFieldName(i)];
} else {
name = _get_field_name_lower_case(&type, i);
}
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index 9b5c1fe576..4f11eb4de1 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -510,10 +510,10 @@ private:
std::list<std::string> _read_cols_lower_case;
std::list<std::string> _missing_cols;
std::unordered_map<std::string, int> _colname_to_idx;
- // Column name in Orc file to column name to schema.
+ // Column name in Orc file after removed acid(remove row.) to column name
to schema.
// This is used for Hive 1.x which use internal column name in Orc file.
// _col0, _col1...
- std::unordered_map<std::string, std::string> _file_col_to_schema_col;
+ std::unordered_map<std::string, std::string>
_removed_acid_file_col_name_to_schema_col;
// Flag for hive engine. True if the external table engine is Hive.
bool _is_hive = false;
std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy
b/regression-test/pipeline/p0/conf/regression-conf.groovy
index ad7edda61c..3ff7428dab 100644
--- a/regression-test/pipeline/p0/conf/regression-conf.groovy
+++ b/regression-test/pipeline/p0/conf/regression-conf.groovy
@@ -54,7 +54,7 @@ testDirectories = ""
// this groups will not be executed
excludeGroups = ""
// this suites will not be executed
-excludeSuites =
"test_pk_uk_index_change,test_pk_uk_case,window_function,test_profile,test_broker_load_p2,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv,test_bitmap_filter,test_export_parquet,test_doris_jdbc_catalog,test_transactional_hive,test_hdfs_tvf"
+excludeSuites =
"test_pk_uk_index_change,test_pk_uk_case,window_function,test_profile,test_broker_load_p2,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv,test_bitmap_filter,test_export_parquet,test_doris_jdbc_catalog,test_hdfs_tvf"
// this directories will not be executed
excludeDirectories =
"nereids_tpcds_shape_sf100_p0,nereids_tpch_shape_sf1000_p0,nereids_tpch_shape_sf500_p0,workload_manager_p1"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]