This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 5d02c48715f [feature](hive)Support reading renamed Parquet Hive and Orc Hive tables. (#38432) (#38809)
5d02c48715f is described below
commit 5d02c48715ff35f22b5943433132aba08aeb162d
Author: daidai <[email protected]>
AuthorDate: Mon Aug 5 09:06:49 2024 +0800
[feature](hive)Support reading renamed Parquet Hive and Orc Hive tables. (#38432) (#38809)
bp #38432
## Proposed changes
Add the `hive_parquet_use_column_names` and `hive_orc_use_column_names`
session variables to support reading a `Hive` table after its columns have
been renamed. These two session variables are modeled on the
`parquet_use_column_names` and `orc_use_column_names` session properties of
the `Trino` Hive connector.
Both session variables default to true. When they are set to false, the
ORC/Parquet readers access columns by their ordinal position in the Hive
table definition instead of by name.
For example:
```mysql
in Hive :
hive> create table tmp (a int , b string) stored as parquet;
hive> insert into table tmp values(1,"2");
hive> alter table tmp change column a new_a int;
hive> insert into table tmp values(2,"4");
in Doris :
mysql> set hive_parquet_use_column_names=true;
Query OK, 0 rows affected (0.00 sec)
mysql> select * from tmp;
+-------+------+
| new_a | b    |
+-------+------+
|  NULL | 2    |
|     2 | 4    |
+-------+------+
2 rows in set (0.02 sec)
mysql> set hive_parquet_use_column_names=false;
Query OK, 0 rows affected (0.00 sec)
mysql> select * from tmp;
+-------+------+
| new_a | b    |
+-------+------+
|     1 | 2    |
|     2 | 4    |
+-------+------+
2 rows in set (0.02 sec)
```
In Hive 3 you can `set parquet.column.index.access` /
`orc.force.positional.evolution` to true/false to control how tables are
read, much like these two session variables. However, for a Parquet table
with a renamed field inside a struct column, Hive and Doris behave
differently.
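To make the positional mode concrete, below is a minimal standalone C++ sketch of the resolution rule (the helper `resolve_by_position` is hypothetical, not the Doris code path): when the session variables are false, the i-th table column is read from the i-th file column, and table columns past the end of the file schema are filled with NULL. With the default of true, matching is by name, which is why the first `select` above returns NULL for `new_a` on the old file.

```cpp
// Minimal sketch of positional column resolution (hypothetical helper,
// not the Doris implementation). With *_use_column_names = false, the
// i-th table column is read from the i-th file column, names ignored.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

std::vector<std::optional<std::string>> resolve_by_position(
        const std::vector<std::string>& table_cols,
        const std::vector<std::string>& file_cols) {
    std::vector<std::optional<std::string>> mapping(table_cols.size());
    for (size_t i = 0; i < table_cols.size(); ++i) {
        if (i < file_cols.size()) {
            mapping[i] = file_cols[i]; // read by ordinal position
        } // else: column missing from the file, reader fills NULL
    }
    return mapping;
}

int main() {
    // File written before `alter table tmp change column a new_a int`.
    std::vector<std::string> file_cols = {"a", "b"};
    // Current Hive table definition.
    std::vector<std::string> table_cols = {"new_a", "b"};
    auto mapping = resolve_by_position(table_cols, file_cols);
    for (size_t i = 0; i < table_cols.size(); ++i) {
        std::cout << table_cols[i] << " <- "
                  << mapping[i].value_or("<missing, NULL>") << "\n";
    }
    // Prints "new_a <- a" and "b <- b": rows written before the rename
    // stay visible under the new column name.
}
```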
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 16 +-
be/src/vec/exec/format/orc/vorc_reader.h | 15 +-
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 93 ++++-
be/src/vec/exec/format/parquet/vparquet_reader.h | 3 +-
be/src/vec/exec/scan/vfile_scanner.cpp | 21 +-
.../scripts/create_preinstalled_scripts/run64.hql | 29 ++
.../orc_table/simulation_hive1_orc/000000_0 | Bin 0 -> 408 bytes
.../orc_table/test_hive_rename_column_orc/000000_0 | Bin 0 -> 405 bytes
.../test_hive_rename_column_orc/000000_0_copy_1 | Bin 0 -> 396 bytes
.../test_hive_rename_column_orc/000000_0_copy_2 | Bin 0 -> 554 bytes
.../test_hive_rename_column_orc/000000_0_copy_3 | Bin 0 -> 592 bytes
.../test_hive_rename_column_parquet/000000_0 | Bin 0 -> 538 bytes
.../000000_0_copy_1 | Bin 0 -> 543 bytes
.../000000_0_copy_2 | Bin 0 -> 787 bytes
.../000000_0_copy_3 | Bin 0 -> 801 bytes
.../apache/doris/datasource/FileQueryScanNode.java | 4 +
.../java/org/apache/doris/qe/SessionVariable.java | 20 +
gensrc/thrift/PaloInternalService.thrift | 6 +
.../hive/test_hive_rename_column_orc_parquet.out | 435 +++++++++++++++++++++
.../test_hive_rename_column_orc_parquet.groovy | 196 ++++++++++
20 files changed, 803 insertions(+), 35 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 547d53bd00e..7a820845ed0 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -279,13 +279,15 @@ Status OrcReader::init_reader(
        const VExprContextSPtrs& conjuncts, bool is_acid, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
+        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
+        const bool hive_use_column_names) {
_column_names = column_names;
_colname_to_value_range = colname_to_value_range;
_lazy_read_ctx.conjuncts = conjuncts;
_is_acid = is_acid;
_tuple_descriptor = tuple_descriptor;
_row_descriptor = row_descriptor;
+ _is_hive1_orc_or_use_idx = !hive_use_column_names;
     if (not_single_slot_filter_conjuncts != nullptr && !not_single_slot_filter_conjuncts->empty()) {
         _not_single_slot_filter_conjuncts.insert(_not_single_slot_filter_conjuncts.end(),
                                                  not_single_slot_filter_conjuncts->begin(),
@@ -337,10 +339,11 @@ Status OrcReader::_init_read_columns() {
// In old version slot_name_to_schema_pos may not be set in _scan_params
// TODO, should be removed in 2.2 or later
-    _is_hive1_orc = is_hive1_orc && _scan_params.__isset.slot_name_to_schema_pos;
+    _is_hive1_orc_or_use_idx = (is_hive1_orc || _is_hive1_orc_or_use_idx) &&
+                               _scan_params.__isset.slot_name_to_schema_pos;
for (size_t i = 0; i < _column_names->size(); ++i) {
auto& col_name = (*_column_names)[i];
- if (_is_hive1_orc) {
+ if (_is_hive1_orc_or_use_idx) {
auto iter = _scan_params.slot_name_to_schema_pos.find(col_name);
if (iter != _scan_params.slot_name_to_schema_pos.end()) {
int pos = iter->second;
@@ -375,9 +378,10 @@ Status OrcReader::_init_read_columns() {
_read_cols_lower_case.emplace_back(col_name);
         // For hive engine, store the orc column name to schema column name map.
         // This is for Hive 1.x orc file with internal column name _col0, _col1...
-        if (_is_hive1_orc) {
+        if (_is_hive1_orc_or_use_idx) {
             _removed_acid_file_col_name_to_schema_col[orc_cols[pos]] = col_name;
}
+
_col_name_to_file_col_name[col_name] = read_col;
}
}
@@ -708,7 +712,7 @@ bool OrcReader::_init_search_argument(
if (iter == colname_to_value_range->end()) {
continue;
}
- auto type_it = type_map.find(col_name);
+ auto type_it = type_map.find(_col_name_to_file_col_name[col_name]);
if (type_it == type_map.end()) {
continue;
}
@@ -913,7 +917,7 @@ Status OrcReader::_init_select_types(const orc::Type& type, int idx) {
         std::string name;
         // For hive engine, translate the column name in orc file to schema column name.
         // This is for Hive 1.x which use internal column name _col0, _col1...
-        if (_is_hive1_orc) {
+        if (_is_hive1_orc_or_use_idx) {
             name = _removed_acid_file_col_name_to_schema_col[type.getFieldName(i)];
} else {
name = get_field_name_lower_case(&type, i);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index 77eec261b01..c0b372dfcea 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -139,14 +139,15 @@ public:
                       const std::string& ctz, io::IOContext* io_ctx, bool enable_lazy_mat = true);
     ~OrcReader() override;
-
+    // If you want to read the file by index instead of column name, set hive_use_column_names to false.
     Status init_reader(
             const std::vector<std::string>* column_names,
             std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
             const VExprContextSPtrs& conjuncts, bool is_acid,
             const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
             const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
+            const bool hive_use_column_names = true);
Status set_fill_columns(
const std::unordered_map<std::string, std::tuple<std::string,
const SlotDescriptor*>>&
@@ -570,9 +571,11 @@ private:
// This is used for Hive 1.x which use internal column name in Orc file.
// _col0, _col1...
     std::unordered_map<std::string, std::string> _removed_acid_file_col_name_to_schema_col;
-    // Flag for hive engine. True if the external table engine is Hive1.x with orc col name
-    // as _col1, col2, ...
-    bool _is_hive1_orc = false;
+    // Flag for hive engine.
+    // 1. True if the external table engine is Hive1.x with orc col name as _col1, col2, ...
+    // 2. If true, use indexes instead of column names when reading orc tables.
+    bool _is_hive1_orc_or_use_idx = false;
+
std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
std::unordered_map<std::string, const orc::Type*> _type_map;
std::vector<const orc::Type*> _col_orc_type;
@@ -621,6 +624,8 @@ private:
// resolve schema change
     std::unordered_map<std::string, std::unique_ptr<converter::ColumnTypeConverter>> _converters;
     //for iceberg table , when table column name != file column name
+    //TODO(CXY) : remove _table_col_to_file_col, because we have _col_name_to_file_col_name,
+    // the two have the same effect.
std::unordered_map<std::string, std::string> _table_col_to_file_col;
//support iceberg position delete .
std::vector<int64_t>* _position_delete_ordered_rowids = nullptr;
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index f3b9f2ad55c..57396c349dd 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -22,6 +22,7 @@
#include <gen_cpp/parquet_types.h>
#include <glog/logging.h>
+#include <algorithm>
#include <functional>
#include <utility>
@@ -300,12 +301,14 @@ Status ParquetReader::init_reader(
const std::unordered_map<std::string, int>* colname_to_slot_id,
const VExprContextSPtrs* not_single_slot_filter_conjuncts,
const std::unordered_map<int, VExprContextSPtrs>*
slot_id_to_filter_conjuncts,
- bool filter_groups) {
+ bool filter_groups, const bool hive_use_column_names) {
_tuple_descriptor = tuple_descriptor;
_row_descriptor = row_descriptor;
_colname_to_slot_id = colname_to_slot_id;
_not_single_slot_filter_conjuncts = not_single_slot_filter_conjuncts;
_slot_id_to_filter_conjuncts = slot_id_to_filter_conjuncts;
+ _colname_to_value_range = colname_to_value_range;
+ _hive_use_column_names = hive_use_column_names;
if (_file_metadata == nullptr) {
return Status::InternalError("failed to init parquet reader, please
open reader first");
}
@@ -320,28 +323,59 @@ Status ParquetReader::init_reader(
     // e.g. table added a column after this parquet file was written.
     _column_names = &all_column_names;
     auto schema_desc = _file_metadata->schema();
-    std::set<std::string> required_columns(all_column_names.begin(), all_column_names.end());
-    // Currently only used in iceberg, the columns are dropped but added back
-    std::set<std::string> dropped_columns(missing_column_names.begin(), missing_column_names.end());
-    // Make the order of read columns the same as physical order in parquet file
-    for (int i = 0; i < schema_desc.size(); ++i) {
-        auto name = schema_desc.get_column(i)->name;
-        // If the column in parquet file is included in all_column_names and not in missing_column_names,
-        // add it to _map_column, which means the reader should read the data of this column.
-        // Here to check against missing_column_names is for the 'Add a column back to the table
-        // with the same column name' case. (drop column a then add column a).
-        // Shouldn't read this column data in this case.
-        if (required_columns.find(name) != required_columns.end() &&
-            dropped_columns.find(name) == dropped_columns.end()) {
-            required_columns.erase(name);
-            _read_columns.emplace_back(name);
+    if (_hive_use_column_names) {
+        std::set<std::string> required_columns(all_column_names.begin(), all_column_names.end());
+        // Currently only used in iceberg, the columns are dropped but added back
+        std::set<std::string> dropped_columns(missing_column_names.begin(),
+                                              missing_column_names.end());
+        // Make the order of read columns the same as physical order in parquet file
+        for (int i = 0; i < schema_desc.size(); ++i) {
+            auto name = schema_desc.get_column(i)->name;
+            // If the column in parquet file is included in all_column_names and not in missing_column_names,
+            // add it to _map_column, which means the reader should read the data of this column.
+            // Here to check against missing_column_names is for the 'Add a column back to the table
+            // with the same column name' case. (drop column a then add column a).
+            // Shouldn't read this column data in this case.
+            if (required_columns.find(name) != required_columns.end() &&
+                dropped_columns.find(name) == dropped_columns.end()) {
+                required_columns.erase(name);
+                _read_columns.emplace_back(name);
+            }
+        }
+        for (const std::string& name : required_columns) {
+            _missing_cols.emplace_back(name);
+        }
+    } else {
+        std::unordered_map<std::string, ColumnValueRangeType> new_colname_to_value_range;
+        const auto& table_column_idxs = _scan_params.column_idxs;
+        std::map<int, int> table_col_id_to_idx;
+        for (int i = 0; i < table_column_idxs.size(); i++) {
+            table_col_id_to_idx.insert({table_column_idxs[i], i});
         }
-    }
-    for (const std::string& name : required_columns) {
-        _missing_cols.emplace_back(name);
-    }
-    _colname_to_value_range = colname_to_value_range;
+        for (auto [id, idx] : table_col_id_to_idx) {
+            if (id >= schema_desc.size()) {
+                _missing_cols.emplace_back(all_column_names[idx]);
+            } else {
+                auto& table_col = all_column_names[idx];
+                auto file_col = schema_desc.get_column(id)->name;
+                _read_columns.emplace_back(file_col);
+
+                if (table_col != file_col) {
+                    _table_col_to_file_col[table_col] = file_col;
+                    auto iter = _colname_to_value_range->find(table_col);
+                    if (iter == _colname_to_value_range->end()) {
+                        continue;
+                    }
+                    new_colname_to_value_range[file_col] = iter->second;
+                    _colname_to_value_range->erase(iter->first);
+                }
+            }
+        }
+        for (auto it : new_colname_to_value_range) {
+            _colname_to_value_range->emplace(it.first, std::move(it.second));
+        }
+    }
// build column predicates for column lazy read
_lazy_read_ctx.conjuncts = conjuncts;
RETURN_IF_ERROR(_init_row_groups(filter_groups));
@@ -525,6 +559,16 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
return Status::OK();
}
+ if (!_hive_use_column_names) {
+ for (auto i = 0; i < block->get_names().size(); i++) {
+ auto& col = block->get_by_position(i);
+ if (_table_col_to_file_col.contains(col.name)) {
+ col.name = _table_col_to_file_col[col.name];
+ }
+ }
+ block->initialize_index_by_name();
+ }
+
SCOPED_RAW_TIMER(&_statistics.column_read_time);
Status batch_st =
_current_group_reader->next_batch(block, _batch_size, read_rows,
&_row_group_eof);
@@ -535,6 +579,13 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
*eof = true;
return Status::OK();
}
+
+ if (!_hive_use_column_names) {
+ for (auto i = 0; i < block->columns(); i++) {
+ block->get_by_position(i).name = (*_column_names)[i];
+ }
+ block->initialize_index_by_name();
+ }
if (!batch_st.ok()) {
return Status::InternalError("Read parquet file {} failed, reason =
{}", _scan_range.path,
batch_st.to_string());
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h
b/be/src/vec/exec/format/parquet/vparquet_reader.h
index 52700aafb7f..3cc262e14e6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.h
@@ -116,7 +116,7 @@ public:
const std::unordered_map<std::string, int>* colname_to_slot_id,
const VExprContextSPtrs* not_single_slot_filter_conjuncts,
             const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
-            bool filter_groups = true);
+            bool filter_groups = true, const bool hive_use_column_names = true);
Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
@@ -283,5 +283,6 @@ private:
const std::unordered_map<std::string, int>* _colname_to_slot_id = nullptr;
const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr;
     const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr;
+ bool _hive_use_column_names = false;
};
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 45ec7fe4dc7..1f7e2df0f34 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -862,12 +862,21 @@ Status VFileScanner::_get_next_reader() {
RETURN_IF_ERROR(paimon_reader->init_row_filters(range));
_cur_reader = std::move(paimon_reader);
} else {
+ bool hive_parquet_use_column_names = true;
+
+                if (range.__isset.table_format_params &&
+                    range.table_format_params.table_format_type == "hive" && _state != nullptr) [[likely]] {
+                    hive_parquet_use_column_names = _state->query_options().hive_parquet_use_column_names;
+                }
+
std::vector<std::string> place_holder;
init_status = parquet_reader->init_reader(
_file_col_names, place_holder, _colname_to_value_range,
                         _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(),
                         _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                        &_slot_id_to_filter_conjuncts);
+                        &_slot_id_to_filter_conjuncts, true, hive_parquet_use_column_names);
_cur_reader = std::move(parquet_reader);
}
need_to_get_parsed_schema = true;
@@ -923,10 +932,18 @@ Status VFileScanner::_get_next_reader() {
RETURN_IF_ERROR(paimon_reader->init_row_filters(range));
_cur_reader = std::move(paimon_reader);
} else {
+ bool hive_orc_use_column_names = true;
+
+                if (range.__isset.table_format_params &&
+                    range.table_format_params.table_format_type == "hive" && _state != nullptr) [[likely]] {
+                    hive_orc_use_column_names = _state->query_options().hive_orc_use_column_names;
+                }
init_status = orc_reader->init_reader(
                         &_file_col_names, _colname_to_value_range, _push_down_conjuncts, false,
                         _real_tuple_desc, _default_val_row_desc.get(),
-                        &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts);
+                        &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts,
+                        hive_orc_use_column_names);
_cur_reader = std::move(orc_reader);
}
need_to_get_parsed_schema = true;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run64.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run64.hql
new file mode 100644
index 00000000000..744b83418db
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run64.hql
@@ -0,0 +1,29 @@
+use default;
+
+create table simulation_hive1_orc(
+ `a` boolean,
+ `b` int,
+ `c` string
+)stored as orc
+LOCATION '/user/doris/preinstalled_data/orc_table/simulation_hive1_orc';
+msck repair table simulation_hive1_orc;
+
+create table test_hive_rename_column_parquet(
+ `new_a` boolean,
+ `new_b` int,
+ `c` string,
+ `new_d` int,
+ `f` string
+)stored as parquet
+LOCATION '/user/doris/preinstalled_data/parquet_table/test_hive_rename_column_parquet';
+msck repair table test_hive_rename_column_parquet;
+
+create table test_hive_rename_column_orc(
+ `new_a` boolean,
+ `new_b` int,
+ `c` string,
+ `new_d` int,
+ `f` string
+)stored as orc
+LOCATION '/user/doris/preinstalled_data/orc_table/test_hive_rename_column_orc';
+msck repair table test_hive_rename_column_orc;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/simulation_hive1_orc/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/simulation_hive1_orc/000000_0
new file mode 100644
index 00000000000..848dc3250ee
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/simulation_hive1_orc/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0
new file mode 100644
index 00000000000..398aed3001f
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_1
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_1
new file mode 100644
index 00000000000..e58535d6661
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_1
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_2
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_2
new file mode 100644
index 00000000000..84490d9f085
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_2
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_3
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_3
new file mode 100644
index 00000000000..2c54adff6f2
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_3
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0
new file mode 100644
index 00000000000..deea62bcfb3
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_1
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_1
new file mode 100644
index 00000000000..45ae5dee1ab
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_1
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_2
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_2
new file mode 100644
index 00000000000..e37fc5d2eb7
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_2
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_3
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_3
new file mode 100644
index 00000000000..97bb0ab8475
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_3
differ
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
index 517ba8be5f8..f572daf9446 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
@@ -421,6 +421,10 @@ public abstract class FileQueryScanNode extends FileScanNode {
transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs);
tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc);
rangeDesc.setTableFormatParams(tableFormatFileDesc);
+ } else if (fileSplit instanceof HiveSplit) {
+            TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc();
+            tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value());
+            rangeDesc.setTableFormatParams(tableFormatFileDesc);
}
setScanParams(rangeDesc, fileSplit);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 067ec326251..b9c8a91bd47 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -556,6 +556,10 @@ public class SessionVariable implements Serializable, Writable {
     public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = "enable_pushdown_minmax_on_unique";
+    public static final String HIVE_PARQUET_USE_COLUMN_NAMES = "hive_parquet_use_column_names";
+
+    public static final String HIVE_ORC_USE_COLUMN_NAMES = "hive_orc_use_column_names";
+
     public static final String KEEP_CARRIAGE_RETURN = "keep_carriage_return";
     public static final String ENABLE_PUSHDOWN_STRING_MINMAX = "enable_pushdown_string_minmax";
@@ -1770,11 +1774,25 @@ public class SessionVariable implements Serializable, Writable {
     public int createTablePartitionMaxNum = 10000;
+    @VariableMgr.VarAttr(name = HIVE_PARQUET_USE_COLUMN_NAMES,
+            description = {"默认情况下按名称访问 Parquet 列。将此属性设置为“false”可按 Hive 表定义中的序号位置访问列。",
+                    "Access Parquet columns by name by default. Set this property to `false` to access columns "
+                            + "by their ordinal position in the Hive table definition."})
+    public boolean hiveParquetUseColumnNames = true;
+
+    @VariableMgr.VarAttr(name = HIVE_ORC_USE_COLUMN_NAMES,
+            description = {"默认情况下按名称访问 Orc 列。将此属性设置为“false”可按 Hive 表定义中的序号位置访问列。",
+                    "Access Orc columns by name by default. Set this property to `false` to access columns "
+                            + "by their ordinal position in the Hive table definition."})
+    public boolean hiveOrcUseColumnNames = true;
+
+
     @VariableMgr.VarAttr(name = KEEP_CARRIAGE_RETURN,
             description = {"在同时处理\r和\r\n作为CSV的行分隔符时,是否保留\r",
                     "When processing both \\n and \\r\\n as CSV line separators, should \\r be retained?"})
     public boolean keepCarriageReturn = false;
+
     @VariableMgr.VarAttr(name = FORCE_JNI_SCANNER,
             description = {"强制使用jni方式读取外表", "Force the use of jni mode to read external table"})
     private boolean forceJniScanner = false;
@@ -3435,6 +3453,8 @@ public class SessionVariable implements Serializable, Writable {
tResult.setReadCsvEmptyLineAsNull(readCsvEmptyLineAsNull);
tResult.setSerdeDialect(getSerdeDialect());
+ tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames);
+ tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames);
tResult.setKeepCarriageReturn(keepCarriageReturn);
return tResult;
}
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index e2e25619abe..41d113497d3 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -311,6 +311,12 @@ struct TQueryOptions {
   119: optional bool keep_carriage_return = false; // \n,\r\n split line in CSV.
   122: optional i32 runtime_bloom_filter_min_size = 1048576;
+
+  // Access Parquet/ORC columns by name by default. Set this property to `false` to access columns
+  // by their ordinal position in the Hive table definition.
+  123: optional bool hive_parquet_use_column_names = true;
+  124: optional bool hive_orc_use_column_names = true;
+
// For cloud, to control if the content would be written into file cache
1000: optional bool disable_file_cache = false
}
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out
b/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out
new file mode 100644
index 00000000000..fa260b96221
--- /dev/null
+++
b/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out
@@ -0,0 +1,435 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !hive1_orc_1_true --
+true 10 hello world
+false 20 keep
+
+-- !hive1_orc_2_true --
+10 hello world true
+20 keep false
+
+-- !hive1_orc_3_true --
+hello world true
+keep false
+
+-- !hive1_orc_4_true --
+2
+
+-- !hive1_orc_5_true --
+2
+
+-- !hive1_orc_6_true --
+10
+20
+
+-- !hive1_orc_7_true --
+10 1
+20 1
+
+-- !hive1_orc_8_true --
+true 10 hello world
+
+-- !hive1_orc_9_true --
+false 20 keep
+
+-- !hive1_orc_10_true --
+false 20 keep
+
+-- !hive1_orc_11_true --
+false 20 keep
+
+-- !hive1_orc_12_true --
+hello world
+keep
+
+-- !hive1_orc_1_false --
+true 10 hello world
+false 20 keep
+
+-- !hive1_orc_2_false --
+10 hello world true
+20 keep false
+
+-- !hive1_orc_3_false --
+hello world true
+keep false
+
+-- !hive1_orc_4_false --
+2
+
+-- !hive1_orc_5_false --
+2
+
+-- !hive1_orc_6_false --
+10
+20
+
+-- !hive1_orc_7_false --
+10 1
+20 1
+
+-- !hive1_orc_8_false --
+true 10 hello world
+
+-- !hive1_orc_9_false --
+false 20 keep
+
+-- !hive1_orc_10_false --
+false 20 keep
+
+-- !hive1_orc_11_false --
+false 20 keep
+
+-- !hive1_orc_12_false --
+hello world
+keep
+
+-- !rename_orc_1_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_2_true --
+\N
+\N
+30
+40
+50
+60
+70
+80
+
+-- !rename_orc_3_true --
+\N 2
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_orc_4_true --
+true 30 abcd \N \N
+true 50 xxx \N cols
+true 70 hahaha 8888 abcd
+
+-- !rename_orc_5_true --
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_6_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+
+-- !rename_orc_7_true --
+true 30 abcd \N \N
+
+-- !rename_orc_8_true --
+true
+true
+true
+
+-- !rename_orc_9_true --
+
+-- !rename_orc_10_true --
+
+-- !rename_orc_11_true --
+\N \N
+\N \N
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_orc_12_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols \N xxx 50 true
+yyyyyy \N yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_orc_13_true --
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_1_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_2_false --
+10
+20
+30
+40
+50
+60
+70
+80
+
+-- !rename_orc_3_false --
+10 1
+20 1
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_orc_4_false --
+true 10 hello world \N \N
+true 30 abcd \N \N
+true 50 xxx 60 cols
+true 70 hahaha 8888 abcd
+
+-- !rename_orc_5_false --
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_6_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+
+-- !rename_orc_7_false --
+true 30 abcd \N \N
+
+-- !rename_orc_8_false --
+true
+true
+true
+true
+
+-- !rename_orc_9_false --
+
+-- !rename_orc_10_false --
+
+-- !rename_orc_11_false --
+10 true
+20 false
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_orc_12_false --
+\N \N hello world 10 true
+\N \N keep 20 false
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols 60 xxx 50 true
+yyyyyy 100 yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_orc_13_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_1_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_2_true --
+\N
+\N
+30
+40
+50
+60
+70
+80
+
+-- !rename_parquet_3_true --
+\N 2
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_parquet_4_true --
+true 30 abcd \N \N
+true 50 xxx \N cols
+true 70 hahaha 8888 abcd
+
+-- !rename_parquet_5_true --
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_6_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+
+-- !rename_parquet_7_true --
+true 30 abcd \N \N
+
+-- !rename_parquet_8_true --
+true
+true
+true
+
+-- !rename_parquet_9_true --
+
+-- !rename_parquet_10_true --
+
+-- !rename_parquet_11_true --
+\N \N
+\N \N
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_parquet_12_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols \N xxx 50 true
+yyyyyy \N yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_parquet_13_true --
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_1_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_2_false --
+10
+20
+30
+40
+50
+60
+70
+80
+
+-- !rename_parquet_3_false --
+10 1
+20 1
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_parquet_4_false --
+true 10 hello world \N \N
+true 30 abcd \N \N
+true 50 xxx 60 cols
+true 70 hahaha 8888 abcd
+
+-- !rename_parquet_5_false --
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_6_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+
+-- !rename_parquet_7_false --
+true 30 abcd \N \N
+
+-- !rename_parquet_8_false --
+true
+true
+true
+true
+
+-- !rename_parquet_9_false --
+
+-- !rename_parquet_10_false --
+
+-- !rename_parquet_11_false --
+10 true
+20 false
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_parquet_12_false --
+\N \N hello world 10 true
+\N \N keep 20 false
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols 60 xxx 50 true
+yyyyyy 100 yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_parquet_13_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy
new file mode 100644
index 00000000000..88d8a586e68
--- /dev/null
+++
b/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy
@@ -0,0 +1,196 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_hive_rename_column_orc_parquet",
"p0,external,hive,external_docker,external_docker_hive") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String hivePrefix = "hive3";
+        setHivePrefix(hivePrefix)
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
+
+ String catalog_name = "test_hive_schema_change2"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}'
+ );
+ """
+
+ sql """ switch ${catalog_name} """
+ sql """ use `default` """
+
+
+ sql """ set hive_orc_use_column_names=true; """
+ qt_hive1_orc_1_true """ select * from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_2_true """ select b,c,a from simulation_hive1_orc order
by b; """
+ qt_hive1_orc_3_true """ select c,a from simulation_hive1_orc order
by b; """
+ qt_hive1_orc_4_true """ select count(*) from simulation_hive1_orc;
"""
+ qt_hive1_orc_5_true """ select count(a) from simulation_hive1_orc;
"""
+ qt_hive1_orc_6_true """ select b from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_7_true """ select b,count(*) from simulation_hive1_orc
group by b order by b; """
+ qt_hive1_orc_8_true """ select * from simulation_hive1_orc where a
+b = 11 ; """
+ qt_hive1_orc_9_true """ select * from simulation_hive1_orc where a
+b != 11 ; """
+ qt_hive1_orc_10_true """ select * from simulation_hive1_orc where a
+b != 11 and c = "keep"; """
+ qt_hive1_orc_11_true """ select * from simulation_hive1_orc where a
+b != 11 and c != "keepxxx"; """
+ qt_hive1_orc_12_true """ select c from simulation_hive1_orc order by
c; """
+
+
+ sql """ set hive_orc_use_column_names=false; """
+ qt_hive1_orc_1_false """ select * from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_2_false """ select b,c,a from simulation_hive1_orc
order by b; """
+ qt_hive1_orc_3_false """ select c,a from simulation_hive1_orc order
by b; """
+ qt_hive1_orc_4_false """ select count(*) from simulation_hive1_orc;
"""
+ qt_hive1_orc_5_false """ select count(a) from simulation_hive1_orc;
"""
+ qt_hive1_orc_6_false """ select b from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_7_false """ select b,count(*) from simulation_hive1_orc
group by b order by b; """
+ qt_hive1_orc_8_false """ select * from simulation_hive1_orc where a
+b = 11 ; """
+ qt_hive1_orc_9_false """ select * from simulation_hive1_orc where a
+b != 11 ; """
+ qt_hive1_orc_10_false """ select * from simulation_hive1_orc where a
+b != 11 and c = "keep"; """
+ qt_hive1_orc_11_false """ select * from simulation_hive1_orc where a
+b != 11 and c != "keepxxx"; """
+ qt_hive1_orc_12_false """ select c from simulation_hive1_orc order
by c; """
+
+
+ sql """ set hive_orc_use_column_names=true; """
+ qt_rename_orc_1_true """ select * from test_hive_rename_column_orc
order by new_b,c """;
+ qt_rename_orc_2_true """ select new_b from
test_hive_rename_column_orc order by new_b,c """;
+ qt_rename_orc_3_true """ select new_b,count(*) from
test_hive_rename_column_orc group by new_b order by new_b """;
+ qt_rename_orc_4_true """ select * from test_hive_rename_column_orc
where new_a = 1 order by new_b,c """;
+ qt_rename_orc_5_true """ select * from test_hive_rename_column_orc
where new_d is not null order by new_b,c """
+ qt_rename_orc_6_true """ select * from test_hive_rename_column_orc
where new_d is null order by new_b,c; """
+ qt_rename_orc_7_true """ select * from test_hive_rename_column_orc
where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_orc_8_true """ select new_a from
test_hive_rename_column_orc where new_a = 1 order by new_b,c; """
+ qt_rename_orc_9_true """ select new_b from
test_hive_rename_column_orc where new_b = 1 order by new_b; """
+ qt_rename_orc_10_true """ select new_b,new_d from
test_hive_rename_column_orc where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_orc_11_true """ select new_b,new_a from
test_hive_rename_column_orc order by new_b,c,new_a; """
+ qt_rename_orc_12_true """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_orc order by new_b,c; """
+ qt_rename_orc_13_true """ select * from test_hive_rename_column_orc
where new_b + new_a != 31 order by new_b,c; """
+
+
+
+
+ sql """ set hive_orc_use_column_names=false; """
+ qt_rename_orc_1_false """ select * from test_hive_rename_column_orc
order by new_b,c """;
+ qt_rename_orc_2_false """ select new_b from
test_hive_rename_column_orc order by new_b,c """;
+ qt_rename_orc_3_false """ select new_b,count(*) from
test_hive_rename_column_orc group by new_b order by new_b """;
+ qt_rename_orc_4_false """ select * from test_hive_rename_column_orc
where new_a = 1 order by new_b,c """;
+ qt_rename_orc_5_false """ select * from test_hive_rename_column_orc
where new_d is not null order by new_b """
+ qt_rename_orc_6_false """ select * from test_hive_rename_column_orc
where new_d is null order by new_b,c; """
+ qt_rename_orc_7_false """ select * from test_hive_rename_column_orc
where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_orc_8_false """ select new_a from
test_hive_rename_column_orc where new_a = 1 order by new_b,c; """
+ qt_rename_orc_9_false """ select new_b from
test_hive_rename_column_orc where new_b = 1 order by new_b; """
+ qt_rename_orc_10_false """ select new_b,new_d from
test_hive_rename_column_orc where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_orc_11_false """ select new_b,new_a from
test_hive_rename_column_orc order by new_b,c,new_a; """
+ qt_rename_orc_12_false """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_orc order by new_b,c; """
+ qt_rename_orc_13_false """ select * from test_hive_rename_column_orc
where new_b + new_a != 31 order by new_b,c; """
+
+
+ sql """ set hive_parquet_use_column_names=true; """
+ qt_rename_parquet_1_true """ select * from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_2_true """ select new_b from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_3_true """ select new_b,count(*) from
test_hive_rename_column_parquet group by new_b order by new_b """;
+ qt_rename_parquet_4_true """ select * from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c """;
+ qt_rename_parquet_5_true """ select * from
test_hive_rename_column_parquet where new_d is not null order by new_b,c """
+ qt_rename_parquet_6_true """ select * from
test_hive_rename_column_parquet where new_d is null order by new_b,c; """
+ qt_rename_parquet_7_true """ select * from
test_hive_rename_column_parquet where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_parquet_8_true """ select new_a from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c; """
+ qt_rename_parquet_9_true """ select new_b from
test_hive_rename_column_parquet where new_b = 1 order by new_b; """
+ qt_rename_parquet_10_true """ select new_b,new_d from
test_hive_rename_column_parquet where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_parquet_11_true """ select new_b,new_a from
test_hive_rename_column_parquet order by new_b,c,new_a; """
+ qt_rename_parquet_12_true """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_parquet order by new_b,c; """
+ qt_rename_parquet_13_true """ select * from
test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c; """
+
+
+
+
+ sql """ set hive_parquet_use_column_names=false; """
+ qt_rename_parquet_1_false """ select * from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_2_false """ select new_b from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_3_false """ select new_b,count(*) from
test_hive_rename_column_parquet group by new_b order by new_b """;
+ qt_rename_parquet_4_false """ select * from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c """;
+ qt_rename_parquet_5_false """ select * from
test_hive_rename_column_parquet where new_d is not null order by new_b,c """
+ qt_rename_parquet_6_false """ select * from
test_hive_rename_column_parquet where new_d is null order by new_b,c; """
+ qt_rename_parquet_7_false """ select * from
test_hive_rename_column_parquet where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_parquet_8_false """ select new_a from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c; """
+ qt_rename_parquet_9_false """ select new_b from
test_hive_rename_column_parquet where new_b = 1 order by new_b; """
+ qt_rename_parquet_10_false """ select new_b,new_d from
test_hive_rename_column_parquet where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_parquet_11_false """ select new_b,new_a from
test_hive_rename_column_parquet order by new_b,c,new_a; """
+ qt_rename_parquet_12_false """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_parquet order by new_b,c; """
+ qt_rename_parquet_13_false """ select * from
test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c; """
+
+
+
+
+
+ }
+}
+/*
+CREATE TABLE simulation_hive1_orc(
+ `_col0` boolean,
+ `_col1` INT,
+ `_col2` STRING
+)stored as orc;
+insert into simulation_hive1_orc values(true,10,"hello
world"),(false,20,"keep");
+select * from simulation_hive1_orc;
+alter table simulation_hive1_orc change column `_col0` a boolean;
+alter table simulation_hive1_orc change column `_col1` b int;
+alter table simulation_hive1_orc change column `_col2` c string;
+select * from simulation_hive1_orc;
+show create table simulation_hive1_orc;
+
+
+CREATE TABLE test_hive_rename_column_orc(
+ a boolean,
+ b INT,
+ c STRING
+)stored as orc;
+insert into test_hive_rename_column_orc values (true,10,"hello
world"),(false,20,"keep");
+alter table test_hive_rename_column_orc change column a new_a boolean;
+alter table test_hive_rename_column_orc change column b new_b int;
+insert into test_hive_rename_column_orc values
(true,30,"abcd"),(false,40,"new adcd");
+select * from test_hive_rename_column_orc;
+alter table test_hive_rename_column_orc add columns(d int,f string);
+insert into test_hive_rename_column_orc values
(true,50,"xxx",60,"cols"),(false,60,"yyy",100,"yyyyyy");
+alter table test_hive_rename_column_orc change column d new_d int;
+insert into test_hive_rename_column_orc values
(true,70,"hahaha",8888,"abcd"),(false,80,"cmake",9999,"efg");
+select * from test_hive_rename_column_orc;
+show create table test_hive_rename_column_orc;
+
+
+
+CREATE TABLE test_hive_rename_column_parquet(
+ a boolean,
+ b INT,
+ c STRING
+)stored as parquet;
+insert into test_hive_rename_column_parquet values (true,10,"hello
world"),(false,20,"keep");
+alter table test_hive_rename_column_parquet change column a new_a boolean;
+alter table test_hive_rename_column_parquet change column b new_b int;
+insert into test_hive_rename_column_parquet values
(true,30,"abcd"),(false,40,"new adcd");
+select * from test_hive_rename_column_parquet;
+alter table test_hive_rename_column_parquet add columns(d int,f string);
+insert into test_hive_rename_column_parquet values
(true,50,"xxx",60,"cols"),(false,60,"yyy",100,"yyyyyy");
+alter table test_hive_rename_column_parquet change column d new_d int;
+insert into test_hive_rename_column_parquet values
(true,70,"hahaha",8888,"abcd"),(false,80,"cmake",9999,"efg");
+select * from test_hive_rename_column_parquet;
+show create table test_hive_rename_column_parquet;
+*/
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]