This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 5d02c48715f [feature](hive)Support reading renamed Parquet Hive and Orc Hive tables. (#38432) (#38809)
5d02c48715f is described below
commit 5d02c48715ff35f22b5943433132aba08aeb162d
Author: daidai <[email protected]>
AuthorDate: Mon Aug 5 09:06:49 2024 +0800
[feature](hive)Support reading renamed Parquet Hive and Orc Hive tables. (#38432) (#38809)
bp #38432
## Proposed changes
Add the `hive_parquet_use_column_names` and `hive_orc_use_column_names`
session variables to support reading a `Hive` table after its columns have
been renamed. These two session variables are modeled on the
`parquet_use_column_names` and `orc_use_column_names` session properties of
the `Trino` Hive connector.
Both session variables default to true. When they are set to false, the
ORC/Parquet readers access columns by their ordinal position in the Hive
table definition instead of by name.
For example:
```mysql
in Hive :
hive> create table tmp (a int , b string) stored as parquet;
hive> insert into table tmp values(1,"2");
hive> alter table tmp change column a new_a int;
hive> insert into table tmp values(2,"4");
in Doris :
mysql> set hive_parquet_use_column_names=true;
Query OK, 0 rows affected (0.00 sec)
mysql> select * from tmp;
+-------+------+
| new_a | b    |
+-------+------+
|  NULL | 2    |
|     2 | 4    |
+-------+------+
2 rows in set (0.02 sec)
mysql> set hive_parquet_use_column_names=false;
Query OK, 0 rows affected (0.00 sec)
mysql> select * from tmp;
+-------+------+
| new_a | b    |
+-------+------+
|     1 | 2    |
|     2 | 4    |
+-------+------+
2 rows in set (0.02 sec)
```
In Hive 3 you can `set parquet.column.index.access` /
`orc.force.positional.evolution` to true/false to control how tables are
read, much like these two session variables. However, for a Parquet table
with a renamed field inside a struct column, Hive and Doris behave
differently.
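To make the positional mode concrete, below is a minimal standalone C++ sketch of the resolution rule (the helper `resolve_by_position` is hypothetical, not the Doris code path): when the session variables are false, the i-th table column is read from the i-th file column, and table columns past the end of the file schema are filled with NULL. With the default of true, matching is by name, which is why the first `select` above returns NULL for `new_a` on the old file.

```cpp
// Minimal sketch of positional column resolution (hypothetical helper,
// not the Doris implementation). With *_use_column_names = false, the
// i-th table column is read from the i-th file column, names ignored.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

std::vector<std::optional<std::string>> resolve_by_position(
        const std::vector<std::string>& table_cols,
        const std::vector<std::string>& file_cols) {
    std::vector<std::optional<std::string>> mapping(table_cols.size());
    for (size_t i = 0; i < table_cols.size(); ++i) {
        if (i < file_cols.size()) {
            mapping[i] = file_cols[i]; // read by ordinal position
        } // else: column missing from the file, reader fills NULL
    }
    return mapping;
}

int main() {
    // File written before `alter table tmp change column a new_a int`.
    std::vector<std::string> file_cols = {"a", "b"};
    // Current Hive table definition.
    std::vector<std::string> table_cols = {"new_a", "b"};
    auto mapping = resolve_by_position(table_cols, file_cols);
    for (size_t i = 0; i < table_cols.size(); ++i) {
        std::cout << table_cols[i] << " <- "
                  << mapping[i].value_or("<missing, NULL>") << "\n";
    }
    // Prints "new_a <- a" and "b <- b": rows written before the rename
    // stay visible under the new column name.
}
```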
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 16 +-
be/src/vec/exec/format/orc/vorc_reader.h | 15 +-
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 93 ++++-
be/src/vec/exec/format/parquet/vparquet_reader.h | 3 +-
be/src/vec/exec/scan/vfile_scanner.cpp | 21 +-
.../scripts/create_preinstalled_scripts/run64.hql | 29 ++
.../orc_table/simulation_hive1_orc/000000_0 | Bin 0 -> 408 bytes
.../orc_table/test_hive_rename_column_orc/000000_0 | Bin 0 -> 405 bytes
.../test_hive_rename_column_orc/000000_0_copy_1 | Bin 0 -> 396 bytes
.../test_hive_rename_column_orc/000000_0_copy_2 | Bin 0 -> 554 bytes
.../test_hive_rename_column_orc/000000_0_copy_3 | Bin 0 -> 592 bytes
.../test_hive_rename_column_parquet/000000_0 | Bin 0 -> 538 bytes
.../000000_0_copy_1 | Bin 0 -> 543 bytes
.../000000_0_copy_2 | Bin 0 -> 787 bytes
.../000000_0_copy_3 | Bin 0 -> 801 bytes
.../apache/doris/datasource/FileQueryScanNode.java | 4 +
.../java/org/apache/doris/qe/SessionVariable.java | 20 +
gensrc/thrift/PaloInternalService.thrift | 6 +
.../hive/test_hive_rename_column_orc_parquet.out | 435 +++++++++++++++++++++
.../test_hive_rename_column_orc_parquet.groovy | 196 ++++++++++
20 files changed, 803 insertions(+), 35 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 547d53bd00e..7a820845ed0 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -279,13 +279,15 @@ Status OrcReader::init_reader(
        const VExprContextSPtrs& conjuncts, bool is_acid, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
+        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
+        const bool hive_use_column_names) {
_column_names = column_names;
_colname_to_value_range = colname_to_value_range;
_lazy_read_ctx.conjuncts = conjuncts;
_is_acid = is_acid;
_tuple_descriptor = tuple_descriptor;
_row_descriptor = row_descriptor;
+ _is_hive1_orc_or_use_idx = !hive_use_column_names;
     if (not_single_slot_filter_conjuncts != nullptr && !not_single_slot_filter_conjuncts->empty()) {
         _not_single_slot_filter_conjuncts.insert(_not_single_slot_filter_conjuncts.end(),
                                                  not_single_slot_filter_conjuncts->begin(),
@@ -337,10 +339,11 @@ Status OrcReader::_init_read_columns() {
// In old version slot_name_to_schema_pos may not be set in _scan_params
// TODO, should be removed in 2.2 or later
-    _is_hive1_orc = is_hive1_orc && _scan_params.__isset.slot_name_to_schema_pos;
+    _is_hive1_orc_or_use_idx = (is_hive1_orc || _is_hive1_orc_or_use_idx) &&
+                               _scan_params.__isset.slot_name_to_schema_pos;
for (size_t i = 0; i < _column_names->size(); ++i) {
auto& col_name = (*_column_names)[i];
- if (_is_hive1_orc) {
+ if (_is_hive1_orc_or_use_idx) {
auto iter = _scan_params.slot_name_to_schema_pos.find(col_name);
if (iter != _scan_params.slot_name_to_schema_pos.end()) {
int pos = iter->second;
@@ -375,9 +378,10 @@ Status OrcReader::_init_read_columns() {
_read_cols_lower_case.emplace_back(col_name);
         // For hive engine, store the orc column name to schema column name map.
         // This is for Hive 1.x orc file with internal column name _col0, _col1...
-        if (_is_hive1_orc) {
+        if (_is_hive1_orc_or_use_idx) {
             _removed_acid_file_col_name_to_schema_col[orc_cols[pos]] = col_name;
}
+
_col_name_to_file_col_name[col_name] = read_col;
}
}
@@ -708,7 +712,7 @@ bool OrcReader::_init_search_argument(
if (iter == colname_to_value_range->end()) {
continue;
}
- auto type_it = type_map.find(col_name);
+ auto type_it = type_map.find(_col_name_to_file_col_name[col_name]);
if (type_it == type_map.end()) {
continue;
}
@@ -913,7 +917,7 @@ Status OrcReader::_init_select_types(const orc::Type& type, int idx) {
         std::string name;
         // For hive engine, translate the column name in orc file to schema column name.
         // This is for Hive 1.x which use internal column name _col0, _col1...
-        if (_is_hive1_orc) {
+        if (_is_hive1_orc_or_use_idx) {
             name = _removed_acid_file_col_name_to_schema_col[type.getFieldName(i)];
} else {
name = get_field_name_lower_case(&type, i);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index 77eec261b01..c0b372dfcea 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -139,14 +139,15 @@ public:
                       const std::string& ctz, io::IOContext* io_ctx, bool enable_lazy_mat = true);
     ~OrcReader() override;
-
+    // If you want to read the file by index instead of column name, set hive_use_column_names to false.
     Status init_reader(
             const std::vector<std::string>* column_names,
             std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
             const VExprContextSPtrs& conjuncts, bool is_acid,
             const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
             const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
+            const bool hive_use_column_names = true);
Status set_fill_columns(
const std::unordered_map<std::string, std::tuple<std::string,
const SlotDescriptor*>>&
@@ -570,9 +571,11 @@ private:
// This is used for Hive 1.x which use internal column name in Orc file.
// _col0, _col1...
     std::unordered_map<std::string, std::string> _removed_acid_file_col_name_to_schema_col;
-    // Flag for hive engine. True if the external table engine is Hive1.x with orc col name
-    // as _col1, col2, ...
-    bool _is_hive1_orc = false;
+    // Flag for hive engine.
+    // 1. True if the external table engine is Hive1.x with orc col name as _col1, col2, ...
+    // 2. If true, use indexes instead of column names when reading orc tables.
+    bool _is_hive1_orc_or_use_idx = false;
+
std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
std::unordered_map<std::string, const orc::Type*> _type_map;
std::vector<const orc::Type*> _col_orc_type;
@@ -621,6 +624,8 @@ private:
// resolve schema change
     std::unordered_map<std::string, std::unique_ptr<converter::ColumnTypeConverter>> _converters;
     //for iceberg table , when table column name != file column name
+    //TODO(CXY) : remove _table_col_to_file_col, because we have _col_name_to_file_col_name,
+    // the two have the same effect.
std::unordered_map<std::string, std::string> _table_col_to_file_col;
//support iceberg position delete .
std::vector<int64_t>* _position_delete_ordered_rowids = nullptr;
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index f3b9f2ad55c..57396c349dd 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -22,6 +22,7 @@
#include <gen_cpp/parquet_types.h>
#include <glog/logging.h>
+#include <algorithm>
#include <functional>
#include <utility>
@@ -300,12 +301,14 @@ Status ParquetReader::init_reader(
const std::unordered_map<std::string, int>* colname_to_slot_id,
const VExprContextSPtrs* not_single_slot_filter_conjuncts,
const std::unordered_map<int, VExprContextSPtrs>*
slot_id_to_filter_conjuncts,
- bool filter_groups) {
+ bool filter_groups, const bool hive_use_column_names) {
_tuple_descriptor = tuple_descriptor;
_row_descriptor = row_descriptor;
_colname_to_slot_id = colname_to_slot_id;
_not_single_slot_filter_conjuncts = not_single_slot_filter_conjuncts;
_slot_id_to_filter_conjuncts = slot_id_to_filter_conjuncts;
+ _colname_to_value_range = colname_to_value_range;
+ _hive_use_column_names = hive_use_column_names;
if (_file_metadata == nullptr) {
return Status::InternalError("failed to init parquet reader, please
open reader first");
}
@@ -320,28 +323,59 @@ Status ParquetReader::init_reader(
     // e.g. table added a column after this parquet file was written.
     _column_names = &all_column_names;
     auto schema_desc = _file_metadata->schema();
-    std::set<std::string> required_columns(all_column_names.begin(), all_column_names.end());
-    // Currently only used in iceberg, the columns are dropped but added back
-    std::set<std::string> dropped_columns(missing_column_names.begin(), missing_column_names.end());
-    // Make the order of read columns the same as physical order in parquet file
-    for (int i = 0; i < schema_desc.size(); ++i) {
-        auto name = schema_desc.get_column(i)->name;
-        // If the column in parquet file is included in all_column_names and not in missing_column_names,
-        // add it to _map_column, which means the reader should read the data of this column.
-        // Here to check against missing_column_names is for the 'Add a column back to the table
-        // with the same column name' case. (drop column a then add column a).
-        // Shouldn't read this column data in this case.
-        if (required_columns.find(name) != required_columns.end() &&
-            dropped_columns.find(name) == dropped_columns.end()) {
-            required_columns.erase(name);
-            _read_columns.emplace_back(name);
+    if (_hive_use_column_names) {
+        std::set<std::string> required_columns(all_column_names.begin(), all_column_names.end());
+        // Currently only used in iceberg, the columns are dropped but added back
+        std::set<std::string> dropped_columns(missing_column_names.begin(),
+                                              missing_column_names.end());
+        // Make the order of read columns the same as physical order in parquet file
+        for (int i = 0; i < schema_desc.size(); ++i) {
+            auto name = schema_desc.get_column(i)->name;
+            // If the column in parquet file is included in all_column_names and not in missing_column_names,
+            // add it to _map_column, which means the reader should read the data of this column.
+            // Here to check against missing_column_names is for the 'Add a column back to the table
+            // with the same column name' case. (drop column a then add column a).
+            // Shouldn't read this column data in this case.
+            if (required_columns.find(name) != required_columns.end() &&
+                dropped_columns.find(name) == dropped_columns.end()) {
+                required_columns.erase(name);
+                _read_columns.emplace_back(name);
+            }
+        }
+        for (const std::string& name : required_columns) {
+            _missing_cols.emplace_back(name);
+        }
+    } else {
+        std::unordered_map<std::string, ColumnValueRangeType> new_colname_to_value_range;
+        const auto& table_column_idxs = _scan_params.column_idxs;
+        std::map<int, int> table_col_id_to_idx;
+        for (int i = 0; i < table_column_idxs.size(); i++) {
+            table_col_id_to_idx.insert({table_column_idxs[i], i});
         }
-    }
-    for (const std::string& name : required_columns) {
-        _missing_cols.emplace_back(name);
-    }
-    _colname_to_value_range = colname_to_value_range;
+        for (auto [id, idx] : table_col_id_to_idx) {
+            if (id >= schema_desc.size()) {
+                _missing_cols.emplace_back(all_column_names[idx]);
+            } else {
+                auto& table_col = all_column_names[idx];
+                auto file_col = schema_desc.get_column(id)->name;
+                _read_columns.emplace_back(file_col);
+
+                if (table_col != file_col) {
+                    _table_col_to_file_col[table_col] = file_col;
+                    auto iter = _colname_to_value_range->find(table_col);
+                    if (iter == _colname_to_value_range->end()) {
+                        continue;
+                    }
+                    new_colname_to_value_range[file_col] = iter->second;
+                    _colname_to_value_range->erase(iter->first);
+                }
+            }
+        }
+        for (auto it : new_colname_to_value_range) {
+            _colname_to_value_range->emplace(it.first, std::move(it.second));
+        }
+    }
// build column predicates for column lazy read
_lazy_read_ctx.conjuncts = conjuncts;
RETURN_IF_ERROR(_init_row_groups(filter_groups));
@@ -525,6 +559,16 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
return Status::OK();
}
+ if (!_hive_use_column_names) {
+ for (auto i = 0; i < block->get_names().size(); i++) {
+ auto& col = block->get_by_position(i);
+ if (_table_col_to_file_col.contains(col.name)) {
+ col.name = _table_col_to_file_col[col.name];
+ }
+ }
+ block->initialize_index_by_name();
+ }
+
SCOPED_RAW_TIMER(&_statistics.column_read_time);
Status batch_st =
_current_group_reader->next_batch(block, _batch_size, read_rows,
&_row_group_eof);
@@ -535,6 +579,13 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
*eof = true;
return Status::OK();
}
+
+ if (!_hive_use_column_names) {
+ for (auto i = 0; i < block->columns(); i++) {
+ block->get_by_position(i).name = (*_column_names)[i];
+ }
+ block->initialize_index_by_name();
+ }
if (!batch_st.ok()) {
return Status::InternalError("Read parquet file {} failed, reason =
{}", _scan_range.path,
batch_st.to_string());
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h
b/be/src/vec/exec/format/parquet/vparquet_reader.h
index 52700aafb7f..3cc262e14e6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.h
@@ -116,7 +116,7 @@ public:
const std::unordered_map<std::string, int>* colname_to_slot_id,
const VExprContextSPtrs* not_single_slot_filter_conjuncts,
             const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts,
-            bool filter_groups = true);
+            bool filter_groups = true, const bool hive_use_column_names = true);
Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
@@ -283,5 +283,6 @@ private:
const std::unordered_map<std::string, int>* _colname_to_slot_id = nullptr;
const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr;
     const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr;
+ bool _hive_use_column_names = false;
};
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 45ec7fe4dc7..1f7e2df0f34 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -862,12 +862,21 @@ Status VFileScanner::_get_next_reader() {
RETURN_IF_ERROR(paimon_reader->init_row_filters(range));
_cur_reader = std::move(paimon_reader);
} else {
+ bool hive_parquet_use_column_names = true;
+
+                if (range.__isset.table_format_params &&
+                    range.table_format_params.table_format_type == "hive" && _state != nullptr) [[likely]] {
+                    hive_parquet_use_column_names = _state->query_options().hive_parquet_use_column_names;
+                }
+
std::vector<std::string> place_holder;
init_status = parquet_reader->init_reader(
_file_col_names, place_holder, _colname_to_value_range,
                         _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(),
                         _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
-                        &_slot_id_to_filter_conjuncts);
+                        &_slot_id_to_filter_conjuncts, true, hive_parquet_use_column_names);
_cur_reader = std::move(parquet_reader);
}
need_to_get_parsed_schema = true;
@@ -923,10 +932,18 @@ Status VFileScanner::_get_next_reader() {
RETURN_IF_ERROR(paimon_reader->init_row_filters(range));
_cur_reader = std::move(paimon_reader);
} else {
+ bool hive_orc_use_column_names = true;
+
+                if (range.__isset.table_format_params &&
+                    range.table_format_params.table_format_type == "hive" && _state != nullptr) [[likely]] {
+                    hive_orc_use_column_names = _state->query_options().hive_orc_use_column_names;
+                }
init_status = orc_reader->init_reader(
                         &_file_col_names, _colname_to_value_range, _push_down_conjuncts, false,
                         _real_tuple_desc, _default_val_row_desc.get(),
-                        &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts);
+                        &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts,
+                        hive_orc_use_column_names);
_cur_reader = std::move(orc_reader);
}
need_to_get_parsed_schema = true;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run64.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run64.hql
new file mode 100644
index 00000000000..744b83418db
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run64.hql
@@ -0,0 +1,29 @@
+use default;
+
+create table simulation_hive1_orc(
+ `a` boolean,
+ `b` int,
+ `c` string
+)stored as orc
+LOCATION '/user/doris/preinstalled_data/orc_table/simulation_hive1_orc';
+msck repair table simulation_hive1_orc;
+
+create table test_hive_rename_column_parquet(
+ `new_a` boolean,
+ `new_b` int,
+ `c` string,
+ `new_d` int,
+ `f` string
+)stored as parquet
+LOCATION '/user/doris/preinstalled_data/parquet_table/test_hive_rename_column_parquet';
+msck repair table test_hive_rename_column_parquet;
+
+create table test_hive_rename_column_orc(
+ `new_a` boolean,
+ `new_b` int,
+ `c` string,
+ `new_d` int,
+ `f` string
+)stored as orc
+LOCATION '/user/doris/preinstalled_data/orc_table/test_hive_rename_column_orc';
+msck repair table test_hive_rename_column_orc;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/simulation_hive1_orc/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/simulation_hive1_orc/000000_0
new file mode 100644
index 00000000000..848dc3250ee
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/simulation_hive1_orc/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0
new file mode 100644
index 00000000000..398aed3001f
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_1
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_1
new file mode 100644
index 00000000000..e58535d6661
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_1
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_2
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_2
new file mode 100644
index 00000000000..84490d9f085
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_2
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_3
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_3
new file mode 100644
index 00000000000..2c54adff6f2
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/test_hive_rename_column_orc/000000_0_copy_3
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0
new file mode 100644
index 00000000000..deea62bcfb3
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_1
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_1
new file mode 100644
index 00000000000..45ae5dee1ab
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_1
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_2
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_2
new file mode 100644
index 00000000000..e37fc5d2eb7
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_2
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_3
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_3
new file mode 100644
index 00000000000..97bb0ab8475
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/test_hive_rename_column_parquet/000000_0_copy_3
differ
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
index 517ba8be5f8..f572daf9446 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
@@ -421,6 +421,10 @@ public abstract class FileQueryScanNode extends FileScanNode {
transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs);
tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc);
rangeDesc.setTableFormatParams(tableFormatFileDesc);
+ } else if (fileSplit instanceof HiveSplit) {
+            TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc();
+            tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value());
+            rangeDesc.setTableFormatParams(tableFormatFileDesc);
}
setScanParams(rangeDesc, fileSplit);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 067ec326251..b9c8a91bd47 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -556,6 +556,10 @@ public class SessionVariable implements Serializable, Writable {
     public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = "enable_pushdown_minmax_on_unique";
+    public static final String HIVE_PARQUET_USE_COLUMN_NAMES = "hive_parquet_use_column_names";
+
+    public static final String HIVE_ORC_USE_COLUMN_NAMES = "hive_orc_use_column_names";
+
     public static final String KEEP_CARRIAGE_RETURN = "keep_carriage_return";
     public static final String ENABLE_PUSHDOWN_STRING_MINMAX = "enable_pushdown_string_minmax";
@@ -1770,11 +1774,25 @@ public class SessionVariable implements Serializable, Writable {
     public int createTablePartitionMaxNum = 10000;
+    @VariableMgr.VarAttr(name = HIVE_PARQUET_USE_COLUMN_NAMES,
+            description = {"默认情况下按名称访问 Parquet 列。将此属性设置为“false”可按 Hive 表定义中的序号位置访问列。",
+                    "Access Parquet columns by name by default. Set this property to `false` to access columns "
+                            + "by their ordinal position in the Hive table definition."})
+    public boolean hiveParquetUseColumnNames = true;
+
+    @VariableMgr.VarAttr(name = HIVE_ORC_USE_COLUMN_NAMES,
+            description = {"默认情况下按名称访问 Orc 列。将此属性设置为“false”可按 Hive 表定义中的序号位置访问列。",
+                    "Access Orc columns by name by default. Set this property to `false` to access columns "
+                            + "by their ordinal position in the Hive table definition."})
+    public boolean hiveOrcUseColumnNames = true;
+
+
     @VariableMgr.VarAttr(name = KEEP_CARRIAGE_RETURN,
             description = {"在同时处理\r和\r\n作为CSV的行分隔符时,是否保留\r",
                     "When processing both \\n and \\r\\n as CSV line separators, should \\r be retained?"})
     public boolean keepCarriageReturn = false;
+
     @VariableMgr.VarAttr(name = FORCE_JNI_SCANNER,
             description = {"强制使用jni方式读取外表", "Force the use of jni mode to read external table"})
     private boolean forceJniScanner = false;
@@ -3435,6 +3453,8 @@ public class SessionVariable implements Serializable, Writable {
tResult.setReadCsvEmptyLineAsNull(readCsvEmptyLineAsNull);
tResult.setSerdeDialect(getSerdeDialect());
+ tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames);
+ tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames);
tResult.setKeepCarriageReturn(keepCarriageReturn);
return tResult;
}
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index e2e25619abe..41d113497d3 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -311,6 +311,12 @@ struct TQueryOptions {
   119: optional bool keep_carriage_return = false; // \n,\r\n split line in CSV.
   122: optional i32 runtime_bloom_filter_min_size = 1048576;
+
+  // Access Parquet/ORC columns by name by default. Set this property to `false` to access columns
+  // by their ordinal position in the Hive table definition.
+  123: optional bool hive_parquet_use_column_names = true;
+  124: optional bool hive_orc_use_column_names = true;
+
// For cloud, to control if the content would be written into file cache
1000: optional bool disable_file_cache = false
}
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out
b/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out
new file mode 100644
index 00000000000..fa260b96221
--- /dev/null
+++
b/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out
@@ -0,0 +1,435 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !hive1_orc_1_true --
+true 10 hello world
+false 20 keep
+
+-- !hive1_orc_2_true --
+10 hello world true
+20 keep false
+
+-- !hive1_orc_3_true --
+hello world true
+keep false
+
+-- !hive1_orc_4_true --
+2
+
+-- !hive1_orc_5_true --
+2
+
+-- !hive1_orc_6_true --
+10
+20
+
+-- !hive1_orc_7_true --
+10 1
+20 1
+
+-- !hive1_orc_8_true --
+true 10 hello world
+
+-- !hive1_orc_9_true --
+false 20 keep
+
+-- !hive1_orc_10_true --
+false 20 keep
+
+-- !hive1_orc_11_true --
+false 20 keep
+
+-- !hive1_orc_12_true --
+hello world
+keep
+
+-- !hive1_orc_1_false --
+true 10 hello world
+false 20 keep
+
+-- !hive1_orc_2_false --
+10 hello world true
+20 keep false
+
+-- !hive1_orc_3_false --
+hello world true
+keep false
+
+-- !hive1_orc_4_false --
+2
+
+-- !hive1_orc_5_false --
+2
+
+-- !hive1_orc_6_false --
+10
+20
+
+-- !hive1_orc_7_false --
+10 1
+20 1
+
+-- !hive1_orc_8_false --
+true 10 hello world
+
+-- !hive1_orc_9_false --
+false 20 keep
+
+-- !hive1_orc_10_false --
+false 20 keep
+
+-- !hive1_orc_11_false --
+false 20 keep
+
+-- !hive1_orc_12_false --
+hello world
+keep
+
+-- !rename_orc_1_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_2_true --
+\N
+\N
+30
+40
+50
+60
+70
+80
+
+-- !rename_orc_3_true --
+\N 2
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_orc_4_true --
+true 30 abcd \N \N
+true 50 xxx \N cols
+true 70 hahaha 8888 abcd
+
+-- !rename_orc_5_true --
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_6_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+
+-- !rename_orc_7_true --
+true 30 abcd \N \N
+
+-- !rename_orc_8_true --
+true
+true
+true
+
+-- !rename_orc_9_true --
+
+-- !rename_orc_10_true --
+
+-- !rename_orc_11_true --
+\N \N
+\N \N
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_orc_12_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols \N xxx 50 true
+yyyyyy \N yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_orc_13_true --
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_1_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_2_false --
+10
+20
+30
+40
+50
+60
+70
+80
+
+-- !rename_orc_3_false --
+10 1
+20 1
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_orc_4_false --
+true 10 hello world \N \N
+true 30 abcd \N \N
+true 50 xxx 60 cols
+true 70 hahaha 8888 abcd
+
+-- !rename_orc_5_false --
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_orc_6_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+
+-- !rename_orc_7_false --
+true 30 abcd \N \N
+
+-- !rename_orc_8_false --
+true
+true
+true
+true
+
+-- !rename_orc_9_false --
+
+-- !rename_orc_10_false --
+
+-- !rename_orc_11_false --
+10 true
+20 false
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_orc_12_false --
+\N \N hello world 10 true
+\N \N keep 20 false
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols 60 xxx 50 true
+yyyyyy 100 yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_orc_13_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_1_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_2_true --
+\N
+\N
+30
+40
+50
+60
+70
+80
+
+-- !rename_parquet_3_true --
+\N 2
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_parquet_4_true --
+true 30 abcd \N \N
+true 50 xxx \N cols
+true 70 hahaha 8888 abcd
+
+-- !rename_parquet_5_true --
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_6_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+
+-- !rename_parquet_7_true --
+true 30 abcd \N \N
+
+-- !rename_parquet_8_true --
+true
+true
+true
+
+-- !rename_parquet_9_true --
+
+-- !rename_parquet_10_true --
+
+-- !rename_parquet_11_true --
+\N \N
+\N \N
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_parquet_12_true --
+\N \N hello world \N \N
+\N \N keep \N \N
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols \N xxx 50 true
+yyyyyy \N yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_parquet_13_true --
+false 40 new adcd \N \N
+true 50 xxx \N cols
+false 60 yyy \N yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_1_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_2_false --
+10
+20
+30
+40
+50
+60
+70
+80
+
+-- !rename_parquet_3_false --
+10 1
+20 1
+30 1
+40 1
+50 1
+60 1
+70 1
+80 1
+
+-- !rename_parquet_4_false --
+true 10 hello world \N \N
+true 30 abcd \N \N
+true 50 xxx 60 cols
+true 70 hahaha 8888 abcd
+
+-- !rename_parquet_5_false --
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
+-- !rename_parquet_6_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+true 30 abcd \N \N
+false 40 new adcd \N \N
+
+-- !rename_parquet_7_false --
+true 30 abcd \N \N
+
+-- !rename_parquet_8_false --
+true
+true
+true
+true
+
+-- !rename_parquet_9_false --
+
+-- !rename_parquet_10_false --
+
+-- !rename_parquet_11_false --
+10 true
+20 false
+30 true
+40 false
+50 true
+60 false
+70 true
+80 false
+
+-- !rename_parquet_12_false --
+\N \N hello world 10 true
+\N \N keep 20 false
+\N \N abcd 30 true
+\N \N new adcd 40 false
+cols 60 xxx 50 true
+yyyyyy 100 yyy 60 false
+abcd 8888 hahaha 70 true
+efg 9999 cmake 80 false
+
+-- !rename_parquet_13_false --
+true 10 hello world \N \N
+false 20 keep \N \N
+false 40 new adcd \N \N
+true 50 xxx 60 cols
+false 60 yyy 100 yyyyyy
+true 70 hahaha 8888 abcd
+false 80 cmake 9999 efg
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy
new file mode 100644
index 00000000000..88d8a586e68
--- /dev/null
+++
b/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy
@@ -0,0 +1,196 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_hive_rename_column_orc_parquet",
"p0,external,hive,external_docker,external_docker_hive") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String hivePrefix = "hive3";
+        setHivePrefix(hivePrefix)
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
+
+ String catalog_name = "test_hive_schema_change2"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}'
+ );
+ """
+
+ sql """ switch ${catalog_name} """
+ sql """ use `default` """
+
+
+ sql """ set hive_orc_use_column_names=true; """
+ qt_hive1_orc_1_true """ select * from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_2_true """ select b,c,a from simulation_hive1_orc order
by b; """
+ qt_hive1_orc_3_true """ select c,a from simulation_hive1_orc order
by b; """
+ qt_hive1_orc_4_true """ select count(*) from simulation_hive1_orc;
"""
+ qt_hive1_orc_5_true """ select count(a) from simulation_hive1_orc;
"""
+ qt_hive1_orc_6_true """ select b from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_7_true """ select b,count(*) from simulation_hive1_orc
group by b order by b; """
+ qt_hive1_orc_8_true """ select * from simulation_hive1_orc where a
+b = 11 ; """
+ qt_hive1_orc_9_true """ select * from simulation_hive1_orc where a
+b != 11 ; """
+ qt_hive1_orc_10_true """ select * from simulation_hive1_orc where a
+b != 11 and c = "keep"; """
+ qt_hive1_orc_11_true """ select * from simulation_hive1_orc where a
+b != 11 and c != "keepxxx"; """
+ qt_hive1_orc_12_true """ select c from simulation_hive1_orc order by
c; """
+
+
+ sql """ set hive_orc_use_column_names=false; """
+ qt_hive1_orc_1_false """ select * from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_2_false """ select b,c,a from simulation_hive1_orc
order by b; """
+ qt_hive1_orc_3_false """ select c,a from simulation_hive1_orc order
by b; """
+ qt_hive1_orc_4_false """ select count(*) from simulation_hive1_orc;
"""
+ qt_hive1_orc_5_false """ select count(a) from simulation_hive1_orc;
"""
+ qt_hive1_orc_6_false """ select b from simulation_hive1_orc order by
b; """
+ qt_hive1_orc_7_false """ select b,count(*) from simulation_hive1_orc
group by b order by b; """
+ qt_hive1_orc_8_false """ select * from simulation_hive1_orc where a
+b = 11 ; """
+ qt_hive1_orc_9_false """ select * from simulation_hive1_orc where a
+b != 11 ; """
+ qt_hive1_orc_10_false """ select * from simulation_hive1_orc where a
+b != 11 and c = "keep"; """
+ qt_hive1_orc_11_false """ select * from simulation_hive1_orc where a
+b != 11 and c != "keepxxx"; """
+ qt_hive1_orc_12_false """ select c from simulation_hive1_orc order
by c; """
+
+
+ sql """ set hive_orc_use_column_names=true; """
+ qt_rename_orc_1_true """ select * from test_hive_rename_column_orc
order by new_b,c """;
+ qt_rename_orc_2_true """ select new_b from
test_hive_rename_column_orc order by new_b,c """;
+ qt_rename_orc_3_true """ select new_b,count(*) from
test_hive_rename_column_orc group by new_b order by new_b """;
+ qt_rename_orc_4_true """ select * from test_hive_rename_column_orc
where new_a = 1 order by new_b,c """;
+ qt_rename_orc_5_true """ select * from test_hive_rename_column_orc
where new_d is not null order by new_b,c """
+ qt_rename_orc_6_true """ select * from test_hive_rename_column_orc
where new_d is null order by new_b,c; """
+ qt_rename_orc_7_true """ select * from test_hive_rename_column_orc
where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_orc_8_true """ select new_a from
test_hive_rename_column_orc where new_a = 1 order by new_b,c; """
+ qt_rename_orc_9_true """ select new_b from
test_hive_rename_column_orc where new_b = 1 order by new_b; """
+ qt_rename_orc_10_true """ select new_b,new_d from
test_hive_rename_column_orc where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_orc_11_true """ select new_b,new_a from
test_hive_rename_column_orc order by new_b,c,new_a; """
+ qt_rename_orc_12_true """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_orc order by new_b,c; """
+ qt_rename_orc_13_true """ select * from test_hive_rename_column_orc
where new_b + new_a != 31 order by new_b,c; """
+
+
+
+
+ sql """ set hive_orc_use_column_names=false; """
+ qt_rename_orc_1_false """ select * from test_hive_rename_column_orc
order by new_b,c """;
+ qt_rename_orc_2_false """ select new_b from
test_hive_rename_column_orc order by new_b,c """;
+ qt_rename_orc_3_false """ select new_b,count(*) from
test_hive_rename_column_orc group by new_b order by new_b """;
+ qt_rename_orc_4_false """ select * from test_hive_rename_column_orc
where new_a = 1 order by new_b,c """;
+ qt_rename_orc_5_false """ select * from test_hive_rename_column_orc
where new_d is not null order by new_b """
+ qt_rename_orc_6_false """ select * from test_hive_rename_column_orc
where new_d is null order by new_b,c; """
+ qt_rename_orc_7_false """ select * from test_hive_rename_column_orc
where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_orc_8_false """ select new_a from
test_hive_rename_column_orc where new_a = 1 order by new_b,c; """
+ qt_rename_orc_9_false """ select new_b from
test_hive_rename_column_orc where new_b = 1 order by new_b; """
+ qt_rename_orc_10_false """ select new_b,new_d from
test_hive_rename_column_orc where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_orc_11_false """ select new_b,new_a from
test_hive_rename_column_orc order by new_b,c,new_a; """
+ qt_rename_orc_12_false """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_orc order by new_b,c; """
+ qt_rename_orc_13_false """ select * from test_hive_rename_column_orc
where new_b + new_a != 31 order by new_b,c; """
+
+
+ sql """ set hive_parquet_use_column_names=true; """
+ qt_rename_parquet_1_true """ select * from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_2_true """ select new_b from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_3_true """ select new_b,count(*) from
test_hive_rename_column_parquet group by new_b order by new_b """;
+ qt_rename_parquet_4_true """ select * from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c """;
+ qt_rename_parquet_5_true """ select * from
test_hive_rename_column_parquet where new_d is not null order by new_b,c """
+ qt_rename_parquet_6_true """ select * from
test_hive_rename_column_parquet where new_d is null order by new_b,c; """
+ qt_rename_parquet_7_true """ select * from
test_hive_rename_column_parquet where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_parquet_8_true """ select new_a from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c; """
+ qt_rename_parquet_9_true """ select new_b from
test_hive_rename_column_parquet where new_b = 1 order by new_b; """
+ qt_rename_parquet_10_true """ select new_b,new_d from
test_hive_rename_column_parquet where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_parquet_11_true """ select new_b,new_a from
test_hive_rename_column_parquet order by new_b,c,new_a; """
+ qt_rename_parquet_12_true """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_parquet order by new_b,c; """
+ qt_rename_parquet_13_true """ select * from
test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c; """
+
+
+
+
+ sql """ set hive_parquet_use_column_names=false; """
+ qt_rename_parquet_1_false """ select * from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_2_false """ select new_b from
test_hive_rename_column_parquet order by new_b,c """;
+ qt_rename_parquet_3_false """ select new_b,count(*) from
test_hive_rename_column_parquet group by new_b order by new_b """;
+ qt_rename_parquet_4_false """ select * from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c """;
+ qt_rename_parquet_5_false """ select * from
test_hive_rename_column_parquet where new_d is not null order by new_b,c """
+ qt_rename_parquet_6_false """ select * from
test_hive_rename_column_parquet where new_d is null order by new_b,c; """
+ qt_rename_parquet_7_false """ select * from
test_hive_rename_column_parquet where new_b + new_a = 31 order by new_b,c; """
+ qt_rename_parquet_8_false """ select new_a from
test_hive_rename_column_parquet where new_a = 1 order by new_b,c; """
+ qt_rename_parquet_9_false """ select new_b from
test_hive_rename_column_parquet where new_b = 1 order by new_b; """
+ qt_rename_parquet_10_false """ select new_b,new_d from
test_hive_rename_column_parquet where new_d +30*new_b=100 order by new_b,c; """
+ qt_rename_parquet_11_false """ select new_b,new_a from
test_hive_rename_column_parquet order by new_b,c,new_a; """
+ qt_rename_parquet_12_false """ select f,new_d,c,new_b,new_a from
test_hive_rename_column_parquet order by new_b,c; """
+ qt_rename_parquet_13_false """ select * from
test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c; """
+
+
+
+
+
+ }
+}
+/*
+CREATE TABLE simulation_hive1_orc(
+ `_col0` boolean,
+ `_col1` INT,
+ `_col2` STRING
+)stored as orc;
+insert into simulation_hive1_orc values(true,10,"hello
world"),(false,20,"keep");
+select * from simulation_hive1_orc;
+alter table simulation_hive1_orc change column `_col0` a boolean;
+alter table simulation_hive1_orc change column `_col1` b int;
+alter table simulation_hive1_orc change column `_col2` c string;
+select * from simulation_hive1_orc;
+show create table simulation_hive1_orc;
+
+
+CREATE TABLE test_hive_rename_column_orc(
+ a boolean,
+ b INT,
+ c STRING
+)stored as orc;
+insert into test_hive_rename_column_orc values (true,10,"hello
world"),(false,20,"keep");
+alter table test_hive_rename_column_orc change column a new_a boolean;
+alter table test_hive_rename_column_orc change column b new_b int;
+insert into test_hive_rename_column_orc values
(true,30,"abcd"),(false,40,"new adcd");
+select * from test_hive_rename_column_orc;
+alter table test_hive_rename_column_orc add columns(d int,f string);
+insert into test_hive_rename_column_orc values
(true,50,"xxx",60,"cols"),(false,60,"yyy",100,"yyyyyy");
+alter table test_hive_rename_column_orc change column d new_d int;
+insert into test_hive_rename_column_orc values
(true,70,"hahaha",8888,"abcd"),(false,80,"cmake",9999,"efg");
+select * from test_hive_rename_column_orc;
+show create table test_hive_rename_column_orc;
+
+
+
+CREATE TABLE test_hive_rename_column_parquet(
+ a boolean,
+ b INT,
+ c STRING
+)stored as parquet;
+insert into test_hive_rename_column_parquet values (true,10,"hello
world"),(false,20,"keep");
+alter table test_hive_rename_column_parquet change column a new_a boolean;
+alter table test_hive_rename_column_parquet change column b new_b int;
+insert into test_hive_rename_column_parquet values
(true,30,"abcd"),(false,40,"new adcd");
+select * from test_hive_rename_column_parquet;
+alter table test_hive_rename_column_parquet add columns(d int,f string);
+insert into test_hive_rename_column_parquet values
(true,50,"xxx",60,"cols"),(false,60,"yyy",100,"yyyyyy");
+alter table test_hive_rename_column_parquet change column d new_d int;
+insert into test_hive_rename_column_parquet values
(true,70,"hahaha",8888,"abcd"),(false,80,"cmake",9999,"efg");
+select * from test_hive_rename_column_parquet;
+show create table test_hive_rename_column_parquet;
+*/
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]