This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 1bfe0f03935 [feature](iceberg)support read iceberg complex type, iceberg.orc format and position delete. (#33935) (#34256)
1bfe0f03935 is described below
commit 1bfe0f03935e751e66b95e691b86c64a6c344493
Author: daidai <[email protected]>
AuthorDate: Mon Apr 29 14:40:12 2024 +0800
[feature](iceberg)support read iceberg complex type, iceberg.orc format and position delete. (#33935) (#34256)
master #33935
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 46 +-
be/src/vec/exec/format/orc/vorc_reader.h | 19 +
be/src/vec/exec/format/table/iceberg_reader.cpp | 508 +++++++++++----------
be/src/vec/exec/format/table/iceberg_reader.h | 136 ++++--
be/src/vec/exec/scan/vfile_scanner.cpp | 23 +-
.../doris/datasource/iceberg/IcebergUtils.java | 15 +-
.../iceberg/iceberg_complex_type.out | 165 +++++++
.../iceberg/iceberg_position_delete.out | 196 ++++++++
.../iceberg/iceberg_schema_change.out | 305 +++++++++++++
.../iceberg/iceberg_complex_type.groovy | 92 ++++
.../iceberg/iceberg_position_delete.groovy | 195 ++++++++
.../iceberg/iceberg_schema_change.groovy | 162 +++++++
12 files changed, 1597 insertions(+), 265 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 7619191a2e7..55e9b1af290 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -305,6 +305,19 @@ Status OrcReader::get_parsed_schema(std::vector<std::string>* col_names,
return Status::OK();
}
+Status OrcReader::get_schema_col_name_attribute(std::vector<std::string>* col_names,
+                                                std::vector<uint64_t>* col_attributes,
+                                                std::string attribute) {
+    RETURN_IF_ERROR(_create_file_reader());
+    auto& root_type = _is_acid ? _remove_acid(_reader->getType()) : _reader->getType();
+    for (int i = 0; i < root_type.getSubtypeCount(); ++i) {
+        col_names->emplace_back(get_field_name_lower_case(&root_type, i));
+        col_attributes->emplace_back(
+                std::stol(root_type.getSubtype(i)->getAttributeValue(attribute)));
+    }
+    return Status::OK();
+}
+
Status OrcReader::_init_read_columns() {
auto& root_type = _reader->getType();
std::vector<std::string> orc_cols;
@@ -722,7 +735,11 @@ Status OrcReader::set_fill_columns(
     std::unordered_map<std::string, std::pair<uint32_t, int>> predicate_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
if (VSlotRef* slot_ref = typeid_cast<VSlotRef*>(expr)) {
- auto& expr_name = slot_ref->expr_name();
+ auto expr_name = slot_ref->expr_name();
+ auto iter = _table_col_to_file_col.find(expr_name);
+ if (iter != _table_col_to_file_col.end()) {
+ expr_name = iter->second;
+ }
            predicate_columns.emplace(expr_name,
                                      std::make_pair(slot_ref->column_id(), slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
@@ -1587,6 +1604,7 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
*read_rows = 0;
return Status::OK();
}
+ _execute_filter_position_delete_rowids(*_filter);
        RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, *_filter));
if (!_not_single_slot_filter_conjuncts.empty()) {
@@ -1694,6 +1712,10 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
for (auto& conjunct : _non_dict_filter_conjuncts) {
filter_conjuncts.emplace_back(conjunct);
}
+            // Covers predicates on missing columns, e.g. missing_column != missing_column,
+            // missing_column IS NULL, missing_column != file_column, etc.
+            for (auto& [missing_col, conjunct] : _lazy_read_ctx.predicate_missing_columns) {
+                filter_conjuncts.emplace_back(conjunct);
+            }
std::vector<IColumn::Filter*> filters;
if (_delete_rows_filter_ptr) {
filters.push_back(_delete_rows_filter_ptr.get());
@@ -1710,6 +1732,7 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
            static_cast<void>(_convert_dict_cols_to_string_cols(block, &batch_vec));
return Status::OK();
}
+ _execute_filter_position_delete_rowids(result_filter);
        RETURN_IF_CATCH_EXCEPTION(
                Block::filter_block_internal(block, columns_to_filter, result_filter));
if (!_not_single_slot_filter_conjuncts.empty()) {
@@ -1841,6 +1864,10 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s
for (auto& conjunct : _non_dict_filter_conjuncts) {
filter_conjuncts.emplace_back(conjunct);
}
+    // Covers predicates on missing columns, e.g. missing_column != missing_column,
+    // missing_column IS NULL, missing_column != file_column, etc.
+    for (auto& [missing_col, conjunct] : _lazy_read_ctx.predicate_missing_columns) {
+        filter_conjuncts.emplace_back(conjunct);
+    }
std::vector<IColumn::Filter*> filters;
if (_delete_rows_filter_ptr) {
filters.push_back(_delete_rows_filter_ptr.get());
@@ -1919,6 +1946,9 @@ bool OrcReader::_can_filter_by_dict(int slot_id) {
break;
}
}
+ if (slot == nullptr) {
+ return false;
+ }
if (!slot->type().is_string_type()) {
return false;
}
@@ -2369,5 +2399,19 @@ void ORCFileInputStream::_collect_profile_before_close() {
_file_reader->collect_profile_before_close();
}
}
+void OrcReader::_execute_filter_position_delete_rowids(IColumn::Filter& filter) {
+    if (_position_delete_ordered_rowids == nullptr) {
+        return;
+    }
+    auto start = _row_reader->getRowNumber();
+    auto nums = _batch->numElements;
+    auto l = std::lower_bound(_position_delete_ordered_rowids->begin(),
+                              _position_delete_ordered_rowids->end(), start);
+    auto r = std::upper_bound(_position_delete_ordered_rowids->begin(),
+                              _position_delete_ordered_rowids->end(), start + nums - 1);
+    for (; l < r; l++) {
+        filter[*l - start] = 0;
+    }
+}
} // namespace doris::vectorized
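The position-delete path added above works on sorted row ids: _position_delete_ordered_rowids holds the deleted row numbers in ascending order, and _execute_filter_position_delete_rowids binary-searches the slice that falls inside the current batch [start, start + numElements) and zeroes those filter slots. A minimal standalone sketch of the same technique (names here are illustrative, not the reader's actual members):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Zero out filter entries whose absolute row id appears in the sorted delete list.
    // `start` is the absolute row id of the first row in the current batch.
    void filter_deleted_rows(const std::vector<int64_t>& sorted_delete_rowids, int64_t start,
                             std::vector<uint8_t>& filter) {
        auto l = std::lower_bound(sorted_delete_rowids.begin(), sorted_delete_rowids.end(), start);
        auto r = std::upper_bound(sorted_delete_rowids.begin(), sorted_delete_rowids.end(),
                                  start + static_cast<int64_t>(filter.size()) - 1);
        for (; l != r; ++l) {
            filter[*l - start] = 0; // row *l is position-deleted
        }
    }

Because both bounds are found by binary search, the per-batch cost is O(log d + k) for d delete rows and k hits in the batch, rather than a scan of the whole delete list.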
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index 0b07f147c4f..f5bb7004ca2 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -177,6 +177,19 @@ public:
     Status get_parsed_schema(std::vector<std::string>* col_names,
                              std::vector<TypeDescriptor>* col_types) override;
+ Status get_schema_col_name_attribute(std::vector<std::string>* col_names,
+ std::vector<uint64_t>* col_attributes,
+ std::string attribute);
+    void set_table_col_to_file_col(
+            std::unordered_map<std::string, std::string> table_col_to_file_col) {
+        _table_col_to_file_col = table_col_to_file_col;
+    }
+
+    void set_position_delete_rowids(std::vector<int64_t>* delete_rows) {
+ _position_delete_ordered_rowids = delete_rows;
+ }
+ void _execute_filter_position_delete_rowids(IColumn::Filter& filter);
+
    void set_delete_rows(const TransactionalHiveReader::AcidRowIDSet* delete_rows) {
        _delete_rows = delete_rows;
    }
@@ -246,6 +259,8 @@ private:
OrcReader* _orc_reader = nullptr;
};
+ //class RowFilter : public orc::RowReader
+
    // Create inner orc file reader,
    // return EOF if file is empty,
    // return ERROR if an error is encountered.
@@ -586,6 +601,10 @@ private:
// resolve schema change
    std::unordered_map<std::string, std::unique_ptr<converter::ColumnTypeConverter>> _converters;
+    // For iceberg tables, when the table column name != the file column name.
+    std::unordered_map<std::string, std::string> _table_col_to_file_col;
+    // Supports iceberg position deletes.
+    std::vector<int64_t>* _position_delete_ordered_rowids = nullptr;
};
class ORCFileInputStream : public orc::InputStream, public ProfileCollector {
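The set_table_col_to_file_col hook above is what lets predicate evaluation survive Iceberg column renames: before a slot's column name is matched against the ORC file schema, it is translated through _table_col_to_file_col, falling back to the original name when no rename is recorded. A rough sketch of that lookup-with-fallback pattern (simplified types, not the reader's code):

    #include <string>
    #include <unordered_map>

    // Translate a table-level column name to the name stored in the data file;
    // fall back to the table name when no rename mapping exists.
    std::string resolve_file_col(const std::unordered_map<std::string, std::string>& table_to_file,
                                 const std::string& table_col) {
        auto it = table_to_file.find(table_col);
        return it == table_to_file.end() ? table_col : it->second;
    }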
diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp
index 8c05b8c2a08..7e5a5bf6db7 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.cpp
+++ b/be/src/vec/exec/format/table/iceberg_reader.cpp
@@ -49,12 +49,10 @@
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
-#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/exec/format/format_common.h"
#include "vec/exec/format/generic_reader.h"
-#include "vec/exec/format/parquet/parquet_common.h"
-#include "vec/exec/format/parquet/vparquet_reader.h"
+#include "vec/exec/format/orc/vorc_reader.h"
#include "vec/exec/format/table/table_format_reader.h"
namespace cctz {
@@ -74,17 +72,6 @@ class VExprContext;
} // namespace doris
namespace doris::vectorized {
-
-using DeleteRows = std::vector<int64_t>;
-using DeleteFile = phmap::parallel_flat_hash_map<
- std::string, std::unique_ptr<DeleteRows>, std::hash<std::string>,
- std::equal_to<std::string>,
-        std::allocator<std::pair<const std::string, std::unique_ptr<DeleteRows>>>, 8, std::mutex>;
-
-const int64_t MIN_SUPPORT_DELETE_FILES_VERSION = 2;
-const std::string ICEBERG_ROW_POS = "pos";
-const std::string ICEBERG_FILE_PATH = "file_path";
-
 IcebergTableReader::IcebergTableReader(std::unique_ptr<GenericReader> file_format_reader,
                                        RuntimeProfile* profile, RuntimeState* state,
                                        const TFileScanRangeParams& params,
@@ -110,33 +97,6 @@ IcebergTableReader::IcebergTableReader(std::unique_ptr<GenericReader> file_forma
ADD_CHILD_TIMER(_profile, "DeleteRowsSortTime", iceberg_profile);
}
-Status IcebergTableReader::init_reader(
-        const std::vector<std::string>& file_col_names,
-        const std::unordered_map<int, std::string>& col_id_name_map,
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
-        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-        const RowDescriptor* row_descriptor,
-        const std::unordered_map<std::string, int>* colname_to_slot_id,
-        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
-    ParquetReader* parquet_reader = static_cast<ParquetReader*>(_file_format_reader.get());
-    _col_id_name_map = col_id_name_map;
-    _file_col_names = file_col_names;
-    _colname_to_value_range = colname_to_value_range;
-    auto parquet_meta_kv = parquet_reader->get_metadata_key_values();
-    static_cast<void>(_gen_col_name_maps(parquet_meta_kv));
-    _gen_file_col_names();
-    _gen_new_colname_to_value_range();
-    parquet_reader->set_table_to_file_col_map(_table_col_to_file_col);
-    parquet_reader->iceberg_sanitize(_all_required_col_names);
-    Status status = parquet_reader->init_reader(
-            _all_required_col_names, _not_in_file_col_names, &_new_colname_to_value_range,
-            conjuncts, tuple_descriptor, row_descriptor, colname_to_slot_id,
-            not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
-
-    return status;
-}
-
 Status IcebergTableReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
     // rows already obtained from BE
     if (_push_down_agg_type == TPushAggOp::type::COUNT && _remaining_push_down_count > 0) {
@@ -220,18 +180,17 @@ Status IcebergTableReader::init_row_filters(const TFileRangeDesc& range) {
}
     if (delete_file_type == POSITION_DELETE) {
-        RETURN_IF_ERROR(_position_delete(files));
+        RETURN_IF_ERROR(_position_delete_base(files));
+    } else if (delete_file_type == EQUALITY_DELETE) {
+        // todo: equality delete
+        // If it is a count operation and it has equality delete files,
+        // the count push-down for this split needs to be canceled.
+        return Status::NotSupported("NOT SUPPORT EQUALITY_DELETE!");
     }
-
-    // todo: equality delete
-    // If it is a count operation and it has equality delete file kind,
-    // the push down operation of the count for this split needs to be canceled.
-
COUNTER_UPDATE(_iceberg_profile.num_delete_files, files.size());
return Status::OK();
}
-
-Status IcebergTableReader::_position_delete(
+Status IcebergTableReader::_position_delete_base(
const std::vector<TIcebergDeleteFileDesc>& delete_files) {
std::string data_file_path = _range.path;
    // the path in _range has the namenode prefix removed,
@@ -243,133 +202,31 @@ Status IcebergTableReader::_position_delete(
}
}
- // position delete
-    ParquetReader* parquet_reader = (ParquetReader*)(_file_format_reader.get());
- RowRange whole_range = parquet_reader->get_whole_range();
- bool init_schema = false;
- std::vector<std::string> delete_file_col_names;
- std::vector<TypeDescriptor> delete_file_col_types;
std::vector<DeleteRows*> delete_rows_array;
int64_t num_delete_rows = 0;
std::vector<DeleteFile*> erase_data;
for (auto& delete_file : delete_files) {
- if (whole_range.last_row <= delete_file.position_lower_bound ||
- whole_range.first_row > delete_file.position_upper_bound) {
- continue;
- }
-
SCOPED_TIMER(_iceberg_profile.delete_files_read_time);
Status create_status = Status::OK();
-        DeleteFile* delete_file_cache = _kv_cache->get<
-                DeleteFile>(_delet_file_cache_key(delete_file.path), [&]() -> DeleteFile* {
- TFileRangeDesc delete_range;
- // must use __set() method to make sure __isset is true
- delete_range.__set_fs_name(_range.fs_name);
- delete_range.path = delete_file.path;
- delete_range.start_offset = 0;
- delete_range.size = -1;
- delete_range.file_size = -1;
-            ParquetReader delete_reader(_profile, _params, delete_range, 102400,
-                                        const_cast<cctz::time_zone*>(&_state->timezone_obj()),
-                                        _io_ctx, _state);
- if (!init_schema) {
-                static_cast<void>(delete_reader.get_parsed_schema(&delete_file_col_names,
-                                                                  &delete_file_col_types));
- init_schema = true;
- }
- create_status = delete_reader.open();
- if (!create_status.ok()) {
- return nullptr;
- }
-            create_status = delete_reader.init_reader(delete_file_col_names, _not_in_file_col_names,
-                                                      nullptr, {}, nullptr, nullptr, nullptr,
-                                                      nullptr, nullptr, false);
- if (!create_status.ok()) {
- return nullptr;
- }
-
-            std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
-                    partition_columns;
-            std::unordered_map<std::string, VExprContextSPtr> missing_columns;
-            static_cast<void>(delete_reader.set_fill_columns(partition_columns, missing_columns));
-
-            bool dictionary_coded = true;
-            const tparquet::FileMetaData* meta_data = delete_reader.get_meta_data();
-            for (int i = 0; i < delete_file_col_names.size(); ++i) {
-                if (delete_file_col_names[i] == ICEBERG_FILE_PATH) {
-                    for (int j = 0; j < meta_data->row_groups.size(); ++j) {
-                        auto& column_chunk = meta_data->row_groups[j].columns[i];
-                        if (!(column_chunk.__isset.meta_data &&
-                              column_chunk.meta_data.__isset.dictionary_page_offset)) {
-                            dictionary_coded = false;
-                            break;
-                        }
+        auto* delete_file_cache = _kv_cache->get<DeleteFile>(
+                _delet_file_cache_key(delete_file.path), [&]() -> DeleteFile* {
+                    auto* position_delete = new DeleteFile;
+                    TFileRangeDesc delete_file_range;
+                    // must use the __set() method to make sure __isset is true
+                    delete_file_range.__set_fs_name(_range.fs_name);
+                    delete_file_range.path = delete_file.path;
+                    delete_file_range.start_offset = 0;
+                    delete_file_range.size = -1;
+                    delete_file_range.file_size = -1;
+                    // Read the position delete file based on delete_file_range,
+                    // generate a DeleteFile, and add it to the kv_cache.
+                    create_status = _read_position_delete_file(&delete_file_range, position_delete);
+
+                    if (!create_status) {
+                        return nullptr;
+                    }
- break;
- }
- }
- DeleteFile* position_delete = new DeleteFile;
-            bool eof = false;
-            while (!eof) {
-                Block block = Block();
-                for (int i = 0; i < delete_file_col_names.size(); ++i) {
-                    DataTypePtr data_type = DataTypeFactory::instance().create_data_type(
-                            delete_file_col_types[i], false);
-                    if (delete_file_col_names[i] == ICEBERG_FILE_PATH && dictionary_coded) {
-                        // the dictionary data in ColumnDictI32 is referenced by StringValue, it does keep
-                        // the dictionary data in its life circle, so the upper caller should keep the
-                        // dictionary data alive after ColumnDictI32.
-                        MutableColumnPtr dict_column = ColumnDictI32::create();
-                        block.insert(ColumnWithTypeAndName(std::move(dict_column), data_type,
-                                                           delete_file_col_names[i]));
-                    } else {
-                        MutableColumnPtr data_column = data_type->create_column();
-                        block.insert(ColumnWithTypeAndName(std::move(data_column), data_type,
-                                                           delete_file_col_names[i]));
-                    }
-                }
-                eof = false;
-                size_t read_rows = 0;
-                create_status = delete_reader.get_next_block(&block, &read_rows, &eof);
-                if (!create_status.ok()) {
-                    return nullptr;
-                }
-                if (read_rows > 0) {
-                    ColumnPtr path_column = block.get_by_name(ICEBERG_FILE_PATH).column;
-                    DCHECK_EQ(path_column->size(), read_rows);
-                    ColumnPtr pos_column = block.get_by_name(ICEBERG_ROW_POS).column;
-                    using ColumnType = typename PrimitiveTypeTraits<TYPE_BIGINT>::ColumnType;
-                    const int64_t* src_data =
-                            assert_cast<const ColumnType&>(*pos_column).get_data().data();
-                    IcebergTableReader::PositionDeleteRange range;
-                    if (dictionary_coded) {
-                        range = _get_range(assert_cast<const ColumnDictI32&>(*path_column));
-                    } else {
-                        range = _get_range(assert_cast<const ColumnString&>(*path_column));
-                    }
-                    for (int i = 0; i < range.range.size(); ++i) {
-                        std::string key = range.data_file_path[i];
-                        auto iter = position_delete->find(key);
-                        DeleteRows* delete_rows;
-                        if (iter == position_delete->end()) {
-                            delete_rows = new DeleteRows;
-                            std::unique_ptr<DeleteRows> delete_rows_ptr(delete_rows);
-                            (*position_delete)[key] = std::move(delete_rows_ptr);
-                        } else {
-                            delete_rows = iter->second.get();
-                        }
-                        const int64_t* cpy_start = src_data + range.range[i].first;
-                        const int64_t cpy_count = range.range[i].second - range.range[i].first;
-                        int64_t origin_size = delete_rows->size();
-                        delete_rows->resize(origin_size + cpy_count);
-                        int64_t* dest_position = &(*delete_rows)[origin_size];
-                        memcpy(dest_position, cpy_start, cpy_count * sizeof(int64_t));
-                    }
-                }
-            }
-            return position_delete;
-        });
+                    return position_delete;
+                });
if (create_status.is<ErrorCode::END_OF_FILE>()) {
continue;
} else if (!create_status.ok()) {
@@ -382,10 +239,7 @@ Status IcebergTableReader::_position_delete(
if (row_ids->size() > 0) {
delete_rows_array.emplace_back(row_ids);
num_delete_rows += row_ids->size();
- if (row_ids->front() >= whole_range.first_row &&
- row_ids->back() < whole_range.last_row) {
- erase_data.emplace_back(delete_file_cache);
- }
+ erase_data.emplace_back(delete_file_cache);
}
};
delete_file_map.if_contains(data_file_path, get_value);
@@ -393,7 +247,7 @@ Status IcebergTableReader::_position_delete(
if (num_delete_rows > 0) {
SCOPED_TIMER(_iceberg_profile.delete_rows_sort_time);
_sort_delete_rows(delete_rows_array, num_delete_rows);
- parquet_reader->set_delete_rows(&_delete_rows);
+ this->set_delete_rows();
COUNTER_UPDATE(_iceberg_profile.num_delete_rows, num_delete_rows);
}
    // the deleted rows are copied out, so we can erase them.
@@ -428,9 +282,9 @@ IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range(
int index = 0;
while (index < read_rows) {
StringRef data_path = file_path_column.get_data_at(index);
- int left = index;
- int right = read_rows - 1;
- while (left < right) {
+ int left = index - 1;
+ int right = read_rows;
+ while (left + 1 != right) {
int mid = left + (right - left) / 2;
if (file_path_column.get_data_at(mid) > data_path) {
right = mid;
@@ -451,23 +305,23 @@ void IcebergTableReader::_sort_delete_rows(std::vector<std::vector<int64_t>*>& d
return;
}
if (delete_rows_array.size() == 1) {
- _delete_rows.resize(num_delete_rows);
- memcpy(&_delete_rows[0], &((*delete_rows_array.front())[0]),
+ _iceberg_delete_rows.resize(num_delete_rows);
+ memcpy(&_iceberg_delete_rows[0], &((*delete_rows_array.front())[0]),
sizeof(int64_t) * num_delete_rows);
return;
}
if (delete_rows_array.size() == 2) {
- _delete_rows.resize(num_delete_rows);
+ _iceberg_delete_rows.resize(num_delete_rows);
         std::merge(delete_rows_array.front()->begin(), delete_rows_array.front()->end(),
                    delete_rows_array.back()->begin(), delete_rows_array.back()->end(),
- _delete_rows.begin());
+ _iceberg_delete_rows.begin());
return;
}
    using vec_pair = std::pair<std::vector<int64_t>::iterator, std::vector<int64_t>::iterator>;
- _delete_rows.resize(num_delete_rows);
- auto row_id_iter = _delete_rows.begin();
- auto iter_end = _delete_rows.end();
+ _iceberg_delete_rows.resize(num_delete_rows);
+ auto row_id_iter = _iceberg_delete_rows.begin();
+ auto iter_end = _iceberg_delete_rows.end();
std::vector<vec_pair> rows_array;
for (auto rows : delete_rows_array) {
if (rows->size() > 0) {
@@ -493,6 +347,222 @@ void IcebergTableReader::_sort_delete_rows(std::vector<std::vector<int64_t>*>& d
}
}
+/*
+ * Generate _all_required_col_names and _not_in_file_col_names.
+ *
+ * _all_required_col_names contains all the columns required by the user SQL.
+ * If a column name has been modified after the data file was written,
+ * the old name from the data file is put into _all_required_col_names.
+ *
+ * _not_in_file_col_names contains the columns required by the user SQL but not present
+ * in the data file, e.g. new columns added after this data file was written.
+ * Columns added with names reused from old dropped columns should be considered
+ * missing columns and belong in _not_in_file_col_names.
+ */
+void IcebergTableReader::_gen_file_col_names() {
+    _all_required_col_names.clear();
+    _not_in_file_col_names.clear();
+    for (int i = 0; i < _file_col_names.size(); ++i) {
+        auto name = _file_col_names[i];
+        auto iter = _table_col_to_file_col.find(name);
+        if (iter == _table_col_to_file_col.end()) {
+            // If the user created the iceberg table by directly appending a parquet file
+            // that already exists, there is no 'iceberg.schema' field in the parquet footer,
+            // so '_table_col_to_file_col' may be empty.
+            // Because we ignore case, the name is converted to lowercase here.
+            auto name_low = to_lower(name);
+            _all_required_col_names.emplace_back(name_low);
+            if (_has_iceberg_schema) {
+                _not_in_file_col_names.emplace_back(name);
+            } else {
+                _table_col_to_file_col.emplace(name, name_low);
+                _file_col_to_table_col.emplace(name_low, name);
+                if (name != name_low) {
+                    _has_schema_change = true;
+                }
+            }
+        } else {
+            _all_required_col_names.emplace_back(iter->second);
+        }
+    }
+}
+
+/*
+ * Generate _new_colname_to_value_range by replacing the column names in
+ * _colname_to_value_range with the column names in the data file.
+ */
+void IcebergTableReader::_gen_new_colname_to_value_range() {
+    for (auto it = _colname_to_value_range->begin(); it != _colname_to_value_range->end(); it++) {
+        auto iter = _table_col_to_file_col.find(it->first);
+        if (iter == _table_col_to_file_col.end()) {
+            _new_colname_to_value_range.emplace(it->first, it->second);
+        } else {
+            _new_colname_to_value_range.emplace(iter->second, it->second);
+        }
+    }
+}
+
+void IcebergTableReader::_gen_position_delete_file_range(Block& block, DeleteFile* position_delete,
+                                                         size_t read_rows,
+                                                         bool file_path_column_dictionary_coded) {
+ ColumnPtr path_column = block.get_by_name(ICEBERG_FILE_PATH).column;
+ DCHECK_EQ(path_column->size(), read_rows);
+ ColumnPtr pos_column = block.get_by_name(ICEBERG_ROW_POS).column;
+ using ColumnType = typename PrimitiveTypeTraits<TYPE_BIGINT>::ColumnType;
+    const int64_t* src_data = assert_cast<const ColumnType&>(*pos_column).get_data().data();
+ IcebergTableReader::PositionDeleteRange range;
+ if (file_path_column_dictionary_coded) {
+ range = _get_range(assert_cast<const ColumnDictI32&>(*path_column));
+ } else {
+ range = _get_range(assert_cast<const ColumnString&>(*path_column));
+ }
+ for (int i = 0; i < range.range.size(); ++i) {
+ std::string key = range.data_file_path[i];
+ auto iter = position_delete->find(key);
+ DeleteRows* delete_rows;
+ if (iter == position_delete->end()) {
+ delete_rows = new DeleteRows;
+ std::unique_ptr<DeleteRows> delete_rows_ptr(delete_rows);
+ (*position_delete)[key] = std::move(delete_rows_ptr);
+ } else {
+ delete_rows = iter->second.get();
+ }
+ const int64_t* cpy_start = src_data + range.range[i].first;
+ const int64_t cpy_count = range.range[i].second - range.range[i].first;
+ int64_t origin_size = delete_rows->size();
+ delete_rows->resize(origin_size + cpy_count);
+ int64_t* dest_position = &(*delete_rows)[origin_size];
+ memcpy(dest_position, cpy_start, cpy_count * sizeof(int64_t));
+ }
+}
+
+Status IcebergParquetReader::init_reader(
+        const std::vector<std::string>& file_col_names,
+        const std::unordered_map<int, std::string>& col_id_name_map,
+        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
+        const RowDescriptor* row_descriptor,
+        const std::unordered_map<std::string, int>* colname_to_slot_id,
+        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
+        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
+    _file_format = Fileformat::PARQUET;
+    ParquetReader* parquet_reader = static_cast<ParquetReader*>(_file_format_reader.get());
+    _col_id_name_map = col_id_name_map;
+    _file_col_names = file_col_names;
+    _colname_to_value_range = colname_to_value_range;
+    auto parquet_meta_kv = parquet_reader->get_metadata_key_values();
+    static_cast<void>(_gen_col_name_maps(parquet_meta_kv));
+    _gen_file_col_names();
+    _gen_new_colname_to_value_range();
+    parquet_reader->set_table_to_file_col_map(_table_col_to_file_col);
+    parquet_reader->iceberg_sanitize(_all_required_col_names);
+    Status status = parquet_reader->init_reader(
+            _all_required_col_names, _not_in_file_col_names, &_new_colname_to_value_range,
+            conjuncts, tuple_descriptor, row_descriptor, colname_to_slot_id,
+            not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
+
+    return status;
+}
+
+Status IcebergParquetReader::_read_position_delete_file(const TFileRangeDesc* delete_range,
+                                                        DeleteFile* position_delete) {
+    ParquetReader parquet_delete_reader(
+            _profile, _params, *delete_range, READ_DELETE_FILE_BATCH_SIZE,
+            const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx, _state);
+
+    RETURN_IF_ERROR(parquet_delete_reader.open());
+    RETURN_IF_ERROR(parquet_delete_reader.init_reader(delete_file_col_names, {}, nullptr, {},
+                                                      nullptr, nullptr, nullptr, nullptr, nullptr,
+                                                      false));
+
+    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
+            partition_columns;
+    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
+    static_cast<void>(parquet_delete_reader.set_fill_columns(partition_columns, missing_columns));
+
+    const tparquet::FileMetaData* meta_data = parquet_delete_reader.get_meta_data();
+    bool dictionary_coded = true;
+    for (int j = 0; j < meta_data->row_groups.size(); ++j) {
+        auto& column_chunk = meta_data->row_groups[j].columns[ICEBERG_FILE_PATH_INDEX];
+        if (!(column_chunk.__isset.meta_data &&
+              column_chunk.meta_data.__isset.dictionary_page_offset)) {
+            dictionary_coded = false;
+            break;
+        }
+    }
+    DataTypePtr data_type_file_path {new DataTypeString};
+    DataTypePtr data_type_pos {new DataTypeInt64};
+    bool eof = false;
+    while (!eof) {
+        Block block = {dictionary_coded
+                               ? ColumnWithTypeAndName {ColumnDictI32::create(),
+                                                        data_type_file_path, ICEBERG_FILE_PATH}
+                               : ColumnWithTypeAndName {data_type_file_path, ICEBERG_FILE_PATH},
+
+                       {data_type_pos, ICEBERG_ROW_POS}};
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(parquet_delete_reader.get_next_block(&block, &read_rows, &eof));
+
+        if (read_rows <= 0) {
+            break;
+        }
+        _gen_position_delete_file_range(block, position_delete, read_rows, dictionary_coded);
+    }
+    return Status::OK();
+};
+
+Status IcebergOrcReader::init_reader(
+        const std::vector<std::string>& file_col_names,
+        const std::unordered_map<int, std::string>& col_id_name_map,
+        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
+        const RowDescriptor* row_descriptor,
+        const std::unordered_map<std::string, int>* colname_to_slot_id,
+        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
+        const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts) {
+    _file_format = Fileformat::ORC;
+    auto* orc_reader = static_cast<OrcReader*>(_file_format_reader.get());
+    _col_id_name_map = col_id_name_map;
+    _file_col_names = file_col_names;
+    _colname_to_value_range = colname_to_value_range;
+
+    RETURN_IF_ERROR(_gen_col_name_maps(orc_reader));
+    _gen_file_col_names();
+    _gen_new_colname_to_value_range();
+    orc_reader->set_table_col_to_file_col(_table_col_to_file_col);
+    Status status =
+            orc_reader->init_reader(&_all_required_col_names, &_new_colname_to_value_range,
+                                    conjuncts, false, tuple_descriptor, row_descriptor,
+                                    not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts);
+    return status;
+}
+
+Status IcebergOrcReader::_read_position_delete_file(const TFileRangeDesc* delete_range,
+                                                    DeleteFile* position_delete) {
+    OrcReader orc_delete_reader(_profile, _state, _params, *delete_range,
+                                READ_DELETE_FILE_BATCH_SIZE, _state->timezone(), _io_ctx);
+    std::unordered_map<std::string, ColumnValueRangeType> colname_to_value_range;
+    Status init_status = orc_delete_reader.init_reader(
+            &delete_file_col_names, &colname_to_value_range, {}, false, {}, {}, nullptr, nullptr);
+
+    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
+            partition_columns;
+    std::unordered_map<std::string, VExprContextSPtr> missing_columns;
+    static_cast<void>(orc_delete_reader.set_fill_columns(partition_columns, missing_columns));
+
+    bool eof = false;
+    DataTypePtr data_type_file_path {new DataTypeString};
+    DataTypePtr data_type_pos {new DataTypeInt64};
+    while (!eof) {
+        Block block = {{data_type_file_path, ICEBERG_FILE_PATH}, {data_type_pos, ICEBERG_ROW_POS}};
+
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(orc_delete_reader.get_next_block(&block, &read_rows, &eof));
+
+        _gen_position_delete_file_range(block, position_delete, read_rows, false);
+    }
+    return Status::OK();
+}
+
/*
 * To support schema evolution, Iceberg writes the column id to column name map into
 * the parquet file's key_value_metadata.
@@ -507,7 +577,7 @@ void IcebergTableReader::_sort_delete_rows(std::vector<std::vector<int64_t>*>& d
* 1. col1_new -> col1
* 2. col1 -> col1_new
*/
-Status IcebergTableReader::_gen_col_name_maps(std::vector<tparquet::KeyValue> parquet_meta_kv) {
+Status IcebergParquetReader::_gen_col_name_maps(std::vector<tparquet::KeyValue> parquet_meta_kv) {
for (int i = 0; i < parquet_meta_kv.size(); ++i) {
tparquet::KeyValue kv = parquet_meta_kv[i];
if (kv.key == "iceberg.schema") {
@@ -545,58 +615,30 @@ Status IcebergTableReader::_gen_col_name_maps(std::vector<tparquet::KeyValue> pa
return Status::OK();
}
-/*
- * Generate _all_required_col_names and _not_in_file_col_names.
- *
- * _all_required_col_names is all the columns required by user sql.
- * If the column name has been modified after the data file was written,
- * put the old name in data file to _all_required_col_names.
- *
- * _not_in_file_col_names is all the columns required by user sql but not in the data file.
- * e.g. New columns added after this data file was written.
- * The columns added with names used by old dropped columns should consider as a missing column,
- * which should be in _not_in_file_col_names.
- */
-void IcebergTableReader::_gen_file_col_names() {
-    _all_required_col_names.clear();
-    _not_in_file_col_names.clear();
-    for (int i = 0; i < _file_col_names.size(); ++i) {
-        auto name = _file_col_names[i];
-        auto iter = _table_col_to_file_col.find(name);
-        if (iter == _table_col_to_file_col.end()) {
-            // If the user creates the iceberg table, directly append the parquet file that already exists,
-            // there is no 'iceberg.schema' field in the footer of parquet, the '_table_col_to_file_col' may be empty.
-            // Because we are ignoring case, so, it is converted to lowercase here
-            auto name_low = to_lower(name);
-            _all_required_col_names.emplace_back(name_low);
-            if (_has_iceberg_schema) {
-                _not_in_file_col_names.emplace_back(name);
-            } else {
-                _table_col_to_file_col.emplace(name, name_low);
-                _file_col_to_table_col.emplace(name_low, name);
-                if (name != name_low) {
-                    _has_schema_change = true;
-                }
-            }
-        } else {
-            _all_required_col_names.emplace_back(iter->second);
+Status IcebergOrcReader::_gen_col_name_maps(OrcReader* orc_reader) {
+ std::vector<std::string> col_names;
+ std::vector<uint64_t> col_ids;
+    RETURN_IF_ERROR(
+            orc_reader->get_schema_col_name_attribute(&col_names, &col_ids, ICEBERG_ORC_ATTRIBUTE));
+ _has_iceberg_schema = true;
+ _table_col_to_file_col.clear();
+ _file_col_to_table_col.clear();
+ for (size_t i = 0; i < col_ids.size(); i++) {
+ auto col_id = col_ids[i];
+ auto& file_col_name = col_names[i];
+
+ if (_col_id_name_map.find(col_id) == _col_id_name_map.end()) {
+ _has_schema_change = true;
+ continue;
}
- }
-}
-
-/*
- * Generate _new_colname_to_value_range, by replacing the column name in
- * _colname_to_value_range with column name in data file.
- */
-void IcebergTableReader::_gen_new_colname_to_value_range() {
- for (auto it = _colname_to_value_range->begin(); it !=
_colname_to_value_range->end(); it++) {
- auto iter = _table_col_to_file_col.find(it->first);
- if (iter == _table_col_to_file_col.end()) {
- _new_colname_to_value_range.emplace(it->first, it->second);
- } else {
- _new_colname_to_value_range.emplace(iter->second, it->second);
+ auto& table_col_name = _col_id_name_map[col_id];
+ _table_col_to_file_col.emplace(table_col_name, file_col_name);
+ _file_col_to_table_col.emplace(file_col_name, table_col_name);
+ if (table_col_name != file_col_name) {
+ _has_schema_change = true;
}
}
+ return Status::OK();
}
} // namespace doris::vectorized
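The reworked binary search in _get_range keeps a half-open (left, right) invariant: left always sits on an entry not greater than the probe path (or the sentinel index - 1) and right on one strictly greater (or the sentinel read_rows), so the loop terminates exactly where the run of equal paths ends. The same invariant on a plain sorted vector (a simplified sketch, not the column-based code above):

    #include <string>
    #include <utility>
    #include <vector>

    // Given rows sorted by path, return the half-open range [index, end) of the
    // run of rows whose path equals paths[index].
    std::pair<int, int> run_of_path(const std::vector<std::string>& paths, int index) {
        const std::string& probe = paths[index];
        int left = index - 1;                       // invariant: paths[left] <= probe
        int right = static_cast<int>(paths.size()); // invariant: paths[right] > probe
        while (left + 1 != right) {
            int mid = left + (right - left) / 2;
            if (paths[mid] > probe) {
                right = mid;
            } else {
                left = mid;
            }
        }
        return {index, right};
    }

Both invariants are maintained by every iteration, so the loop cannot miss a single-row run, which the open-interval formulation makes easier to verify than the old closed-interval one.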
diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h
index 04be8d53f24..50c8d31bed9 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.h
+++ b/be/src/vec/exec/format/table/iceberg_reader.h
@@ -28,10 +28,16 @@
#include "common/status.h"
#include "exec/olap_common.h"
+#include "runtime/define_primitive_type.h"
+#include "runtime/primitive_type.h"
+#include "runtime/runtime_state.h"
+#include "runtime/types.h"
#include "table_format_reader.h"
#include "util/runtime_profile.h"
#include "vec/columns/column_dictionary.h"
-
+#include "vec/exec/format/orc/vorc_reader.h"
+#include "vec/exec/format/parquet/vparquet_reader.h"
+#include "vec/exprs/vslot_ref.h"
namespace tparquet {
class KeyValue;
} // namespace tparquet
@@ -60,8 +66,6 @@ class ShardedKVCache;
class VExprContext;
class IcebergTableReader : public TableFormatReader {
- ENABLE_FACTORY_CREATOR(IcebergTableReader);
-
public:
struct PositionDeleteRange {
std::vector<std::string> data_file_path;
@@ -74,42 +78,40 @@ public:
int64_t push_down_count);
~IcebergTableReader() override = default;
- Status init_row_filters(const TFileRangeDesc& range) override;
+ Status init_row_filters(const TFileRangeDesc& range) final;
- Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+ Status get_next_block(Block* block, size_t* read_rows, bool* eof) final;
     Status set_fill_columns(
             const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
                     partition_columns,
-            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) override;
+            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) final;
- bool fill_all_columns() const override;
+ bool fill_all_columns() const final;
     Status get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
-                       std::unordered_set<std::string>* missing_cols) override;
+                       std::unordered_set<std::string>* missing_cols) final;
-    Status init_reader(
-            const std::vector<std::string>& file_col_names,
-            const std::unordered_map<int, std::string>& col_id_name_map,
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
-            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
-            const RowDescriptor* row_descriptor,
-            const std::unordered_map<std::string, int>* colname_to_slot_id,
-            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
-            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+    Status _position_delete_base(const std::vector<TIcebergDeleteFileDesc>& delete_files);
enum { DATA, POSITION_DELETE, EQUALITY_DELETE };
+ enum Fileformat { NONE, PARQUET, ORC, AVRO };
-private:
+ virtual void set_delete_rows() = 0;
+
+protected:
struct IcebergProfile {
RuntimeProfile::Counter* num_delete_files;
RuntimeProfile::Counter* num_delete_rows;
RuntimeProfile::Counter* delete_files_read_time;
RuntimeProfile::Counter* delete_rows_sort_time;
};
-
-    Status _position_delete(const std::vector<TIcebergDeleteFileDesc>& delete_files);
-
+    using DeleteRows = std::vector<int64_t>;
+    using DeleteFile = phmap::parallel_flat_hash_map<
+            std::string, std::unique_ptr<DeleteRows>, std::hash<std::string>,
+            std::equal_to<std::string>,
+            std::allocator<std::pair<const std::string, std::unique_ptr<DeleteRows>>>, 8,
+            std::mutex>;
/**
* https://iceberg.apache.org/spec/#position-delete-files
     * The rows in the delete file must be sorted by file_path then position to optimize filtering rows while scanning.
@@ -123,8 +125,8 @@ private:
PositionDeleteRange _get_range(const ColumnString& file_path_column);
- Status _gen_col_name_maps(std::vector<tparquet::KeyValue> parquet_meta_kv);
void _gen_file_col_names();
+
void _gen_new_colname_to_value_range();
    static std::string _delet_file_cache_key(const std::string& path) { return "delete_" + path; }
@@ -135,7 +137,7 @@ private:
// owned by scan node
ShardedKVCache* _kv_cache;
IcebergProfile _iceberg_profile;
- std::vector<int64_t> _delete_rows;
+ std::vector<int64_t> _iceberg_delete_rows;
// col names from _file_slot_descs
std::vector<std::string> _file_col_names;
// file column name to table column name map. For iceberg schema evolution.
@@ -143,13 +145,13 @@ private:
// table column name to file column name map. For iceberg schema evolution.
std::unordered_map<std::string, std::string> _table_col_to_file_col;
     std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
-    // copy from _colname_to_value_range with new column name that is in parquet file, to support schema evolution.
+    // copy from _colname_to_value_range with new column name that is in parquet/orc file, to support schema evolution.
     std::unordered_map<std::string, ColumnValueRangeType> _new_colname_to_value_range;
// column id to name map. Collect from FE slot descriptor.
std::unordered_map<int, std::string> _col_id_name_map;
- // col names in the parquet file
+    // col names in the parquet/orc file
std::vector<std::string> _all_required_col_names;
- // col names in table but not in parquet file
+    // col names in the table but not in the parquet/orc file
std::vector<std::string> _not_in_file_col_names;
io::IOContext* _io_ctx;
@@ -157,6 +159,88 @@ private:
bool _has_iceberg_schema = false;
int64_t _remaining_push_down_count;
+ Fileformat _file_format = Fileformat::NONE;
+
+ const int64_t MIN_SUPPORT_DELETE_FILES_VERSION = 2;
+ const std::string ICEBERG_ROW_POS = "pos";
+ const std::string ICEBERG_FILE_PATH = "file_path";
+    const std::vector<std::string> delete_file_col_names {ICEBERG_ROW_POS, ICEBERG_FILE_PATH};
+    const std::vector<TypeDescriptor> delete_file_col_types {{TYPE_STRING}, {TYPE_BIGINT}};
+ const int ICEBERG_FILE_PATH_INDEX = 0;
+ const int ICEBERG_FILE_POS_INDEX = 1;
+ const int READ_DELETE_FILE_BATCH_SIZE = 102400;
+
+    // Read the position delete file described by the TFileRangeDesc and generate a DeleteFile.
+    virtual Status _read_position_delete_file(const TFileRangeDesc*, DeleteFile*) = 0;
+
+    void _gen_position_delete_file_range(Block& block, DeleteFile* const position_delete,
+                                         size_t read_rows, bool file_path_column_dictionary_coded);
+};
+
+class IcebergParquetReader final : public IcebergTableReader {
+public:
+    ENABLE_FACTORY_CREATOR(IcebergParquetReader);
+
+    IcebergParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
+                         RuntimeState* state, const TFileScanRangeParams& params,
+                         const TFileRangeDesc& range, ShardedKVCache* kv_cache,
+                         io::IOContext* io_ctx, int64_t push_down_count)
+            : IcebergTableReader(std::move(file_format_reader), profile, state, params, range,
+                                 kv_cache, io_ctx, push_down_count) {}
+    Status init_reader(
+            const std::vector<std::string>& file_col_names,
+            const std::unordered_map<int, std::string>& col_id_name_map,
+            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
+            const RowDescriptor* row_descriptor,
+            const std::unordered_map<std::string, int>* colname_to_slot_id,
+            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
+            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+
+    Status _read_position_delete_file(const TFileRangeDesc* delete_range,
+                                      DeleteFile* position_delete) override;
+
+    void set_delete_rows() override {
+        auto* parquet_reader = (ParquetReader*)(_file_format_reader.get());
+        parquet_reader->set_delete_rows(&_iceberg_delete_rows);
+    }
+
+    Status _gen_col_name_maps(std::vector<tparquet::KeyValue> parquet_meta_kv);
+};
+class IcebergOrcReader final : public IcebergTableReader {
+public:
+    ENABLE_FACTORY_CREATOR(IcebergOrcReader);
+
+    Status _read_position_delete_file(const TFileRangeDesc* delete_range,
+                                      DeleteFile* position_delete) override;
+
+    IcebergOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
+                     RuntimeState* state, const TFileScanRangeParams& params,
+                     const TFileRangeDesc& range, ShardedKVCache* kv_cache, io::IOContext* io_ctx,
+                     int64_t push_down_count)
+            : IcebergTableReader(std::move(file_format_reader), profile, state, params, range,
+                                 kv_cache, io_ctx, push_down_count) {}
+
+    void set_delete_rows() override {
+        auto* orc_reader = (OrcReader*)_file_format_reader.get();
+        orc_reader->set_position_delete_rowids(&_iceberg_delete_rows);
+    }
+
+    Status init_reader(
+            const std::vector<std::string>& file_col_names,
+            const std::unordered_map<int, std::string>& col_id_name_map,
+            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
+            const RowDescriptor* row_descriptor,
+            const std::unordered_map<std::string, int>* colname_to_slot_id,
+            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
+            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
+
+    Status _gen_col_name_maps(OrcReader* orc_reader);
+
+private:
+    const std::string ICEBERG_ORC_ATTRIBUTE = "iceberg.id";
+};
+
} // namespace vectorized
} // namespace doris
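With the per-format subclasses in place, _sort_delete_rows (now writing into _iceberg_delete_rows) special-cases one and two sorted inputs and otherwise k-way merges the per-delete-file row-id vectors. One conventional way to express the k-way step is a min-heap over (value, source) pairs; this is a sketch of the idea, not the exact loop the commit uses:

    #include <cstdint>
    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    // Merge k sorted vectors of row ids into one sorted vector.
    std::vector<int64_t> merge_sorted(const std::vector<const std::vector<int64_t>*>& inputs) {
        using Entry = std::pair<int64_t, size_t>; // (next value, index of source vector)
        std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> heap;
        std::vector<size_t> pos(inputs.size(), 0);
        std::vector<int64_t> out;
        for (size_t i = 0; i < inputs.size(); ++i) {
            if (!inputs[i]->empty()) heap.emplace((*inputs[i])[0], i);
        }
        while (!heap.empty()) {
            auto [value, src] = heap.top();
            heap.pop();
            out.push_back(value);
            if (++pos[src] < inputs[src]->size()) {
                heap.emplace((*inputs[src])[pos[src]], src);
            }
        }
        return out;
    }

Keeping the merged result sorted is what allows the ORC reader's binary-search filtering shown earlier to work per batch.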
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp
index fb3eaec0789..c4bb8e20a15 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -820,15 +820,16 @@ Status VFileScanner::_get_next_reader() {
}
if (range.__isset.table_format_params &&
range.table_format_params.table_format_type == "iceberg") {
-            std::unique_ptr<IcebergTableReader> iceberg_reader =
-                    IcebergTableReader::create_unique(std::move(parquet_reader), _profile,
-                                                      _state, *_params, range, _kv_cache,
-                                                      _io_ctx.get(), _get_push_down_count());
+            std::unique_ptr<IcebergParquetReader> iceberg_reader =
+                    IcebergParquetReader::create_unique(std::move(parquet_reader), _profile,
+                                                        _state, *_params, range, _kv_cache,
+                                                        _io_ctx.get(), _get_push_down_count());
             init_status = iceberg_reader->init_reader(
                     _file_col_names, _col_id_name_map, _colname_to_value_range,
                     _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(),
                     _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
                     &_slot_id_to_filter_conjuncts);
+
RETURN_IF_ERROR(iceberg_reader->init_row_filters(range));
_cur_reader = std::move(iceberg_reader);
} else {
@@ -870,6 +871,20 @@ Status VFileScanner::_get_next_reader() {
                        &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts);
RETURN_IF_ERROR(tran_orc_reader->init_row_filters(range));
_cur_reader = std::move(tran_orc_reader);
+        } else if (range.__isset.table_format_params &&
+                   range.table_format_params.table_format_type == "iceberg") {
+            std::unique_ptr<IcebergOrcReader> iceberg_reader = IcebergOrcReader::create_unique(
+                    std::move(orc_reader), _profile, _state, *_params, range, _kv_cache,
+                    _io_ctx.get(), _get_push_down_count());
+
+            init_status = iceberg_reader->init_reader(
+                    _file_col_names, _col_id_name_map, _colname_to_value_range,
+                    _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(),
+                    _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
+                    &_slot_id_to_filter_conjuncts);
+
+            RETURN_IF_ERROR(iceberg_reader->init_row_filters(range));
+            _cur_reader = std::move(iceberg_reader);
} else {
init_status = orc_reader->init_reader(
                    &_file_col_names, _colname_to_value_range, _push_down_conjuncts, false,
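With this hunk, _get_next_reader has an Iceberg branch in both the parquet and the orc arms, so the wrapper is effectively chosen by the pair (file format, table_format_type). A condensed sketch of that dispatch decision (make_iceberg_reader is a hypothetical helper; the real code constructs the wrapper inline as shown above):

    #include <memory>
    #include <string>

    enum class FileFormat { PARQUET, ORC };

    struct TableReader { virtual ~TableReader() = default; };
    struct IcebergParquetReader : TableReader {};
    struct IcebergOrcReader : TableReader {};

    // Pick the Iceberg wrapper matching the underlying file format;
    // nullptr means the caller falls back to the plain format reader.
    std::unique_ptr<TableReader> make_iceberg_reader(FileFormat format,
                                                     const std::string& table_format_type) {
        if (table_format_type != "iceberg") return nullptr;
        if (format == FileFormat::PARQUET) return std::make_unique<IcebergParquetReader>();
        return std::make_unique<IcebergOrcReader>();
    }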
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 70c384a0c4b..05519f84595 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -37,7 +37,10 @@ import org.apache.doris.analysis.Subquery;
import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.MapType;
import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.catalog.StructField;
+import org.apache.doris.catalog.StructType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.TimeUtils;
@@ -65,6 +68,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.stream.Collectors;
/**
* Iceberg utils
@@ -503,8 +507,17 @@ public class IcebergUtils {
Types.ListType list = (Types.ListType) type;
                return ArrayType.create(icebergTypeToDorisType(list.elementType()), true);
case MAP:
+ Types.MapType map = (Types.MapType) type;
+ return new MapType(
+ icebergTypeToDorisType(map.keyType()),
+ icebergTypeToDorisType(map.valueType())
+ );
case STRUCT:
- return Type.UNSUPPORTED;
+ Types.StructType struct = (Types.StructType) type;
+                ArrayList<StructField> nestedTypes = struct.fields().stream().map(
+                        x -> new StructField(x.name(), icebergTypeToDorisType(x.type()))
+                ).collect(Collectors.toCollection(ArrayList::new));
+ return new StructType(nestedTypes);
default:
throw new IllegalArgumentException("Cannot transform unknown
type: " + type);
}
diff --git a/regression-test/data/external_table_p2/iceberg/iceberg_complex_type.out b/regression-test/data/external_table_p2/iceberg/iceberg_complex_type.out
new file mode 100644
index 00000000000..2250381e6f8
--- /dev/null
+++ b/regression-test/data/external_table_p2/iceberg/iceberg_complex_type.out
@@ -0,0 +1,165 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !parquet_v1_1 --
+id	INT	Yes	true	\N
+col2	ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<INT>>>>>	Yes	true	\N
+col3	MAP<ARRAY<FLOAT>,MAP<INT,MAP<INT,FLOAT>>>	Yes	true	\N
+col4	STRUCT<x:ARRAY<INT>,y:ARRAY<DOUBLE>,z:MAP<BOOLEAN,TEXT>>	Yes	true	\N
+col5	MAP<INT,MAP<INT,MAP<INT,MAP<INT,MAP<FLOAT,MAP<DOUBLE,STRUCT<x:INT,y:ARRAY<DOUBLE>>>>>>>>	Yes	true	\N
+col6	STRUCT<xx:ARRAY<INT>,yy:ARRAY<MAP<DOUBLE,FLOAT>>,zz:STRUCT<xxx:STRUCT<xxxx:STRUCT<xxxxx:DECIMAL(13, 2)>>>>	Yes	true	\N
+
+-- !parquet_v1_2 --
+1	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+2	[[[[[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+3	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]], [[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+4	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:222}}}	{"x": [2], "y": [2, 2, 22935, 99, 59, 955], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+5	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2, 239, 39293259, 2223, 23, 59, 29, 9353]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+6	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}, {2:2}, {2:2}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+
+-- !parquet_v1_3 --
+6
+
+-- !parquet_v1_4 --
+1
+1
+2
+1
+1
+1
+
+-- !parquet_v1_5 --
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+
+-- !parquet_v1_6 --
+
+-- !parquet_v1_7 --
+6 1
+5 1
+
+-- !parquet_v2_1 --
+id	INT	Yes	true	\N
+col2	ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<INT>>>>>	Yes	true	\N
+col3	MAP<ARRAY<FLOAT>,MAP<INT,MAP<INT,FLOAT>>>	Yes	true	\N
+col4	STRUCT<x:ARRAY<INT>,y:ARRAY<DOUBLE>,z:MAP<BOOLEAN,TEXT>>	Yes	true	\N
+col5	MAP<INT,MAP<INT,MAP<INT,MAP<INT,MAP<FLOAT,MAP<DOUBLE,STRUCT<x:INT,y:ARRAY<DOUBLE>>>>>>>>	Yes	true	\N
+col6	STRUCT<xx:ARRAY<INT>,yy:ARRAY<MAP<DOUBLE,FLOAT>>,zz:STRUCT<xxx:STRUCT<xxxx:STRUCT<xxxxx:DECIMAL(13, 2)>>>>	Yes	true	\N
+
+-- !parquet_v2_2 --
+1	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+2	[[[[[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+3	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]], [[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+4	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:222}}}	{"x": [2], "y": [2, 2, 22935, 99, 59, 955], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+5	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2, 239, 39293259, 2223, 23, 59, 29, 9353]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+6	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}, {2:2}, {2:2}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+
+-- !parquet_v2_3 --
+6
+
+-- !parquet_v2_4 --
+1
+1
+2
+1
+1
+1
+
+-- !parquet_v2_5 --
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+
+-- !parquet_v2_6 --
+
+-- !parquet_v2_7 --
+6 1
+5 1
+
+-- !orc_v1_1 --
+id	INT	Yes	true	\N
+col2	ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<INT>>>>>	Yes	true	\N
+col3	MAP<ARRAY<FLOAT>,MAP<INT,MAP<INT,FLOAT>>>	Yes	true	\N
+col4	STRUCT<x:ARRAY<INT>,y:ARRAY<DOUBLE>,z:MAP<BOOLEAN,TEXT>>	Yes	true	\N
+col5	MAP<INT,MAP<INT,MAP<INT,MAP<INT,MAP<FLOAT,MAP<DOUBLE,STRUCT<x:INT,y:ARRAY<DOUBLE>>>>>>>>	Yes	true	\N
+col6	STRUCT<xx:ARRAY<INT>,yy:ARRAY<MAP<DOUBLE,FLOAT>>,zz:STRUCT<xxx:STRUCT<xxxx:STRUCT<xxxxx:DECIMAL(13, 2)>>>>	Yes	true	\N
+
+-- !orc_v1_2 --
+1	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+2	[[[[[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+3	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]], [[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+4	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:222}}}	{"x": [2], "y": [2, 2, 22935, 99, 59, 955], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+5	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2, 239, 39293259, 2223, 23, 59, 29, 9353]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+6	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}, {2:2}, {2:2}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+
+-- !orc_v1_3 --
+6
+
+-- !orc_v1_4 --
+1
+1
+2
+1
+1
+1
+
+-- !orc_v1_5 --
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+
+-- !orc_v1_6 --
+
+-- !orc_v1_7 --
+6 1
+5 1
+
+-- !orc_v2_1 --
+id	INT	Yes	true	\N
+col2	ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<INT>>>>>	Yes	true	\N
+col3	MAP<ARRAY<FLOAT>,MAP<INT,MAP<INT,FLOAT>>>	Yes	true	\N
+col4	STRUCT<x:ARRAY<INT>,y:ARRAY<DOUBLE>,z:MAP<BOOLEAN,TEXT>>	Yes	true	\N
+col5	MAP<INT,MAP<INT,MAP<INT,MAP<INT,MAP<FLOAT,MAP<DOUBLE,STRUCT<x:INT,y:ARRAY<DOUBLE>>>>>>>>	Yes	true	\N
+col6	STRUCT<xx:ARRAY<INT>,yy:ARRAY<MAP<DOUBLE,FLOAT>>,zz:STRUCT<xxx:STRUCT<xxxx:STRUCT<xxxxx:DECIMAL(13, 2)>>>>	Yes	true	\N
+
+-- !orc_v2_2 --
+1	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+2	[[[[[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+3	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]], [[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]], [[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+4	[[[[[2, 2, 3, 9]], [[2, 2, 3, 9], [2, 2, 3, 9], [2, 2, 3, 9]]]]]	{[2]:{2:{2:222}}}	{"x": [2], "y": [2, 2, 22935, 99, 59, 955], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+5	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2, 239, 39293259, 2223, 23, 59, 29, 9353]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+6	[[[[[2, 2, 3, 9]]]]]	{[2]:{2:{2:2}}}	{"x": [2], "y": [2], "z": {1:"example"}}	{2:{2:{2:{2:{2:{2:{"x": 2, "y": [2]}}}}}}}	{"xx": [2, 2, 3, 9], "yy": [{2:2}, {2:2}, {2:2}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}, {2:4}], "zz": {"xxx": {"xxxx": {"xxxxx": 10123.33}}}}
+
+-- !orc_v2_3 --
+6
+
+-- !orc_v2_4 --
+1
+1
+2
+1
+1
+1
+
+-- !orc_v2_5 --
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+[[2]]
+
+-- !orc_v2_6 --
+
+-- !orc_v2_7 --
+6 1
+5 1
+
diff --git a/regression-test/data/external_table_p2/iceberg/iceberg_position_delete.out b/regression-test/data/external_table_p2/iceberg/iceberg_position_delete.out
new file mode 100644
index 00000000000..2d61ebaaf50
--- /dev/null
+++ b/regression-test/data/external_table_p2/iceberg/iceberg_position_delete.out
@@ -0,0 +1,196 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !gen_data_1 --
+
+-- !gen_data_2 --
+
+-- !gen_data_3 --
+
+-- !gen_data_4 --
+
+-- !gen_data_5 --
+
+-- !gen_data_6 --
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+
+-- !gen_data_7 --
+2
+2
+2
+
+-- !gen_data_8 --
+2 512
+3 512
+4 512
+6 512
+7 512
+8 512
+9 512
+11 512
+12 512
+13 512
+14 512
+
+-- !gen_data_9 --
+
+-- !gen_data_10 --
+
+-- !gen_data_11 --
+
+-- !gen_data_12 --
+
+-- !gen_data_13 --
+
+-- !gen_data_14 --
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+
+-- !gen_data_15 --
+7 12345xxx
+7 12345xxx
+7 12345xxx
+
+-- !gen_data_16 --
+
+-- !gen_data_17 --
+
+-- !gen_data_18 --
+
+-- !gen_data_19 --
+5632
+
+-- !gen_data_20 --
+5632
+
+-- !orc_1 --
+
+-- !orc_2 --
+
+-- !orc_3 --
+
+-- !orc_4 --
+
+-- !orc_5 --
+
+-- !orc_6 --
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+
+-- !orc_7 --
+2
+2
+2
+
+-- !orc_8 --
+2 512
+3 512
+4 512
+6 512
+7 512
+8 512
+9 512
+11 512
+12 512
+13 512
+14 512
+
+-- !orc_9 --
+
+-- !orc_10 --
+
+-- !orc_11 --
+
+-- !orc_12 --
+
+-- !orc_13 --
+
+-- !orc_14 --
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+
+-- !orc_15 --
+7 12345xxx
+7 12345xxx
+7 12345xxx
+
+-- !orc_16 --
+
+-- !orc_17 --
+
+-- !orc_18 --
+
+-- !orc_19 --
+5632
+
+-- !orc_20 --
+5632
+
+-- !parquet_1 --
+
+-- !parquet_2 --
+
+-- !parquet_3 --
+
+-- !parquet_4 --
+
+-- !parquet_5 --
+
+-- !parquet_6 --
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+
+-- !parquet_7 --
+2
+2
+2
+
+-- !parquet_8 --
+2 512
+3 512
+4 512
+6 512
+7 512
+8 512
+9 512
+11 512
+12 512
+13 512
+14 512
+
+-- !parquet_9 --
+
+-- !parquet_10 --
+
+-- !parquet_11 --
+
+-- !parquet_12 --
+
+-- !parquet_13 --
+
+-- !parquet_14 --
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+2 select xxxxxxxxx
+
+-- !parquet_15 --
+7 12345xxx
+7 12345xxx
+7 12345xxx
+
+-- !parquet_16 --
+
+-- !parquet_17 --
+
+-- !parquet_18 --
+
+-- !parquet_19 --
+5632
+
+-- !parquet_20 --
+5632
+
diff --git
a/regression-test/data/external_table_p2/iceberg/iceberg_schema_change.out
b/regression-test/data/external_table_p2/iceberg/iceberg_schema_change.out
new file mode 100644
index 00000000000..4faf8c695ec
--- /dev/null
+++ b/regression-test/data/external_table_p2/iceberg/iceberg_schema_change.out
@@ -0,0 +1,305 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !parquet_v1_1 --
+rename_col8 BIGINT Yes true \N
+rename_col9 DOUBLE Yes true \N
+rename_col10 DECIMAL(20, 5) Yes true \N
+id INT Yes true \N
+rename_col1 ARRAY<BIGINT> Yes true \N
+rename_col2 ARRAY<DOUBLE> Yes true \N
+rename_col3 ARRAY<DECIMAL(20, 4)> Yes true \N
+rename_col4 MAP<INT,BIGINT> Yes true \N
+rename_col5 MAP<INT,DOUBLE> Yes true \N
+rename_col6 MAP<INT,DECIMAL(20, 5)> Yes true \N
+rename_col7 STRUCT<add:DOUBLE,x:BIGINT,y:DOUBLE> Yes true \N
+col_add INT Yes true \N
+col_add2 INT Yes true \N
+
+-- !parquet_v1_2 --
+1 1.2000000476837158 1.12345 1 [1, 2, 3]
[1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234,
2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858,
2:2.200000047683716} {1:1.12345, 2:2.23456} {"add": null, "x": 1, "y":
1.100000023841858} \N \N
+1 1.2000000476837158 1.12345 2 [4, 5, 6]
[4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789]
{3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567,
4:4.45678} {"add": null, "x": 2, "y": 2.200000047683716} \N \N
+1 1.2000000476837158 1.12345 3 [7, 8, 9]
[7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890,
8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568}
{5:5.56789, 6:6.67890} {"add": null, "x": 3, "y": 3.299999952316284} \N
\N
+1 1.2000000476837158 1.12345 4 [10, 11, 12]
[10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011,
11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137,
8:8.800000190734863} {7:7.78901, 8:8.89012} {"add": null, "x": 4, "y":
4.400000095367432} \N \N
+1 1.2000000476837158 1.12345 5 [13, 14, 15]
[13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314,
14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273,
10:10.100000381469727} {9:9.89012, 10:10.10123} {"add": null, "x": 5,
"y": 5.5} \N \N
+21447483648 1.7976931348623157E308 1234567890.12345 6
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 1 2
+2144748345648 1.7976931348623157E308 1234567890.23456 7
[2144748345648, 214742435483649, 214742435483650]
[1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308]
[12345673890.1235, 12345367890.2346, 12344567890.3457]
{214748348:2147483632148, 24748649:213144743383649}
{214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308}
{214743648:1234567890.12345, 21474649:1234567890.23456} {"add":
1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308} 2
3
+21447483648 1.7976931348623157E308 1234567890.12345 8
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 3 4
+2144748345648 1.7976931348623157E308 1234567890.23456 9
[2144748345648, 214742435483649, 214742435483650, 214742435483650,
214742435483650, 214742435483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235,
12345367890.2346, 12344567890.3457] {214748348:2147483632148,
24748649:213144743383649} {214748648:1.717693623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": [...]
+
+-- !parquet_v1_3 --
+9
+
+-- !parquet_v1_4 --
+6
+7
+8
+9
+
+-- !parquet_v1_5 --
+1
+2
+3
+4
+
+-- !parquet_v1_6 --
+6
+7
+8
+9
+
+-- !parquet_v1_7 --
+{"add": null, "x": 1, "y": 1.100000023841858}
+{"add": null, "x": 2, "y": 2.200000047683716}
+{"add": null, "x": 3, "y": 3.299999952316284}
+{"add": null, "x": 4, "y": 4.400000095367432}
+{"add": null, "x": 5, "y": 5.5}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+
+-- !parquet_v1_8 --
+3
+4
+5
+
+-- !parquet_v1_9 --
+9 1
+8 1
+7 1
+6 1
+5 0
+4 0
+3 0
+2 0
+1 0
+
+-- !parquet_v1_10 --
+
+-- !parquet_v2_1 --
+rename_col8 BIGINT Yes true \N
+rename_col9 DOUBLE Yes true \N
+rename_col10 DECIMAL(20, 5) Yes true \N
+id INT Yes true \N
+rename_col1 ARRAY<BIGINT> Yes true \N
+rename_col2 ARRAY<DOUBLE> Yes true \N
+rename_col3 ARRAY<DECIMAL(20, 4)> Yes true \N
+rename_col4 MAP<INT,BIGINT> Yes true \N
+rename_col5 MAP<INT,DOUBLE> Yes true \N
+rename_col6 MAP<INT,DECIMAL(20, 5)> Yes true \N
+rename_col7 STRUCT<add:DOUBLE,x:BIGINT,y:DOUBLE> Yes true \N
+col_add INT Yes true \N
+col_add2 INT Yes true \N
+
+-- !parquet_v2_2 --
+1 1.2000000476837158 1.12345 1 [1, 2, 3]
[1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234,
2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858,
2:2.200000047683716} {1:1.12345, 2:2.23456} {"add": null, "x": 1, "y":
1.100000023841858} \N \N
+1 1.2000000476837158 1.12345 2 [4, 5, 6]
[4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789]
{3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567,
4:4.45678} {"add": null, "x": 2, "y": 2.200000047683716} \N \N
+1 1.2000000476837158 1.12345 3 [7, 8, 9]
[7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890,
8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568}
{5:5.56789, 6:6.67890} {"add": null, "x": 3, "y": 3.299999952316284} \N
\N
+1 1.2000000476837158 1.12345 4 [10, 11, 12]
[10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011,
11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137,
8:8.800000190734863} {7:7.78901, 8:8.89012} {"add": null, "x": 4, "y":
4.400000095367432} \N \N
+1 1.2000000476837158 1.12345 5 [13, 14, 15]
[13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314,
14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273,
10:10.100000381469727} {9:9.89012, 10:10.10123} {"add": null, "x": 5,
"y": 5.5} \N \N
+21447483648 1.7976931348623157E308 1234567890.12345 6
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 1 2
+2144748345648 1.7976931348623157E308 1234567890.23456 7
[2144748345648, 214742435483649, 214742435483650]
[1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308]
[12345673890.1235, 12345367890.2346, 12344567890.3457]
{214748348:2147483632148, 24748649:213144743383649}
{214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308}
{214743648:1234567890.12345, 21474649:1234567890.23456} {"add":
1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308} 2
3
+21447483648 1.7976931348623157E308 1234567890.12345 8
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 3 4
+2144748345648 1.7976931348623157E308 1234567890.23456 9
[2144748345648, 214742435483649, 214742435483650, 214742435483650,
214742435483650, 214742435483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235,
12345367890.2346, 12344567890.3457] {214748348:2147483632148,
24748649:213144743383649} {214748648:1.717693623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": [...]
+
+-- !parquet_v2_3 --
+9
+
+-- !parquet_v2_4 --
+6
+7
+8
+9
+
+-- !parquet_v2_5 --
+1
+2
+3
+4
+
+-- !parquet_v2_6 --
+6
+7
+8
+9
+
+-- !parquet_v2_7 --
+{"add": null, "x": 1, "y": 1.100000023841858}
+{"add": null, "x": 2, "y": 2.200000047683716}
+{"add": null, "x": 3, "y": 3.299999952316284}
+{"add": null, "x": 4, "y": 4.400000095367432}
+{"add": null, "x": 5, "y": 5.5}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+
+-- !parquet_v2_8 --
+3
+4
+5
+
+-- !parquet_v2_9 --
+9 1
+8 1
+7 1
+6 1
+5 0
+4 0
+3 0
+2 0
+1 0
+
+-- !parquet_v2_10 --
+
+-- !orc_v1_1 --
+rename_col8 BIGINT Yes true \N
+rename_col9 DOUBLE Yes true \N
+rename_col10 DECIMAL(20, 5) Yes true \N
+id INT Yes true \N
+rename_col1 ARRAY<BIGINT> Yes true \N
+rename_col2 ARRAY<DOUBLE> Yes true \N
+rename_col3 ARRAY<DECIMAL(20, 4)> Yes true \N
+rename_col4 MAP<INT,BIGINT> Yes true \N
+rename_col5 MAP<INT,DOUBLE> Yes true \N
+rename_col6 MAP<INT,DECIMAL(20, 5)> Yes true \N
+rename_col7 STRUCT<add:DOUBLE,x:BIGINT,y:DOUBLE> Yes true \N
+col_add INT Yes true \N
+col_add2 INT Yes true \N
+
+-- !orc_v1_2 --
+1 1.2000000476837158 1.12345 1 [1, 2, 3]
[1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234,
2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858,
2:2.200000047683716} {1:1.12345, 2:2.23456} {"add": null, "x": 1, "y":
1.100000023841858} \N \N
+1 1.2000000476837158 1.12345 2 [4, 5, 6]
[4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789]
{3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567,
4:4.45678} {"add": null, "x": 2, "y": 2.200000047683716} \N \N
+1 1.2000000476837158 1.12345 3 [7, 8, 9]
[7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890,
8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568}
{5:5.56789, 6:6.67890} {"add": null, "x": 3, "y": 3.299999952316284} \N
\N
+1 1.2000000476837158 1.12345 4 [10, 11, 12]
[10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011,
11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137,
8:8.800000190734863} {7:7.78901, 8:8.89012} {"add": null, "x": 4, "y":
4.400000095367432} \N \N
+1 1.2000000476837158 1.12345 5 [13, 14, 15]
[13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314,
14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273,
10:10.100000381469727} {9:9.89012, 10:10.10123} {"add": null, "x": 5,
"y": 5.5} \N \N
+21447483648 1.7976931348623157E308 1234567890.12345 6
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 1 2
+2144748345648 1.7976931348623157E308 1234567890.23456 7
[2144748345648, 214742435483649, 214742435483650]
[1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308]
[12345673890.1235, 12345367890.2346, 12344567890.3457]
{214748348:2147483632148, 24748649:213144743383649}
{214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308}
{214743648:1234567890.12345, 21474649:1234567890.23456} {"add":
1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308} 2
3
+21447483648 1.7976931348623157E308 1234567890.12345 8
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 3 4
+2144748345648 1.7976931348623157E308 1234567890.23456 9
[2144748345648, 214742435483649, 214742435483650, 214742435483650,
214742435483650, 214742435483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235,
12345367890.2346, 12344567890.3457] {214748348:2147483632148,
24748649:213144743383649} {214748648:1.717693623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": [...]
+
+-- !orc_v1_3 --
+9
+
+-- !orc_v1_4 --
+6
+7
+8
+9
+
+-- !orc_v1_5 --
+1
+2
+3
+4
+
+-- !orc_v1_6 --
+6
+7
+8
+9
+
+-- !orc_v1_7 --
+{"add": null, "x": 1, "y": 1.100000023841858}
+{"add": null, "x": 2, "y": 2.200000047683716}
+{"add": null, "x": 3, "y": 3.299999952316284}
+{"add": null, "x": 4, "y": 4.400000095367432}
+{"add": null, "x": 5, "y": 5.5}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+
+-- !orc_v1_8 --
+3
+4
+5
+
+-- !orc_v1_9 --
+9 1
+8 1
+7 1
+6 1
+5 0
+4 0
+3 0
+2 0
+1 0
+
+-- !orc_v1_10 --
+
+-- !orc_v2_1 --
+rename_col8 BIGINT Yes true \N
+rename_col9 DOUBLE Yes true \N
+rename_col10 DECIMAL(20, 5) Yes true \N
+id INT Yes true \N
+rename_col1 ARRAY<BIGINT> Yes true \N
+rename_col2 ARRAY<DOUBLE> Yes true \N
+rename_col3 ARRAY<DECIMAL(20, 4)> Yes true \N
+rename_col4 MAP<INT,BIGINT> Yes true \N
+rename_col5 MAP<INT,DOUBLE> Yes true \N
+rename_col6 MAP<INT,DECIMAL(20, 5)> Yes true \N
+rename_col7 STRUCT<add:DOUBLE,x:BIGINT,y:DOUBLE> Yes true \N
+col_add INT Yes true \N
+col_add2 INT Yes true \N
+
+-- !orc_v2_2 --
+1 1.2000000476837158 1.12345 1 [1, 2, 3]
[1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234,
2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858,
2:2.200000047683716} {1:1.12345, 2:2.23456} {"add": null, "x": 1, "y":
1.100000023841858} \N \N
+1 1.2000000476837158 1.12345 2 [4, 5, 6]
[4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789]
{3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567,
4:4.45678} {"add": null, "x": 2, "y": 2.200000047683716} \N \N
+1 1.2000000476837158 1.12345 3 [7, 8, 9]
[7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890,
8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568}
{5:5.56789, 6:6.67890} {"add": null, "x": 3, "y": 3.299999952316284} \N
\N
+1 1.2000000476837158 1.12345 4 [10, 11, 12]
[10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011,
11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137,
8:8.800000190734863} {7:7.78901, 8:8.89012} {"add": null, "x": 4, "y":
4.400000095367432} \N \N
+1 1.2000000476837158 1.12345 5 [13, 14, 15]
[13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314,
14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273,
10:10.100000381469727} {9:9.89012, 10:10.10123} {"add": null, "x": 5,
"y": 5.5} \N \N
+21447483648 1.7976931348623157E308 1234567890.12345 6
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 1 2
+2144748345648 1.7976931348623157E308 1234567890.23456 7
[2144748345648, 214742435483649, 214742435483650]
[1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308]
[12345673890.1235, 12345367890.2346, 12344567890.3457]
{214748348:2147483632148, 24748649:213144743383649}
{214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308}
{214743648:1234567890.12345, 21474649:1234567890.23456} {"add":
1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308} 2
3
+21447483648 1.7976931348623157E308 1234567890.12345 8
[21447483648, 21474483649, 21474483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235,
1234567890.2346, 1234567890.3457] {214748348:2147483648,
24748649:214743383649} {214748648:1.7976931348623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": 214748223648, "y":
1.7976931346232156e+308} 3 4
+2144748345648 1.7976931348623157E308 1234567890.23456 9
[2144748345648, 214742435483649, 214742435483650, 214742435483650,
214742435483650, 214742435483650] [1.7976931348623157e+308,
1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235,
12345367890.2346, 12344567890.3457] {214748348:2147483632148,
24748649:213144743383649} {214748648:1.717693623157e+308,
27483649:1.7976931348623157e+308} {214743648:1234567890.12345,
21474649:1234567890.23456} {"add": 1234567890.12345, "x": [...]
+
+-- !orc_v2_3 --
+9
+
+-- !orc_v2_4 --
+6
+7
+8
+9
+
+-- !orc_v2_5 --
+1
+2
+3
+4
+
+-- !orc_v2_6 --
+6
+7
+8
+9
+
+-- !orc_v2_7 --
+{"add": null, "x": 1, "y": 1.100000023841858}
+{"add": null, "x": 2, "y": 2.200000047683716}
+{"add": null, "x": 3, "y": 3.299999952316284}
+{"add": null, "x": 4, "y": 4.400000095367432}
+{"add": null, "x": 5, "y": 5.5}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214748223648, "y": 1.7976931346232156e+308}
+{"add": 1234567890.12345, "x": 214743338223648, "y": 1.7976931346232156e+308}
+
+-- !orc_v2_8 --
+3
+4
+5
+
+-- !orc_v2_9 --
+9 1
+8 1
+7 1
+6 1
+5 0
+4 0
+3 0
+2 0
+1 0
+
+-- !orc_v2_10 --
+
diff --git
a/regression-test/suites/external_table_p2/iceberg/iceberg_complex_type.groovy
b/regression-test/suites/external_table_p2/iceberg/iceberg_complex_type.groovy
new file mode 100644
index 00000000000..f465a9afe37
--- /dev/null
+++
b/regression-test/suites/external_table_p2/iceberg/iceberg_complex_type.groovy
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("iceberg_complex_type",
"p2,external,iceberg,external_remote,external_remote_iceberg") {
+
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+
+ String catalog_name = "test_external_iceberg_complex_type"
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHdfsPort = context.config.otherConfigs.get("extHdfsPort")
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='hadoop',
+ 'warehouse' =
'hdfs://${extHiveHmsHost}:${extHdfsPort}/usr/hive/warehouse/hadoop_catalog'
+ );
+ """
+
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """ use multi_catalog;"""
+
+
+
+ qt_parquet_v1_1 """ desc complex_parquet_v1 ;"""
+ qt_parquet_v1_2 """ select * from complex_parquet_v1 order by id;
"""
+ qt_parquet_v1_3 """ select count(*) from complex_parquet_v1 ;"""
+ qt_parquet_v1_4 """ select array_size(col2) from complex_parquet_v1
where col2 is not null order by id ; """
+ qt_parquet_v1_5 """ select map_keys(col3) from complex_parquet_v1
order by id; """
+ qt_parquet_v1_6 """ select struct_element(col4, 1) from
complex_parquet_v1 where id >=7 order by id; """
+ qt_parquet_v1_7 """ select id,count(col2) from complex_parquet_v1
group by id order by id desc limit 2; """
+
+
+ qt_parquet_v2_1 """ desc complex_parquet_v2 ;"""
+ qt_parquet_v2_2 """ select * from complex_parquet_v2 order by id;
"""
+ qt_parquet_v2_3 """ select count(*) from complex_parquet_v2 ;"""
+ qt_parquet_v2_4 """ select array_size(col2) from complex_parquet_v2
where col2 is not null order by id ; """
+ qt_parquet_v2_5 """ select map_keys(col3) from complex_parquet_v2
order by id; """
+ qt_parquet_v2_6 """ select struct_element(col4, 1) from
complex_parquet_v2 where id >=7 order by id; """
+ qt_parquet_v2_7 """ select id,count(col2) from complex_parquet_v2
group by id order by id desc limit 2; """
+
+
+ qt_orc_v1_1 """ desc complex_orc_v1 ;"""
+ qt_orc_v1_2 """ select * from complex_orc_v1 order by id; """
+ qt_orc_v1_3 """ select count(*) from complex_orc_v1 ;"""
+ qt_orc_v1_4 """ select array_size(col2) from complex_orc_v1 where
col2 is not null order by id ; """
+ qt_orc_v1_5 """ select map_keys(col3) from complex_orc_v1 order by
id; """
+ qt_orc_v1_6 """ select struct_element(col4, 1) from complex_orc_v1
where id >=7 order by id; """
+ qt_orc_v1_7 """ select id,count(col2) from complex_orc_v1 group by
id order by id desc limit 2; """
+
+
+ qt_orc_v2_1 """ desc complex_orc_v2 ;"""
+ qt_orc_v2_2 """ select * from complex_orc_v2 order by id; """
+ qt_orc_v2_3 """ select count(*) from complex_orc_v2 ;"""
+ qt_orc_v2_4 """ select array_size(col2) from complex_orc_v2 where
col2 is not null order by id ; """
+ qt_orc_v2_5 """ select map_keys(col3) from complex_orc_v2 order by
id; """
+ qt_orc_v2_6 """ select struct_element(col4, 1) from complex_orc_v2
where id >=7 order by id; """
+ qt_orc_v2_7 """ select id,count(col2) from complex_orc_v2 group by
id order by id desc limit 2; """
+
+
+
+
+ }
+}
+
+/*
+schema :
+ id int
+ col2 array<array<array<array<array<int>>>>>
+ col3 map<array<float>,map<int,map<int,float>>>
+ col4
struct<x:array<int>,y:array<double>,z:map<boolean,string>>
+ col5
map<int,map<int,map<int,map<int,map<float,map<double,struct<x:int,y:array<double>>>>>>>>
+ col6
struct<xx:array<int>,yy:array<map<double,float>>,zz:struct<xxx:struct<xxxx:struct<xxxxx:decimal(13,2)>>>>
+
+*/
\ No newline at end of file
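The schema comment above corresponds to a Spark-side table definition along the
following lines. This is a reconstruction, not part of the patch: the table name
matches the suite, and the format-version and write-format properties are assumed
by analogy with the position-delete tables shown later in this commit.

create table complex_parquet_v1 (
    id   int,
    col2 array<array<array<array<array<int>>>>>,
    col3 map<array<float>,map<int,map<int,float>>>,
    col4 struct<x:array<int>,y:array<double>,z:map<boolean,string>>,
    col5 map<int,map<int,map<int,map<int,map<float,map<double,struct<x:int,y:array<double>>>>>>>>,
    col6 struct<xx:array<int>,yy:array<map<double,float>>,zz:struct<xxx:struct<xxxx:struct<xxxxx:decimal(13,2)>>>>
)
USING iceberg
TBLPROPERTIES (
    'format-version' = '1',            -- assumed: the _v1/_v2 table suffixes suggest both versions exist
    'write.format.default' = 'parquet' -- assumed: parquet for *_parquet_* tables, orc for *_orc_*
);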
diff --git
a/regression-test/suites/external_table_p2/iceberg/iceberg_position_delete.groovy
b/regression-test/suites/external_table_p2/iceberg/iceberg_position_delete.groovy
new file mode 100644
index 00000000000..4cb497c3078
--- /dev/null
+++
b/regression-test/suites/external_table_p2/iceberg/iceberg_position_delete.groovy
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("iceberg_position_delete",
"p2,external,iceberg,external_remote,external_remote_iceberg") {
+
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+
+ String catalog_name = "test_external_iceberg_position_delete"
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHdfsPort = context.config.otherConfigs.get("extHdfsPort")
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='hadoop',
+ 'warehouse' =
'hdfs://${extHiveHmsHost}:${extHdfsPort}/usr/hive/warehouse/hadoop_catalog'
+ );
+ """
+
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """ use multi_catalog;"""
+
+ qt_gen_data_1 """ select * from iceberg_position_gen_data where name
= 'xyzxxxxxx' and id != 9;"""
+ qt_gen_data_2 """ select * from iceberg_position_gen_data where id =
1; """
+ qt_gen_data_3 """ select * from iceberg_position_gen_data where id =
5; """
+ qt_gen_data_4 """ select * from iceberg_position_gen_data where id =
10; """
+ qt_gen_data_5 """ select * from iceberg_position_gen_data where id =
15; """
+ qt_gen_data_6 """ select * from iceberg_position_gen_data where id = 2
limit 3;"""
+ qt_gen_data_7 """ select id from iceberg_position_gen_data where id =
2 limit 3;"""
+ qt_gen_data_8 """ select id,count(name) from iceberg_position_gen_data
where id != 1 group by id order by id ;"""
+ qt_gen_data_9 """ select id from iceberg_position_gen_data where id =
1; """
+ qt_gen_data_10 """ select name from iceberg_position_gen_data where id
= 5; """
+ qt_gen_data_11 """ select id from iceberg_position_gen_data where id =
10; """
+ qt_gen_data_12 """ select name from iceberg_position_gen_data where id
= 15;"""
+ qt_gen_data_13 """ select * from iceberg_position_gen_data where id =
15 and name = 'select xxxxxxxxx';"""
+ qt_gen_data_14 """ select * from iceberg_position_gen_data where id =
2 and name = 'select xxxxxxxxx' limit 3;"""
+ qt_gen_data_15 """ select * from iceberg_position_gen_data where id =
7 and name = '12345xxx' limit 3;"""
+ qt_gen_data_16 """ select * from iceberg_position_gen_data where name
= 'hello world' ;"""
+ qt_gen_data_17 """ select name from iceberg_position_gen_data where
name = 'hello world' ;"""
+ qt_gen_data_18 """ select id from iceberg_position_gen_data where
name = 'hello world' ;"""
+ qt_gen_data_19 """ select count(*) from iceberg_position_gen_data
where name != 'final entryxxxxxx' ;"""
+ qt_gen_data_20 """ select count(*) from iceberg_position_gen_data; """
+
+
+ qt_orc_1 """ select * from iceberg_position_orc where name =
'xyzxxxxxx' and id != 9;"""
+ qt_orc_2 """ select * from iceberg_position_orc where id = 1; """
+ qt_orc_3 """ select * from iceberg_position_orc where id = 5; """
+ qt_orc_4 """ select * from iceberg_position_orc where id = 10; """
+ qt_orc_5 """ select * from iceberg_position_orc where id = 15; """
+ qt_orc_6 """ select * from iceberg_position_orc where id = 2 limit
3;"""
+ qt_orc_7 """ select id from iceberg_position_orc where id = 2 limit
3;"""
+ qt_orc_8 """ select id,count(name) from iceberg_position_orc where id
!= 1 group by id order by id ;"""
+ qt_orc_9 """ select id from iceberg_position_orc where id = 1; """
+ qt_orc_10 """ select name from iceberg_position_orc where id = 5; """
+ qt_orc_11 """ select id from iceberg_position_orc where id = 10; """
+ qt_orc_12 """ select name from iceberg_position_orc where id = 15;"""
+ qt_orc_13 """ select * from iceberg_position_orc where id = 15 and
name = 'select xxxxxxxxx';"""
+ qt_orc_14 """ select * from iceberg_position_orc where id = 2 and name
= 'select xxxxxxxxx' limit 3;"""
+ qt_orc_15 """ select * from iceberg_position_orc where id = 7 and name
= '12345xxx' limit 3;"""
+ qt_orc_16 """ select * from iceberg_position_orc where name = 'hello
world' ;"""
+ qt_orc_17 """ select name from iceberg_position_orc where name =
'hello world' ;"""
+ qt_orc_18 """ select id from iceberg_position_orc where name = 'hello
world' ;"""
+ qt_orc_19 """ select count(*) from iceberg_position_orc where name !=
'final entryxxxxxx' ;"""
+ qt_orc_20 """ select count(*) from iceberg_position_orc; """
+
+ qt_parquet_1 """ select * from iceberg_position_parquet where name =
'xyzxxxxxx' and id != 9;"""
+ qt_parquet_2 """ select * from iceberg_position_parquet where id = 1;
"""
+ qt_parquet_3 """ select * from iceberg_position_parquet where id = 5;
"""
+ qt_parquet_4 """ select * from iceberg_position_parquet where id = 10;
"""
+ qt_parquet_5 """ select * from iceberg_position_parquet where id = 15;
"""
+ qt_parquet_6 """ select * from iceberg_position_parquet where id = 2
limit 3;"""
+ qt_parquet_7 """ select id from iceberg_position_parquet where id = 2
limit 3;"""
+ qt_parquet_8 """ select id,count(name) from iceberg_position_parquet
where id != 1 group by id order by id ;"""
+ qt_parquet_9 """ select id from iceberg_position_parquet where id = 1;
"""
+ qt_parquet_10 """ select name from iceberg_position_parquet where id =
5; """
+ qt_parquet_11 """ select id from iceberg_position_parquet where id =
10; """
+ qt_parquet_12 """ select name from iceberg_position_parquet where id =
15;"""
+ qt_parquet_13 """ select * from iceberg_position_parquet where id = 15
and name = 'select xxxxxxxxx';"""
+ qt_parquet_14 """ select * from iceberg_position_parquet where id = 2
and name = 'select xxxxxxxxx' limit 3;"""
+ qt_parquet_15 """ select * from iceberg_position_parquet where id = 7
and name = '12345xxx' limit 3;"""
+ qt_parquet_16 """ select * from iceberg_position_parquet where name =
'hello world' ;"""
+ qt_parquet_17 """ select name from iceberg_position_parquet where
name = 'hello world' ;"""
+ qt_parquet_18 """ select id from iceberg_position_parquet where name
= 'hello world' ;"""
+ qt_parquet_19 """ select count(*) from iceberg_position_parquet where
name != 'final entryxxxxxx' ;"""
+ qt_parquet_20 """ select count(*) from iceberg_position_parquet; """
+
+ }
+}
+/*
+
+
+create table iceberg_position_gen_data(
+ id int,
+ name string
+)
+USING iceberg
+TBLPROPERTIES (
+ 'format-version' = '2',
+ 'write.format.default' = 'orc',
+ 'write.update.mode' = 'merge-on-read',
+ 'write.merge.mode' = 'merge-on-read',
+ 'write.delete.mode' = 'merge-on-read'
+);
+
+INSERT INTO iceberg_position_gen_data VALUES
+(1, "hello world"),
+(2, "select xxxxxxxxx"),
+(3, "example xxxx"),
+(4, "more dataxxx"),
+(5, "another examplexxx"),
+(6, "testxxx"),
+(7, "12345xxx"),
+(8, "abcdefxxxx"),
+(9, "xyzxxxxxx"),
+(10, "inserted dataxxxxx"),
+(11, "SQLxxxxx"),
+(12, "tablexxxx"),
+(13, "rowxxxx"),
+(14, "data entryxxxx"),
+(15, "final entryxxxxxx");
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+insert into iceberg_position_gen_data select * from iceberg_position_gen_data;
+
+
+
+create table iceberg_position_parquet(
+ id int,
+ name string
+)
+USING iceberg
+TBLPROPERTIES (
+ 'format-version' = '2',
+ 'write.format.default' = 'parquet',
+ 'write.update.mode' = 'merge-on-read',
+ 'write.merge.mode' = 'merge-on-read',
+ 'write.delete.mode' = 'merge-on-read'
+);
+create table iceberg_position_orc(
+ id int,
+ name string
+)
+USING iceberg
+TBLPROPERTIES (
+ 'format-version' = '2',
+ 'write.format.default' = 'orc',
+ 'write.update.mode' = 'merge-on-read',
+ 'write.merge.mode' = 'merge-on-read',
+ 'write.delete.mode' = 'merge-on-read'
+);
+
+insert into iceberg_position_parquet select * from iceberg_position_gen_data;
+insert into iceberg_position_orc select * from iceberg_position_parquet;
+
+
+delete from iceberg_position_gen_data where id = 1;
+delete from iceberg_position_gen_data where id = 5;
+delete from iceberg_position_gen_data where id = 10;
+delete from iceberg_position_gen_data where id = 15;
+
+delete from iceberg_position_parquet where id = 1;
+delete from iceberg_position_parquet where id = 5;
+delete from iceberg_position_parquet where id = 10;
+delete from iceberg_position_parquet where id = 15;
+
+delete from iceberg_position_orc where id = 1;
+delete from iceberg_position_orc where id = 5;
+delete from iceberg_position_orc where id = 10;
+delete from iceberg_position_orc where id = 15;
+*/
+
+
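The DELETE statements above run under merge-on-read, so each one writes Iceberg
positional delete files (pairs of data-file path and row ordinal) instead of
rewriting the data files. One way to confirm that from the Spark side is Iceberg's
files metadata table; a sketch, assuming the same Spark session and the
multi_catalog database used above (the content codes are Iceberg's own:
0 = data, 1 = position deletes, 2 = equality deletes):

-- list the positional delete files produced by the DELETEs above
SELECT content, file_path, record_count
FROM multi_catalog.iceberg_position_gen_data.files
WHERE content = 1;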
diff --git
a/regression-test/suites/external_table_p2/iceberg/iceberg_schema_change.groovy
b/regression-test/suites/external_table_p2/iceberg/iceberg_schema_change.groovy
new file mode 100644
index 00000000000..5e036683595
--- /dev/null
+++
b/regression-test/suites/external_table_p2/iceberg/iceberg_schema_change.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("iceberg_schema_change",
"p2,external,iceberg,external_remote,external_remote_iceberg") {
+
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+
+ String catalog_name = "test_external_iceberg_schema_change"
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHdfsPort = context.config.otherConfigs.get("extHdfsPort")
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='hadoop',
+ 'warehouse' =
'hdfs://${extHiveHmsHost}:${extHdfsPort}/usr/hive/warehouse/hadoop_catalog'
+ );
+ """
+
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """ use multi_catalog;"""
+
+
+
+
+
+ qt_parquet_v1_1 """ desc complex_parquet_v1_schema_change ;"""
+ qt_parquet_v1_2 """ select * from complex_parquet_v1_schema_change
order by id; """
+ qt_parquet_v1_3 """ select count(*) from
complex_parquet_v1_schema_change ;"""
+ qt_parquet_v1_4 """ select id from complex_parquet_v1_schema_change
where col_add +1 = col_add2 order by id;"""
+ qt_parquet_v1_5 """ select col_add from
complex_parquet_v1_schema_change where col_add is not null order by col_add ;
"""
+ qt_parquet_v1_6 """ select id from complex_parquet_v1_schema_change
where col_add + 5 = id order by id; """
+ qt_parquet_v1_7 """ select rename_col7 from
complex_parquet_v1_schema_change order by id; """
+ qt_parquet_v1_8 """ select col_add2 from
complex_parquet_v1_schema_change where id >=7 order by id; """
+ qt_parquet_v1_9 """ select id,count(col_add) from
complex_parquet_v1_schema_change group by id order by id desc ; """
+ qt_parquet_v1_10 """ select col_add from
complex_parquet_v1_schema_change where col_add -1 = col_add2 order by id; """
+
+
+
+ qt_parquet_v2_1 """ desc complex_parquet_v2_schema_change ;"""
+ qt_parquet_v2_2 """ select * from complex_parquet_v2_schema_change
order by id; """
+ qt_parquet_v2_3 """ select count(*) from
complex_parquet_v2_schema_change ;"""
+ qt_parquet_v2_4 """ select id from complex_parquet_v2_schema_change
where col_add +1 = col_add2 order by id;"""
+ qt_parquet_v2_5 """ select col_add from
complex_parquet_v2_schema_change where col_add is not null order by col_add ;
"""
+ qt_parquet_v2_6 """ select id from complex_parquet_v2_schema_change
where col_add + 5 = id order by id; """
+ qt_parquet_v2_7 """ select rename_col7 from
complex_parquet_v2_schema_change order by id; """
+ qt_parquet_v2_8 """ select col_add2 from
complex_parquet_v2_schema_change where id >=7 order by id; """
+ qt_parquet_v2_9 """ select id,count(col_add) from
complex_parquet_v2_schema_change group by id order by id desc ; """
+ qt_parquet_v2_10 """ select col_add from
complex_parquet_v2_schema_change where col_add -1 = col_add2 order by id; """
+
+
+
+
+ qt_orc_v1_1 """ desc complex_orc_v1_schema_change ;"""
+ qt_orc_v1_2 """ select * from complex_orc_v1_schema_change order by
id; """
+ qt_orc_v1_3 """ select count(*) from complex_orc_v1_schema_change
;"""
+ qt_orc_v1_4 """ select id from complex_orc_v1_schema_change where
col_add +1 = col_add2 order by id;"""
+ qt_orc_v1_5 """ select col_add from complex_orc_v1_schema_change
where col_add is not null order by col_add ; """
+ qt_orc_v1_6 """ select id from complex_orc_v1_schema_change where
col_add + 5 = id order by id; """
+ qt_orc_v1_7 """ select rename_col7 from complex_orc_v1_schema_change
order by id; """
+ qt_orc_v1_8 """ select col_add2 from complex_orc_v1_schema_change
where id >=7 order by id; """
+ qt_orc_v1_9 """ select id,count(col_add) from
complex_orc_v1_schema_change group by id order by id desc ; """
+ qt_orc_v1_10 """ select col_add from complex_orc_v1_schema_change
where col_add -1 = col_add2 order by id; """
+
+
+
+ qt_orc_v2_1 """ desc complex_orc_v2_schema_change ;"""
+ qt_orc_v2_2 """ select * from complex_orc_v2_schema_change order by
id; """
+ qt_orc_v2_3 """ select count(*) from complex_orc_v2_schema_change
;"""
+ qt_orc_v2_4 """ select id from complex_orc_v2_schema_change where
col_add +1 = col_add2 order by id;"""
+ qt_orc_v2_5 """ select col_add from complex_orc_v2_schema_change
where col_add is not null order by col_add ; """
+ qt_orc_v2_6 """ select id from complex_orc_v2_schema_change where
col_add + 5 = id order by id; """
+ qt_orc_v2_7 """ select rename_col7 from complex_orc_v2_schema_change
order by id; """
+ qt_orc_v2_8 """ select col_add2 from complex_orc_v2_schema_change
where id >=7 order by id; """
+ qt_orc_v2_9 """ select id,count(col_add) from
complex_orc_v2_schema_change group by id order by id desc ; """
+ qt_orc_v2_10 """ select col_add from complex_orc_v2_schema_change
where col_add -1 = col_add2 order by id; """
+
+
+
+ }
+}
+/*
+before schema:
+ id int,
+ col1 array<int>,
+ col2 array<float>,
+ col3 array<decimal(12,4)>,
+ col4 map<int,int>,
+ col5 map<int,float>,
+ col6 map<int,decimal(12,5)>,
+ col7 struct<x:int,y:float,z:decimal(12,5)>,
+ col8 int,
+ col9 float,
+ col10 decimal(12,5),
+ col_del int
+
+
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col1.element type
bigint;
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col2.element type
double;
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col3.element type
decimal(20,4);
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col4.value type
bigint;
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col5.value type
double;
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col6.value type
decimal(20,5);
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col7.x type bigint;
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col7.y type double;
+ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col7.z type
decimal(20,5);
+alter table complex_parquet_v2_schema_change CHANGE COLUMN col8 col8 bigint;
+alter table complex_parquet_v2_schema_change CHANGE COLUMN col9 col9 double;
+alter table complex_parquet_v2_schema_change CHANGE COLUMN col10 col10
decimal(20,5);
+alter table complex_parquet_v2_schema_change drop column col7.z;
+alter table complex_parquet_v2_schema_change add column col7.add double;
+alter table complex_parquet_v2_schema_change change column col7.add first;
+alter table complex_parquet_v2_schema_change rename COLUMN col1 to rename_col1;
+alter table complex_parquet_v2_schema_change rename COLUMN col2 to rename_col2;
+alter table complex_parquet_v2_schema_change rename COLUMN col3 to rename_col3;
+alter table complex_parquet_v2_schema_change rename COLUMN col4 to rename_col4;
+alter table complex_parquet_v2_schema_change rename COLUMN col5 to rename_col5;
+alter table complex_parquet_v2_schema_change rename COLUMN col6 to rename_col6;
+alter table complex_parquet_v2_schema_change rename COLUMN col7 to rename_col7;
+alter table complex_parquet_v2_schema_change rename COLUMN col8 to rename_col8;
+alter table complex_parquet_v2_schema_change rename COLUMN col9 to rename_col9;
+alter table complex_parquet_v2_schema_change rename COLUMN col10 to
rename_col10;
+alter table complex_parquet_v2_schema_change drop column col_del;
+alter table complex_parquet_v2_schema_change CHANGE COLUMN rename_col8 first;
+alter table complex_parquet_v2_schema_change CHANGE COLUMN rename_col9 after
rename_col8;
+alter table complex_parquet_v2_schema_change CHANGE COLUMN rename_col10 after
rename_col9;
+alter table complex_parquet_v2_schema_change add column col_add int;
+alter table complex_parquet_v2_schema_change add column col_add2 int;
+
+after schema:
+ rename_col8 bigint
+ rename_col9 double
+ rename_col10 decimal(20,5)
+ id int
+ rename_col1 array<bigint>
+ rename_col2 array<double>
+ rename_col3 array<decimal(20,4)>
+ rename_col4 map<int,bigint>
+ rename_col5 map<int,double>
+ rename_col6 map<int,decimal(20,5)>
+ rename_col7 struct<add:double,x:bigint,y:double>
+ col_add int
+ col_add2 int
+
+*/
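Iceberg resolves columns by field ID rather than by name, so the renames above do
not rewrite any data files: rows written before the ALTERs remain readable through
the new names, and columns added afterwards (col_add, col_add2) come back as NULL
for those rows, which is exactly the \N values the expected output shows for ids 1
through 5. A minimal spot check along the same lines, assuming the catalog and
database set up in the suite:

-- rows written before the schema change: rename_col8 resolves (by field ID) to the
-- old col8 values, and the later-added columns are NULL
SELECT id, rename_col8, col_add, col_add2
FROM complex_parquet_v2_schema_change
WHERE id <= 5
ORDER BY id;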