This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6f9eed180e0 [fix](iceberg)fix read parquet page index core when read iceberg with equal delete. (#55190)
6f9eed180e0 is described below
commit 6f9eed180e0cd9c46253f7a778e2c237df10df6b
Author: daidai <[email protected]>
AuthorDate: Sun Sep 7 12:22:50 2025 +0800
[fix](iceberg)fix read parquet page index core when read iceberg with equal delete. (#55190)
### What problem does this PR solve?
Related PR: #54240
Problem Summary:
In PR #54240, some logic for reading the Parquet page index was removed,
which causes a coredump when reading an Iceberg table with equal
deletes.
```
terminate called after throwing an instance of 'std::out_of_range'
  what():  unordered_map::at
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/chenqi/doris-master/be/src/common/signal_handler.h:420
 1# 0x00007F84D27531D0 in /lib64/libpthread.so.0
 2# __GI_raise in /lib64/libc.so.6
 3# abort in /lib64/libc.so.6
 4# 0x000055F08125DBB2 in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
 5# __cxxabiv1::__terminate(void (*)()) in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
 6# 0x000055F08125C0B1 in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
 7# 0x000055F08125C204 in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
 8# std::__throw_out_of_range(char const*) in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
 9# doris::vectorized::ParquetReader::_process_page_index(tparquet::RowGroup const&, doris::vectorized::RowGroupReader::RowGroupIndex const&, std::vector<doris::vectorized::RowRange, std::allocator<doris::vectorized::RowRange> >&) at /mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/parquet/vparquet_reader.cpp:1037
10# doris::vectorized::ParquetReader::_next_row_group_reader() at /mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/parquet/vparquet_reader.cpp:846
11# doris::vectorized::ParquetReader::get_next_block(doris::vectorized::Block*, unsigned long*, bool*) at /mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/parquet/vparquet_reader.cpp:752
12# doris::vectorized::IcebergTableReader::get_next_block_inner(doris::vectorized::Block*, unsigned long*, bool*) at /mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/table/iceberg_reader.cpp:99
13# doris::vectorized::TableFormatReader::get_next_block(doris::vectorized::Block*, unsigned long*, bool*) in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
...
```
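
For context, the abort is the stock behavior of std::unordered_map::at on a missing key. A minimal standalone sketch of the failure mode (names here are illustrative stand-ins, not Doris code):

```
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>

int main() {
    // Stand-in for _colname_to_slot_id: table column name -> slot id.
    // The column appended for the equal-delete path is absent from the map.
    std::unordered_map<std::string, int> colname_to_slot_id = {{"id", 0}, {"data", 1}};

    try {
        // at() throws std::out_of_range for a missing key; left uncaught in
        // the BE, it reaches std::terminate and aborts, as in the trace above.
        int slot_id = colname_to_slot_id.at("equal_delete_col");
        std::cout << slot_id << '\n';
    } catch (const std::out_of_range& e) {
        std::cout << "out_of_range: " << e.what() << '\n';  // matches the trace: "unordered_map::at"
    }
    return 0;
}
```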
---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp |  9 ++--
 .../iceberg/test_iceberg_equal_delete.out          | Bin 0 -> 217 bytes
 .../iceberg/test_iceberg_equal_delete.groovy       | 50 +++++++++++++++++++++
 3 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 09b1883f1a6..82b7bcf9c42 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -1036,7 +1036,8 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
         return Status::OK();
     }
     PageIndex page_index;
-    if (!config::enable_parquet_page_index || !_has_page_index(row_group.columns, page_index)) {
+    if (!config::enable_parquet_page_index || !_has_page_index(row_group.columns, page_index) ||
+        _colname_to_slot_id == nullptr) {
         read_whole_row_group();
         return Status::OK();
     }
@@ -1065,8 +1066,10 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
     for (size_t idx = 0; idx < _read_table_columns.size(); idx++) {
         const auto& read_table_col = _read_table_columns[idx];
         const auto& read_file_col = _read_file_columns[idx];
-
-        DCHECK(_colname_to_slot_id != nullptr && _colname_to_slot_id->contains(read_table_col));
+        if (!_colname_to_slot_id->contains(read_table_col)) {
+            // equal delete may add a column to read_table_col, but this column has no slot_id.
+            continue;
+        }
         auto slot_id = _colname_to_slot_id->at(read_table_col);
         if (!_push_down_simple_expr.contains(slot_id)) {
             continue;
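
Pulled out of the patch, the shape of the fix is two guards around the slot-id lookup. A standalone sketch (the function signature, member stand-ins, and main() are illustrative; only the guard logic mirrors the change above):

```
#include <string>
#include <unordered_map>
#include <vector>

using SlotIdMap = std::unordered_map<std::string, int>;

// Illustrative stand-in for the page-index filtering loop.
void process_page_index(const SlotIdMap* colname_to_slot_id,
                        const std::vector<std::string>& read_table_columns) {
    // Guard 1: no column-to-slot mapping at all -> skip page-index filtering
    // entirely (mirrors the first hunk, which falls back to reading the
    // whole row group).
    if (colname_to_slot_id == nullptr) {
        return;  // read_whole_row_group() in the real code
    }
    for (const auto& read_table_col : read_table_columns) {
        // Guard 2: equal delete can append columns that have no slot id;
        // skip them instead of calling at() on a missing key (second hunk).
        auto it = colname_to_slot_id->find(read_table_col);
        if (it == colname_to_slot_id->end()) {
            continue;
        }
        int slot_id = it->second;
        (void)slot_id;  // ... evaluate page-level filters for this slot ...
    }
}

int main() {
    SlotIdMap slots = {{"id", 0}, {"data", 1}};
    process_page_index(&slots, {"id", "data", "equal_delete_col"});  // no throw
    process_page_index(nullptr, {"id"});  // whole-row-group fallback
    return 0;
}
```

The patch itself uses contains() followed by at(); the single find() above avoids the second hash lookup, but either form is safe once the missing-key case is skipped.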
diff --git a/regression-test/data/external_table_p2/iceberg/test_iceberg_equal_delete.out b/regression-test/data/external_table_p2/iceberg/test_iceberg_equal_delete.out
new file mode 100644
index 00000000000..d25b832fe97
Binary files /dev/null and b/regression-test/data/external_table_p2/iceberg/test_iceberg_equal_delete.out differ
diff --git a/regression-test/suites/external_table_p2/iceberg/test_iceberg_equal_delete.groovy b/regression-test/suites/external_table_p2/iceberg/test_iceberg_equal_delete.groovy
new file mode 100644
index 00000000000..97761bdeb60
--- /dev/null
+++ b/regression-test/suites/external_table_p2/iceberg/test_iceberg_equal_delete.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_equal_delete", "p2,external,iceberg,external_remote,external_remote_iceberg") {
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ return
+ }
+
+ String catalog = "test_iceberg_equal_delete"
+ String access_key = context.config.otherConfigs.get("dlf_access_key")
+ String secret_key = context.config.otherConfigs.get("dlf_secret_key")
+
+
+ sql """drop catalog if exists ${catalog};"""
+ sql """
+ create catalog if not exists ${catalog} properties (
+        "warehouse" = "oss://selectdb-qa-datalake-test/iceberg_temp/warehouse",
+ "type" = "iceberg",
+ "oss.secret_key" = "${secret_key}",
+ "oss.endpoint" = "oss-cn-beijing-internal.aliyuncs.com",
+ "oss.access_key" = "${access_key}",
+ "iceberg.catalog.type" = "hadoop"
+ );
+ """
+
+
+ sql """ use ${catalog}.flink_db """
+ String tb = """ sample """
+
+ qt_q1 """ select * from ${tb} order by id """
+ qt_q2 """ select data from ${tb} where data = "sample data 8"; """
+ qt_q3 """ select data from ${tb} where data = "sample data 3" """
+ qt_q4 """ select * from ${tb} where id = 10 """
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]