This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7b270d1ae9 [Fix](multi-catalog) Fix orc reader crashed when hdfs 
reading error by catching exception. (#22193)
7b270d1ae9 is described below

commit 7b270d1ae9b8ca76ac613141ac820301d513835c
Author: Qi Chen <[email protected]>
AuthorDate: Wed Jul 26 08:57:31 2023 +0800

    [Fix](multi-catalog) Fix orc reader crashed when hdfs reading error by 
catching exception. (#22193)
    
    The ORC reader crashed when an HDFS read error occurred.
    
    0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/zcp_repo/be/src/common/signal_handler.h:413
    1# 0x00007F6F8B3C00C0 in /lib/x86_64-linux-gnu/libc.so.6
    2# raise in /lib/x86_64-linux-gnu/libc.so.6
    3# abort in /lib/x86_64-linux-gnu/libc.so.6
    4# __gnu_cxx::__verbose_terminate_handler() [clone .cold] at 
../../../../libstdc++-v3/libsupc++/vterminate.cc:75
    5# __cxxabiv1::__terminate(void (*)()) at 
../../../../libstdc++-v3/libsupc++/eh_terminate.cc:48
    6# 0x0000555CBC4718C1 in /mnt/hdd01/STRESS_ENV/be/lib/doris_be
    7# 0x0000555CBC471A14 in /mnt/hdd01/STRESS_ENV/be/lib/doris_be
    8# doris::vectorized::ORCFileInputStream::read(void*, unsigned long, 
unsigned long) at 
/home/zcp/repo_center/zcp_repo/be/src/vec/exec/format/orc/vorc_reader.cpp:121
    9# orc::SeekableFileInputStream::Next(void const*, int) in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    10# orc::DecompressionStream::readHeader() in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    11# orc::DecompressionStream::Next(void const*, int) in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    12# void orc::RleDecoderV2::next<long>(long*, unsigned long, char const*) 
in /mnt/hdd01/STRESS_ENV/be/lib/doris_be
    13# orc::StringDictionaryColumnReader::loadDictionary() in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    14# orc::StructColumnReader::loadStringDicts(std::unordered_map<unsigned 
long, std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >, std::hash<unsigned long>, std::equal_to<unsigned long>, 
std::allocator<std::pair<unsigned long const, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > > > > const&, 
std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >, orc::StringDictionary*, std::hash<std: [...]
    15# orc::RowReaderImpl::startNextStripe(orc::ReadPhase const&) in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    16# orc::RowReaderImpl::nextBatch(orc::ColumnVectorBatch&, void*) in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    17# doris::vectorized::OrcReader::get_next_block(doris::vectorized::Block*, 
unsigned long*, bool*) at 
/home/zcp/repo_center/zcp_repo/be/src/vec/exec/format/orc/vorc_reader.cpp:1420
    18# doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) at 
/home/zcp/repo_center/zcp_repo/be/src/vec/exec/scan/vfile_scanner.cpp:250
    19# doris::vectorized::VScanner::get_block(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) in /mnt/hdd01/STRESS_ENV/be/lib/doris_be
    20# 
doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*,
 doris::vectorized::ScannerContext*, 
std::shared_ptr<doris::vectorized::VScanner>) at 
/home/zcp/repo_center/zcp_repo/be/src/vec/exec/scan/scanner_scheduler.cpp:335
    21# std::_Function_handler<void (), 
doris::vectorized::ScannerScheduler::_schedule_scanners(doris::vectorized::ScannerContext*)::$_1::operator()()
 const::
---
 be/src/vec/exec/format/orc/vorc_reader.cpp | 50 ++++++++++++++++++------------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index e81f42a98e..77fd37e913 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -755,14 +755,14 @@ Status OrcReader::set_fill_columns(
     _fill_all_columns = true;
 
     // create orc row reader
-    _row_reader_options.range(_range_start_offset, _range_size);
-    _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : 
_ctz);
-    _row_reader_options.include(_read_cols);
-    if (_lazy_read_ctx.can_lazy_read) {
-        _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns);
-        _orc_filter = std::unique_ptr<ORCFilterImpl>(new ORCFilterImpl(this));
-    }
     try {
+        _row_reader_options.range(_range_start_offset, _range_size);
+        _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : 
_ctz);
+        _row_reader_options.include(_read_cols);
+        if (_lazy_read_ctx.can_lazy_read) {
+            _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns);
+            _orc_filter = std::unique_ptr<ORCFilterImpl>(new 
ORCFilterImpl(this));
+        }
         _row_reader_options.setEnableLazyDecoding(true);
         if (!_lazy_read_ctx.conjuncts.empty()) {
             _string_dict_filter = std::make_unique<StringDictFilterImpl>(this);
@@ -770,12 +770,12 @@ Status OrcReader::set_fill_columns(
         _row_reader = _reader->createRowReader(_row_reader_options, 
_orc_filter.get(),
                                                _string_dict_filter.get());
         _batch = _row_reader->createRowBatch(_batch_size);
+        auto& selected_type = _row_reader->getSelectedType();
+        int idx = 0;
+        _init_select_types(selected_type, idx);
     } catch (std::exception& e) {
         return Status::InternalError("Failed to create orc row reader. reason 
= {}", e.what());
     }
-    auto& selected_type = _row_reader->getSelectedType();
-    int idx = 0;
-    _init_select_types(selected_type, idx);
 
     if (!_slot_id_to_filter_conjuncts) {
         return Status::OK();
@@ -1383,11 +1383,16 @@ Status OrcReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof) {
             SCOPED_RAW_TIMER(&_statistics.get_batch_time);
             // reset decimal_scale_params_index;
             _decimal_scale_params_index = 0;
-            rr = _row_reader->nextBatch(*_batch, block);
-            if (rr == 0) {
-                *eof = true;
-                *read_rows = 0;
-                return Status::OK();
+            try {
+                rr = _row_reader->nextBatch(*_batch, block);
+                if (rr == 0) {
+                    *eof = true;
+                    *read_rows = 0;
+                    return Status::OK();
+                }
+            } catch (std::exception& e) {
+                return Status::InternalError("Orc row reader nextBatch failed. 
reason = {}",
+                                             e.what());
             }
         }
 
@@ -1434,11 +1439,16 @@ Status OrcReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof) {
             SCOPED_RAW_TIMER(&_statistics.get_batch_time);
             // reset decimal_scale_params_index;
             _decimal_scale_params_index = 0;
-            rr = _row_reader->nextBatch(*_batch, block);
-            if (rr == 0) {
-                *eof = true;
-                *read_rows = 0;
-                return Status::OK();
+            try {
+                rr = _row_reader->nextBatch(*_batch, block);
+                if (rr == 0) {
+                    *eof = true;
+                    *read_rows = 0;
+                    return Status::OK();
+                }
+            } catch (std::exception& e) {
+                return Status::InternalError("Orc row reader nextBatch failed. 
reason = {}",
+                                             e.what());
             }
         }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to