dutyu opened a new pull request, #26194:
URL: https://github.com/apache/doris/pull/26194
## Proposed changes
Sometimes Spark jobs leave temporary files behind (for example, under
`_temporary/` directories); we need to filter these files out.
```
errCode = 2, detailMessage = (xxx.xxx.xxx.xxx.xxx)
[CANCELLED][INTERNAL_ERROR]failed to init reader for file
hdfs://xxx/user/hive/warehouse/xxx.db/xxx/_temporary/0/_temporary/attempt_202311010952379045223848154629697_0013_m_000442_3355/part-00442-3f97ac18-0583-4d2c-8995-8d21ceda56ec-c000.snappy.orc,
err: [INTERNAL_ERROR]Init OrcReader failed. reason = Failed to parse the
postscript from
hdfs://xxx/user/hive/warehouse/xxx.db/xxx/_temporary/0/_temporary/attempt_202311010952379045223848154629697_0013_m_000442_3355/part-00442-3f97ac18-0583-4d2c-8995-8d21ceda56ec-c000.snappy.orc
0. @ 0x000000000bdbd89f
doris::Status doris::Status::Error<6, true,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>
>&>(std::basic_string_view<char, std::char_traits<char> >,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>
>&)
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:173
in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
1. @ 0x000000000eccecfe
doris::vectorized::OrcReader::_create_file_reader()
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
2. @ 0x000000000ecceff9
doris::vectorized::OrcReader::init_reader(std::vector<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> >,
std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> > > > const*,
std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >,
std::variant<doris::ColumnValueRange<(doris::PrimitiveType)3>,
doris::ColumnValueRange<(doris::PrimitiveType)4>,
doris::ColumnValueRange<(doris::PrimitiveType)5>,
doris::ColumnValueRange<(doris::PrimitiveType)6>,
doris::ColumnValueRange<(doris::PrimitiveType)7>,
doris::ColumnValueRange<(doris::PrimitiveType)15>,
doris::ColumnValueRange<(doris::PrimitiveType)10>,
doris::ColumnValueRange<(doris::PrimitiveType)23>,
doris::ColumnValueRange<(doris::PrimitiveType)11>,
doris::ColumnValueRange<(doris::PrimitiveType)25>,
doris::ColumnValueRange<(doris::PrimitiveType)12>,
doris::ColumnValueRange<(doris::PrimitiveType)26>,
doris::ColumnValueRange<(doris::PrimitiveType)20>,
doris::ColumnValueRange<(doris::PrimitiveType)2>,
doris::ColumnValueRange<(doris::PrimitiveType)19>,
doris::ColumnValueRange<(doris::PrimitiveType)28>,
doris::ColumnValueRange<(doris::PrimitiveType)29>,
doris::ColumnValueRange<(doris::PrimitiveType)30> >,
std::hash<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > >,
std::allocator<std::pair<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > const,
std::variant<doris::ColumnValueRange<(doris::PrimitiveType)3>,
doris::ColumnValueRange<(doris::PrimitiveType)4>,
doris::ColumnValueRange<(doris::PrimitiveType)5>,
doris::ColumnValueRange<(doris::PrimitiveType)6>,
doris::ColumnValueRange<(doris::PrimitiveType)7>,
doris::ColumnValueRange<(doris::PrimitiveType)15>,
doris::ColumnValueRange<(doris::PrimitiveType)10>,
doris::ColumnValueRange<(doris::PrimitiveType)23>,
doris::ColumnValueRange<(doris::PrimitiveType)11>,
doris::ColumnValueRange<(doris::PrimitiveType)25>,
doris::ColumnValueRange<(doris::PrimitiveType)12>,
doris::ColumnValueRange<(doris::PrimitiveType)26>,
doris::ColumnValueRange<(doris::PrimitiveType)20>,
doris::ColumnValueRange<(doris::PrimitiveType)2>,
doris::ColumnValueRange<(doris::PrimitiveType)19>,
doris::ColumnValueRange<(doris::PrimitiveType)28>,
doris::ColumnValueRange<(doris::PrimitiveType)29>,
doris::ColumnValueRange<(doris::PrimitiveType)30> > > > >*,
std::vector<std::shared_ptr<doris::vectorized::VExprContext>,
std::allocator<std::shared_ptr<doris::vectorized::VExprContext> > > const&,
bool, doris::TupleDescriptor const*, doris::RowDescriptor const*,
std::vector<std::shared_ptr<doris::vectorized::VExprContext>,
std::allocator<std::shared_ptr<doris::vectorized::VExprContext> > > const*,
std::unordered_map<int,
std::vector<std::shared_ptr<doris::vectorized::VExprContext>,
std::allocator<std::shared_ptr<doris::vectorized::VExprContext> > >,
std::hash<int>, std::equal_to<int>,
std::allocator<std::pair<int const,
std::vector<std::shared_ptr<doris::vectorized::VExprContext>,
std::allocator<std::shared_ptr<doris::vectorized::VExprContext> > > > > >
const*) /root/doris-master/be/src/common/status.h:448 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
3. @ 0x000000000ff43da0
doris::vectorized::VFileScanner::_get_next_reader()
/root/doris-master/be/src/vec/exec/scan/vfile_scanner.cpp:800 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
4. @ 0x000000000ff4256f
doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*,
doris::vectorized::Block*, bool*)
/root/doris-master/be/src/common/status.h:448 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
5. @ 0x000000000ffdb1aa
doris::vectorized::VScanner::get_block(doris::RuntimeState*,
doris::vectorized::Block*, bool*)
/root/doris-master/be/src/vec/exec/scan/vscanner.cpp:0 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
6. @ 0x000000000ff3b379
doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*,
doris::vectorized::ScannerContext*,
std::shared_ptr<doris::vectorized::VScanner>)
/root/doris-master/be/src/common/status.h:346 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
7. @ 0x000000000ff3ca11 std::_Function_handler<void (),
doris::vectorized::ScannerScheduler::_schedule_scanners(doris::vectorized::ScannerContext*)::$_1::operator()()
const::{lambda()#4}>::_M_invoke(std::_Any_data const&)
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:701
in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
8. @ 0x000000000c986608
doris::ThreadPool::dispatch_thread()
/root/doris-master/be/src/util/threadpool.cpp:0 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
9. @ 0x000000000c97c2ac
doris::Thread::supervise_thread(void*)
/var/local/ldb-toolchain/bin/../usr/include/pthread.h:562 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
10. @ 0x0000000000007ea5 start_thread in /usr/lib64/libpthread-2.17.so
11. @ 0x00000000000feb0d __clone in /usr/lib64/libc-2.17.so
0. @ 0x000000000c647366
doris::Status doris::Status::Error<6, true,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&, std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> > >(std::basic_string_view<char, std::char_traits<char> >,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&, std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >&&)
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:173
in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
1. @ 0x000000000ff42dc3
doris::vectorized::VFileScanner::_get_next_reader()
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
2. @ 0x000000000ff4256f
doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*,
doris::vectorized::Block*, bool*)
/root/doris-master/be/src/common/status.h:448 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
3. @ 0x000000000ffdb1aa
doris::vectorized::VScanner::get_block(doris::RuntimeState*,
doris::vectorized::Block*, bool*)
/root/doris-master/be/src/vec/exec/scan/vscanner.cpp:0 in
/hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be
4. @ 0x000000000ff3b379
doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*,
doris::vectorized::ScannerContext*,
std::shared_ptr<doris::vectorized::VScanner>)
/root/doris-master/be/src/common/status.h:346 in
/hdpData10/yarn/localcache/usercache/hive/appcache/a…
```
## Further comments
If this is a relatively large or complex change, kick off the discussion at
[[email protected]](mailto:[email protected]) by explaining why you
chose the solution you did and what alternatives you considered, etc...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]