Gabriel39 commented on code in PR #10016:
URL: https://github.com/apache/incubator-doris/pull/10016#discussion_r892117835
##########
be/src/olap/rowset/segment_v2/column_reader.h:
##########
@@ -298,10 +298,10 @@ class FileColumnIterator final : public ColumnIterator {
ParsedPage _page;
// keep dict page decoder
- std::unique_ptr<PageDecoder> _dict_decoder;
+ std::shared_ptr<BinaryPlainPageDecoder> _dict_decoder;
// keep dict page handle to avoid released
- PageHandle _dict_page_handle;
+ std::shared_ptr<PageHandle> _dict_page_handle;
Review Comment:
Could this variable be deleted safely? I notice that this variable will be
set to a new value each time so I think it's unnecessary to use as a member
variable.
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -679,22 +702,29 @@ Status FileColumnIterator::_read_data_page(const
OrdinalPageIndexIterator& iter)
Slice dict_data;
PageFooterPB dict_footer;
_opts.type = INDEX_PAGE;
+ _dict_page_handle = std::make_shared<PageHandle>();
RETURN_IF_ERROR(_reader->read_page(_opts,
_reader->get_dict_page_pointer(),
- &_dict_page_handle,
&dict_data, &dict_footer,
- _compress_codec.get()));
+ _dict_page_handle.get(),
&dict_data,
+ &dict_footer,
_compress_codec.get()));
// ignore dict_footer.dict_page_footer().encoding() due to only
// PLAIN_ENCODING is supported for dict page right now
- _dict_decoder =
std::make_unique<BinaryPlainPageDecoder>(dict_data);
+ _dict_decoder =
std::make_shared<BinaryPlainPageDecoder>(dict_data);
RETURN_IF_ERROR(_dict_decoder->init());
- auto* pd_decoder =
(BinaryPlainPageDecoder*)_dict_decoder.get();
+ auto* pd_decoder = _dict_decoder.get();
_dict_word_info.reset(new StringRef[pd_decoder->_num_elems]);
pd_decoder->get_dict_word_info(_dict_word_info.get());
+ _page.dict_page_handle = _dict_page_handle;
}
- dict_page_decoder->set_dict_decoder(_dict_decoder.get(),
_dict_word_info.get());
+ dict_page_decoder->set_dict_decoder(_dict_decoder,
_dict_word_info);
}
}
+
+ if (_need_cache_parsed_page) {
+ ParsedPage* cache_page = new ParsedPage(_page);
Review Comment:
Why we need to clone a cache page? Looks like we have already cached this
page and don't need to duplicate another one
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -468,6 +468,12 @@ FileColumnIterator::FileColumnIterator(ColumnReader*
reader) : _reader(reader) {
Status FileColumnIterator::init(const ColumnIteratorOptions& opts) {
_opts = opts;
RETURN_IF_ERROR(get_block_compression_codec(_reader->get_compression(),
_compress_codec));
+ auto encoding = _reader->encoding_info()->encoding();
+ _need_cache_parsed_page =
Review Comment:
should we add a new config to enable or disable this feature here? If budget
for page cache is poor, I'm worried that it will lead to serious performance
fallback as we cache a totally parsed page.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]