taiyang-li commented on code in PR #2048:
URL: https://github.com/apache/orc/pull/2048#discussion_r1816269321
##########
c++/src/Reader.cc:
##########
@@ -1474,6 +1476,77 @@ namespace orc {
return ret;
}
+ void ReaderImpl::releaseBuffer(uint64_t boundary) {
+ if (readCache_) {
+ readCache_->evictEntriesBefore(boundary);
+ }
+ }
+
+ void ReaderImpl::preBuffer(const std::vector<int>& stripes,
+ const std::list<uint64_t>& includeTypes, const
CacheOptions& options) {
+ if (stripes.empty() || includeTypes.empty()) {
+ return;
+ }
+
+ orc::RowReaderOptions row_reader_options;
+ row_reader_options.includeTypes(includeTypes);
+ ColumnSelector column_selector(contents_.get());
+ std::vector<bool> selected_columns;
+ column_selector.updateSelected(selected_columns, row_reader_options);
+
+ std::vector<ReadRange> ranges;
+ ranges.reserve(includeTypes.size());
+ for (auto stripe : stripes) {
+ // get stripe information
+ const auto& stripe_info = footer_->stripes(stripe);
+ uint64_t stripe_footer_start =
+ stripe_info.offset() + stripe_info.index_length() +
stripe_info.data_length();
+ uint64_t stripe_footer_length = stripe_info.footer_length();
+
+ // get stripe footer
+ std::unique_ptr<SeekableInputStream> pb_stream = createDecompressor(
+ contents_->compression,
+ std::make_unique<SeekableFileInputStream>(contents_->stream.get(),
stripe_footer_start,
+ stripe_footer_length,
*contents_->pool),
+ contents_->blockSize, *contents_->pool, contents_->readerMetrics);
+ proto::StripeFooter stripe_footer;
+ if (!stripe_footer.ParseFromZeroCopyStream(pb_stream.get())) {
+ throw ParseError(std::string("bad StripeFooter from ") +
pb_stream->getName());
+ }
+
+ // traverse all streams in stripe footer, choose selected streams to
prebuffer
+ uint64_t offset = stripe_info.offset();
+ for (int i = 0; i < stripe_footer.streams_size(); i++) {
+ const proto::Stream& stream = stripe_footer.streams(i);
+ if (offset + stream.length() > stripe_footer_start) {
+ std::stringstream msg;
+ msg << "Malformed stream meta at stream index " << i << " in stripe
" << stripe
+ << ": streamOffset=" << offset << ", streamLength=" <<
stream.length()
+ << ", stripeOffset=" << stripe_info.offset()
+ << ", stripeIndexLength=" << stripe_info.index_length()
+ << ", stripeDataLength=" << stripe_info.data_length();
+ throw ParseError(msg.str());
+ }
+
+ if (stream.has_kind() && selected_columns[stream.column()]) {
+ const auto& kind = stream.kind();
+ if (kind == proto::Stream_Kind_DATA || kind ==
proto::Stream_Kind_DICTIONARY_DATA ||
Review Comment:

The code above shows where `getStream` is called in the column readers. Only
those kinds of streams are used by the column readers, so restricting the
prebuffer to them is deliberate: it avoids useless prefetching.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]