ffacs commented on code in PR #2048:
URL: https://github.com/apache/orc/pull/2048#discussion_r1816237258
##########
c++/src/Reader.cc:
##########
@@ -1474,6 +1476,77 @@ namespace orc {
return ret;
}
+ void ReaderImpl::releaseBuffer(uint64_t boundary) {
+ if (readCache_) {
+ readCache_->evictEntriesBefore(boundary);
+ }
+ }
+
+ void ReaderImpl::preBuffer(const std::vector<int>& stripes,
+ const std::list<uint64_t>& includeTypes, const
CacheOptions& options) {
+ if (stripes.empty() || includeTypes.empty()) {
+ return;
+ }
+
+ orc::RowReaderOptions row_reader_options;
+ row_reader_options.includeTypes(includeTypes);
+ ColumnSelector column_selector(contents_.get());
+ std::vector<bool> selected_columns;
+ column_selector.updateSelected(selected_columns, row_reader_options);
+
+ std::vector<ReadRange> ranges;
+ ranges.reserve(includeTypes.size());
+ for (auto stripe : stripes) {
+ // get stripe information
+ const auto& stripe_info = footer_->stripes(stripe);
+ uint64_t stripe_footer_start =
+ stripe_info.offset() + stripe_info.index_length() +
stripe_info.data_length();
+ uint64_t stripe_footer_length = stripe_info.footer_length();
+
+ // get stripe footer
+ std::unique_ptr<SeekableInputStream> pb_stream = createDecompressor(
+ contents_->compression,
+ std::make_unique<SeekableFileInputStream>(contents_->stream.get(),
stripe_footer_start,
+ stripe_footer_length,
*contents_->pool),
+ contents_->blockSize, *contents_->pool, contents_->readerMetrics);
+ proto::StripeFooter stripe_footer;
+ if (!stripe_footer.ParseFromZeroCopyStream(pb_stream.get())) {
+ throw ParseError(std::string("bad StripeFooter from ") +
pb_stream->getName());
+ }
+
+ // traverse all streams in stripe footer, choose selected streams to
prebuffer
+ uint64_t offset = stripe_info.offset();
+ for (int i = 0; i < stripe_footer.streams_size(); i++) {
+ const proto::Stream& stream = stripe_footer.streams(i);
+ if (offset + stream.length() > stripe_footer_start) {
+ std::stringstream msg;
+ msg << "Malformed stream meta at stream index " << i << " in stripe
" << stripe
+ << ": streamOffset=" << offset << ", streamLength=" <<
stream.length()
+ << ", stripeOffset=" << stripe_info.offset()
+ << ", stripeIndexLength=" << stripe_info.index_length()
+ << ", stripeDataLength=" << stripe_info.data_length();
+ throw ParseError(msg.str());
+ }
+
+ if (stream.has_kind() && selected_columns[stream.column()]) {
+ const auto& kind = stream.kind();
+ if (kind == proto::Stream_Kind_DATA || kind ==
proto::Stream_Kind_DICTIONARY_DATA ||
Review Comment:
Why don't we prefetch other kinds of streams here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]