This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/orc by this push:
new 45123938e4a [Fix] Fix failure when the PRESENT stream of a top-level
struct column is accessed repeatedly during deferred materialization.
(#309)
45123938e4a is described below
commit 45123938e4a39eb9d02c97d01036c7068426cbba
Author: Qi Chen <[email protected]>
AuthorDate: Thu Apr 24 02:09:18 2025 +0800
[Fix] Fix failure when the PRESENT stream of a top-level struct column
is accessed repeatedly during deferred materialization. (#309)
---
c++/src/ColumnReader.cc | 23 +++++++++++++----------
c++/src/ColumnReader.hh | 5 +++--
c++/src/Reader.cc | 4 ++--
3 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 59b976f36ce..875ce81a9de 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -46,15 +46,17 @@ namespace orc {
}
}
- ColumnReader::ColumnReader(const Type& _type, StripeStreams& stripe)
+ ColumnReader::ColumnReader(const Type& _type, StripeStreams& stripe, bool
readPresentStream)
: type(_type),
columnId(type.getColumnId()),
memoryPool(stripe.getMemoryPool()),
metrics(stripe.getReaderMetrics()) {
- std::unique_ptr<SeekableInputStream> stream =
- stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true);
- if (stream.get()) {
- notNullDecoder = createBooleanRleDecoder(std::move(stream), metrics);
+ if (readPresentStream) {
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true);
+ if (stream.get()) {
+ notNullDecoder = createBooleanRleDecoder(std::move(stream), metrics);
+ }
}
}
@@ -1109,7 +1111,8 @@ namespace orc {
std::vector<std::unique_ptr<ColumnReader>> children;
public:
- StructColumnReader(const Type& type, StripeStreams& stipe, bool
useTightNumericVector = false);
+ StructColumnReader(const Type& type, StripeStreams& stipe, bool
useTightNumericVector = false,
+ bool isTopLevel = false);
uint64_t skip(uint64_t numValues, const ReadPhase& readPhase) override;
@@ -1133,8 +1136,8 @@ namespace orc {
};
StructColumnReader::StructColumnReader(const Type& type, StripeStreams&
stripe,
- bool useTightNumericVector)
- : ColumnReader(type, stripe) {
+ bool useTightNumericVector, bool
isTopLevel)
+ : ColumnReader(type, stripe, !isTopLevel) {
// count the number of selected sub-columns
const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) {
@@ -2309,7 +2312,7 @@ namespace orc {
* Create a reader for the given stripe.
*/
std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams&
stripe,
- bool useTightNumericVector) {
+ bool useTightNumericVector, bool
isTopLevel) {
switch (static_cast<int64_t>(type.getKind())) {
case SHORT: {
if (useTightNumericVector) {
@@ -2363,7 +2366,7 @@ namespace orc {
return std::make_unique<UnionColumnReader>(type, stripe,
useTightNumericVector);
case STRUCT:
- return std::make_unique<StructColumnReader>(type, stripe,
useTightNumericVector);
+ return std::make_unique<StructColumnReader>(type, stripe,
useTightNumericVector, isTopLevel);
case FLOAT: {
if (useTightNumericVector) {
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index 8c0e36bd353..b22bbc9617c 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -129,7 +129,7 @@ namespace orc {
}
public:
- ColumnReader(const Type& type, StripeStreams& stipe);
+ ColumnReader(const Type& type, StripeStreams& stipe, bool
readPresentStream = true);
virtual ~ColumnReader();
@@ -188,7 +188,8 @@ namespace orc {
* Create a reader for the given stripe.
*/
std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams&
stripe,
- bool useTightNumericVector =
false);
+ bool useTightNumericVector = false,
+ bool isTopLevel = false);
void loadStringDicts(ColumnReader* columnReader,
const std::unordered_map<uint64_t, std::string>&
columnIdToNameMap,
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 44f2be21e19..542eda5c9e2 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1250,7 +1250,7 @@ namespace orc {
StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo,
currentStripeFooter,
currentStripeInfo.offset(),
*contents->stream, streams,
writerTimezone, readerTimezone);
- reader = buildReader(*contents->schema, stripeStreams,
useTightNumericVector);
+ reader = buildReader(*contents->schema, stripeStreams,
useTightNumericVector, true);
if (stringDictFilter != nullptr) {
std::list<std::string> dictFilterColumnNames;
@@ -1407,7 +1407,7 @@ namespace orc {
* @return Id of the RowGroup that the row belongs to
*/
int RowReaderImpl::computeRGIdx(uint64_t rowIndexStride, long rowIdx) {
- return rowIndexStride == 0 ? 0 : (int)(rowIdx / rowIndexStride);
+ return rowIndexStride == 0 ? 0 : static_cast<int>(rowIdx /
static_cast<long>(rowIndexStride));
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]