This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/orc by this push:
     new 45123938e4a [Fix] Fixed issue with top level struct column having 
present stream failing to access repeatedly when deferred materialization 
occurs. (#309)
45123938e4a is described below

commit 45123938e4a39eb9d02c97d01036c7068426cbba
Author: Qi Chen <[email protected]>
AuthorDate: Thu Apr 24 02:09:18 2025 +0800

    [Fix] Fixed issue with top level struct column having present stream 
failing to access repeatedly when deferred materialization occurs. (#309)
---
 c++/src/ColumnReader.cc | 23 +++++++++++++----------
 c++/src/ColumnReader.hh |  5 +++--
 c++/src/Reader.cc       |  4 ++--
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 59b976f36ce..875ce81a9de 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -46,15 +46,17 @@ namespace orc {
     }
   }
 
-  ColumnReader::ColumnReader(const Type& _type, StripeStreams& stripe)
+  ColumnReader::ColumnReader(const Type& _type, StripeStreams& stripe, bool 
readPresentStream)
       : type(_type),
         columnId(type.getColumnId()),
         memoryPool(stripe.getMemoryPool()),
         metrics(stripe.getReaderMetrics()) {
-    std::unique_ptr<SeekableInputStream> stream =
-        stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true);
-    if (stream.get()) {
-      notNullDecoder = createBooleanRleDecoder(std::move(stream), metrics);
+    if (readPresentStream) {
+      std::unique_ptr<SeekableInputStream> stream =
+          stripe.getStream(columnId, proto::Stream_Kind_PRESENT, true);
+      if (stream.get()) {
+        notNullDecoder = createBooleanRleDecoder(std::move(stream), metrics);
+      }
     }
   }
 
@@ -1109,7 +1111,8 @@ namespace orc {
     std::vector<std::unique_ptr<ColumnReader>> children;
 
    public:
-    StructColumnReader(const Type& type, StripeStreams& stipe, bool 
useTightNumericVector = false);
+    StructColumnReader(const Type& type, StripeStreams& stipe, bool 
useTightNumericVector = false,
+                       bool isTopLevel = false);
 
     uint64_t skip(uint64_t numValues, const ReadPhase& readPhase) override;
 
@@ -1133,8 +1136,8 @@ namespace orc {
   };
 
   StructColumnReader::StructColumnReader(const Type& type, StripeStreams& 
stripe,
-                                         bool useTightNumericVector)
-      : ColumnReader(type, stripe) {
+                                         bool useTightNumericVector, bool 
isTopLevel)
+      : ColumnReader(type, stripe, !isTopLevel) {
     // count the number of selected sub-columns
     const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
     switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) {
@@ -2309,7 +2312,7 @@ namespace orc {
    * Create a reader for the given stripe.
    */
   std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams& 
stripe,
-                                            bool useTightNumericVector) {
+                                            bool useTightNumericVector, bool 
isTopLevel) {
     switch (static_cast<int64_t>(type.getKind())) {
       case SHORT: {
         if (useTightNumericVector) {
@@ -2363,7 +2366,7 @@ namespace orc {
         return std::make_unique<UnionColumnReader>(type, stripe, 
useTightNumericVector);
 
       case STRUCT:
-        return std::make_unique<StructColumnReader>(type, stripe, 
useTightNumericVector);
+        return std::make_unique<StructColumnReader>(type, stripe, 
useTightNumericVector, isTopLevel);
 
       case FLOAT: {
         if (useTightNumericVector) {
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index 8c0e36bd353..b22bbc9617c 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -129,7 +129,7 @@ namespace orc {
     }
 
    public:
-    ColumnReader(const Type& type, StripeStreams& stipe);
+    ColumnReader(const Type& type, StripeStreams& stipe, bool 
readPresentStream = true);
 
     virtual ~ColumnReader();
 
@@ -188,7 +188,8 @@ namespace orc {
    * Create a reader for the given stripe.
    */
   std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams& 
stripe,
-                                            bool useTightNumericVector = 
false);
+                                            bool useTightNumericVector = false,
+                                            bool isTopLevel = false);
 
   void loadStringDicts(ColumnReader* columnReader,
                        const std::unordered_map<uint64_t, std::string>& 
columnIdToNameMap,
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 44f2be21e19..542eda5c9e2 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1250,7 +1250,7 @@ namespace orc {
       StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo, 
currentStripeFooter,
                                       currentStripeInfo.offset(), 
*contents->stream, streams,
                                       writerTimezone, readerTimezone);
-      reader = buildReader(*contents->schema, stripeStreams, 
useTightNumericVector);
+      reader = buildReader(*contents->schema, stripeStreams, 
useTightNumericVector, true);
 
       if (stringDictFilter != nullptr) {
         std::list<std::string> dictFilterColumnNames;
@@ -1407,7 +1407,7 @@ namespace orc {
    * @return Id of the RowGroup that the row belongs to
    */
   int RowReaderImpl::computeRGIdx(uint64_t rowIndexStride, long rowIdx) {
-    return rowIndexStride == 0 ? 0 : (int)(rowIdx / rowIndexStride);
+    return rowIndexStride == 0 ? 0 : static_cast<int>(rowIdx / 
static_cast<long>(rowIndexStride));
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to