This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/orc by this push:
     new 78bbe2e4 [Fix] Fix load string dict issue for transactional hive tables. (#112)
78bbe2e4 is described below

commit 78bbe2e41f2140b803855d683fae5e1a4b734a37
Author: Qi Chen <[email protected]>
AuthorDate: Tue Aug 22 10:15:37 2023 +0800

    [Fix] Fix load string dict issue for transactional hive tables. (#112)
---
 c++/src/ColumnReader.cc | 72 ++++++++++++++++++++++++++++++++++++++++++-------
 c++/src/ColumnReader.hh |  5 ++++
 2 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 525f62c4..e94253e3 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -729,6 +729,7 @@ namespace orc {
     void nextInternalWithFilter(ColumnVectorBatch& rowBatch, uint64_t 
numValues, char* notNull,
                                 const ReadPhase& readPhase, uint16_t* 
sel_rowid_idx,
                                 size_t sel_size);
+    StringDictionary* loadDictionary();
 
    public:
     StringDictionaryColumnReader(const Type& type, StripeStreams& stipe);
@@ -745,7 +746,9 @@ namespace orc {
     void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& 
positions,
                         const ReadPhase& readPhase) override;
 
-    StringDictionary* loadDictionary();
+    void loadStringDicts(const std::unordered_map<uint64_t, std::string>& 
columnIdToNameMap,
+                         std::unordered_map<std::string, StringDictionary*>* 
columnNameToDictMap,
+                         const StringDictFilter* stringDictFilter) override;
   };
 
   StringDictionaryColumnReader::StringDictionaryColumnReader(const Type& type,
@@ -900,6 +903,17 @@ namespace orc {
     rle->seek(positions.at(columnId));
   }
 
+  void StringDictionaryColumnReader::loadStringDicts(
+      const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+      std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+      const StringDictFilter* stringDictFilter) {
+    auto iter = columnIdToNameMap.find(getType().getColumnId());
+    if (iter == columnIdToNameMap.end()) {
+      return;
+    }
+    (*columnNameToDictMap)[iter->second] = loadDictionary();
+  }
+
   StringDictionary* StringDictionaryColumnReader::loadDictionary() {
     if (dictionaryLoaded) {
       return dictionary.get();
@@ -1110,7 +1124,7 @@ namespace orc {
 
     void loadStringDicts(const std::unordered_map<uint64_t, std::string>& 
columnIdToNameMap,
                          std::unordered_map<std::string, StringDictionary*>* 
columnNameToDictMap,
-                         const StringDictFilter* stringDictFilter);
+                         const StringDictFilter* stringDictFilter) override;
 
    private:
     template <bool encoded>
@@ -1198,14 +1212,7 @@ namespace orc {
       std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
       const StringDictFilter* stringDictFilter) {
     for (auto& ptr : children) {
-      auto iter = columnIdToNameMap.find(ptr->getType().getColumnId());
-      if (iter == columnIdToNameMap.end()) {
-        continue;
-      }
-      auto* stringDictionaryColumnReader = 
dynamic_cast<StringDictionaryColumnReader*>(ptr.get());
-      if (stringDictionaryColumnReader != nullptr) {
-        (*columnNameToDictMap)[iter->second] = 
stringDictionaryColumnReader->loadDictionary();
-      }
+      ptr->loadStringDicts(columnIdToNameMap, columnNameToDictMap, 
stringDictFilter);
     }
   }
 
@@ -1229,6 +1236,10 @@ namespace orc {
     void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& 
positions,
                         const ReadPhase& readPhase) override;
 
+    void loadStringDicts(const std::unordered_map<uint64_t, std::string>& 
columnIdToNameMap,
+                         std::unordered_map<std::string, StringDictionary*>* 
columnNameToDictMap,
+                         const StringDictFilter* stringDictFilter) override;
+
    private:
     template <bool encoded>
     void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* 
notNull,
@@ -1339,6 +1350,15 @@ namespace orc {
     }
   }
 
+  void ListColumnReader::loadStringDicts(
+      const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+      std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+      const StringDictFilter* stringDictFilter) {
+    if (child.get()) {
+      child->loadStringDicts(columnIdToNameMap, columnNameToDictMap, 
stringDictFilter);
+    }
+  }
+
   class MapColumnReader : public ColumnReader {
    private:
     std::unique_ptr<ColumnReader> keyReader;
@@ -1360,6 +1380,10 @@ namespace orc {
     void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& 
positions,
                         const ReadPhase& readPhase) override;
 
+    void loadStringDicts(const std::unordered_map<uint64_t, std::string>& 
columnIdToNameMap,
+                         std::unordered_map<std::string, StringDictionary*>* 
columnNameToDictMap,
+                         const StringDictFilter* stringDictFilter) override;
+
    private:
     template <bool encoded>
     void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* 
notNull,
@@ -1489,6 +1513,18 @@ namespace orc {
     }
   }
 
+  void MapColumnReader::loadStringDicts(
+      const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+      std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+      const StringDictFilter* stringDictFilter) {
+    if (keyReader.get()) {
+      keyReader->loadStringDicts(columnIdToNameMap, columnNameToDictMap, 
stringDictFilter);
+    }
+    if (elementReader.get()) {
+      elementReader->loadStringDicts(columnIdToNameMap, columnNameToDictMap, 
stringDictFilter);
+    }
+  }
+
   class UnionColumnReader : public ColumnReader {
    private:
     std::unique_ptr<ByteRleDecoder> rle;
@@ -1510,6 +1546,10 @@ namespace orc {
     void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>& 
positions,
                         const ReadPhase& readPhase) override;
 
+    void loadStringDicts(const std::unordered_map<uint64_t, std::string>& 
columnIdToNameMap,
+                         std::unordered_map<std::string, StringDictionary*>* 
columnNameToDictMap,
+                         const StringDictFilter* stringDictFilter) override;
+
    private:
     template <bool encoded>
     void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char* 
notNull,
@@ -1624,6 +1664,18 @@ namespace orc {
     }
   }
 
+  void UnionColumnReader::loadStringDicts(
+      const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+      std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+      const StringDictFilter* stringDictFilter) {
+    for (size_t i = 0; i < numChildren; ++i) {
+      if (childrenReader[i] != nullptr) {
+        childrenReader[i]->loadStringDicts(columnIdToNameMap, 
columnNameToDictMap,
+                                           stringDictFilter);
+      }
+    }
+  }
+
   /**
    * Destructively convert the number from zigzag encoding to the
    * natural signed representation.
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index c437d7cc..8c0e36bd 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -177,6 +177,11 @@ namespace orc {
      */
     virtual void seekToRowGroup(std::unordered_map<uint64_t, 
PositionProvider>& positions,
                                 const ReadPhase& readPhase = ReadPhase::ALL);
+
+    virtual void loadStringDicts(
+        const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+        std::unordered_map<std::string, StringDictionary*>* 
columnNameToDictMap,
+        const StringDictFilter* stringDictFilter) {}
   };
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to