This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/orc by this push:
new 78bbe2e4 [Fix] Fix load string dict issue for transactional hive tables. (#112)
78bbe2e4 is described below
commit 78bbe2e41f2140b803855d683fae5e1a4b734a37
Author: Qi Chen <[email protected]>
AuthorDate: Tue Aug 22 10:15:37 2023 +0800
[Fix] Fix load string dict issue for transactional hive tables. (#112)
---
c++/src/ColumnReader.cc | 72 ++++++++++++++++++++++++++++++++++++++++++-------
c++/src/ColumnReader.hh | 5 ++++
2 files changed, 67 insertions(+), 10 deletions(-)
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 525f62c4..e94253e3 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -729,6 +729,7 @@ namespace orc {
void nextInternalWithFilter(ColumnVectorBatch& rowBatch, uint64_t
numValues, char* notNull,
const ReadPhase& readPhase, uint16_t*
sel_rowid_idx,
size_t sel_size);
+ StringDictionary* loadDictionary();
public:
StringDictionaryColumnReader(const Type& type, StripeStreams& stipe);
@@ -745,7 +746,9 @@ namespace orc {
void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>&
positions,
const ReadPhase& readPhase) override;
- StringDictionary* loadDictionary();
+ void loadStringDicts(const std::unordered_map<uint64_t, std::string>&
columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>*
columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) override;
};
StringDictionaryColumnReader::StringDictionaryColumnReader(const Type& type,
@@ -900,6 +903,17 @@ namespace orc {
rle->seek(positions.at(columnId));
}
+ void StringDictionaryColumnReader::loadStringDicts(
+ const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) {
+ auto iter = columnIdToNameMap.find(getType().getColumnId());
+ if (iter == columnIdToNameMap.end()) {
+ return;
+ }
+ (*columnNameToDictMap)[iter->second] = loadDictionary();
+ }
+
StringDictionary* StringDictionaryColumnReader::loadDictionary() {
if (dictionaryLoaded) {
return dictionary.get();
@@ -1110,7 +1124,7 @@ namespace orc {
void loadStringDicts(const std::unordered_map<uint64_t, std::string>&
columnIdToNameMap,
std::unordered_map<std::string, StringDictionary*>*
columnNameToDictMap,
- const StringDictFilter* stringDictFilter);
+ const StringDictFilter* stringDictFilter) override;
private:
template <bool encoded>
@@ -1198,14 +1212,7 @@ namespace orc {
std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
const StringDictFilter* stringDictFilter) {
for (auto& ptr : children) {
- auto iter = columnIdToNameMap.find(ptr->getType().getColumnId());
- if (iter == columnIdToNameMap.end()) {
- continue;
- }
- auto* stringDictionaryColumnReader =
dynamic_cast<StringDictionaryColumnReader*>(ptr.get());
- if (stringDictionaryColumnReader != nullptr) {
- (*columnNameToDictMap)[iter->second] =
stringDictionaryColumnReader->loadDictionary();
- }
+ ptr->loadStringDicts(columnIdToNameMap, columnNameToDictMap,
stringDictFilter);
}
}
@@ -1229,6 +1236,10 @@ namespace orc {
void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>&
positions,
const ReadPhase& readPhase) override;
+ void loadStringDicts(const std::unordered_map<uint64_t, std::string>&
columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>*
columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) override;
+
private:
template <bool encoded>
void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char*
notNull,
@@ -1339,6 +1350,15 @@ namespace orc {
}
}
+ void ListColumnReader::loadStringDicts(
+ const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) {
+ if (child.get()) {
+ child->loadStringDicts(columnIdToNameMap, columnNameToDictMap,
stringDictFilter);
+ }
+ }
+
class MapColumnReader : public ColumnReader {
private:
std::unique_ptr<ColumnReader> keyReader;
@@ -1360,6 +1380,10 @@ namespace orc {
void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>&
positions,
const ReadPhase& readPhase) override;
+ void loadStringDicts(const std::unordered_map<uint64_t, std::string>&
columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>*
columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) override;
+
private:
template <bool encoded>
void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char*
notNull,
@@ -1489,6 +1513,18 @@ namespace orc {
}
}
+ void MapColumnReader::loadStringDicts(
+ const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) {
+ if (keyReader.get()) {
+ keyReader->loadStringDicts(columnIdToNameMap, columnNameToDictMap,
stringDictFilter);
+ }
+ if (elementReader.get()) {
+ elementReader->loadStringDicts(columnIdToNameMap, columnNameToDictMap,
stringDictFilter);
+ }
+ }
+
class UnionColumnReader : public ColumnReader {
private:
std::unique_ptr<ByteRleDecoder> rle;
@@ -1510,6 +1546,10 @@ namespace orc {
void seekToRowGroup(std::unordered_map<uint64_t, PositionProvider>&
positions,
const ReadPhase& readPhase) override;
+ void loadStringDicts(const std::unordered_map<uint64_t, std::string>&
columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>*
columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) override;
+
private:
template <bool encoded>
void nextInternal(ColumnVectorBatch& rowBatch, uint64_t numValues, char*
notNull,
@@ -1624,6 +1664,18 @@ namespace orc {
}
}
+ void UnionColumnReader::loadStringDicts(
+ const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>* columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) {
+ for (size_t i = 0; i < numChildren; ++i) {
+ if (childrenReader[i] != nullptr) {
+ childrenReader[i]->loadStringDicts(columnIdToNameMap,
columnNameToDictMap,
+ stringDictFilter);
+ }
+ }
+ }
+
/**
* Destructively convert the number from zigzag encoding to the
* natural signed representation.
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index c437d7cc..8c0e36bd 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -177,6 +177,11 @@ namespace orc {
*/
virtual void seekToRowGroup(std::unordered_map<uint64_t,
PositionProvider>& positions,
const ReadPhase& readPhase = ReadPhase::ALL);
+
+ virtual void loadStringDicts(
+ const std::unordered_map<uint64_t, std::string>& columnIdToNameMap,
+ std::unordered_map<std::string, StringDictionary*>*
columnNameToDictMap,
+ const StringDictFilter* stringDictFilter) {}
};
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org