This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new be6e456 ORC-800: [C++] Add `key` and `value` nullness check in `MapVectorBatch`
be6e456 is described below
commit be6e456c221058ddef58636b3811f83e61b1cdd5
Author: Zherui <[email protected]>
AuthorDate: Wed May 19 22:39:09 2021 -0400
ORC-800: [C++] Add `key` and `value` nullness check in `MapVectorBatch`
Fix toString() and getMemoryUsage() of MapVectorBatch when either key or
value column is not selected.
This closes #701
---
c++/src/Reader.hh | 1 +
c++/src/Vector.cc | 8 ++++----
tools/test/TestMatch.cc | 22 ++++++++++++++++++++++
3 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 7240975..c097885 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -96,6 +96,7 @@ namespace orc {
// For each child of type, select it if one of its children
// is selected.
bool selectParents(std::vector<bool>& selectedColumns, const Type& type);
+
/**
* Constructor that selects columns.
* @param contents of the file
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index 37ce67c..fefaaad 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -294,8 +294,8 @@ namespace orc {
std::string MapVectorBatch::toString() const {
std::ostringstream buffer;
- buffer << "Map vector <" << keys->toString() << ", "
- << elements->toString() << " with "
+ buffer << "Map vector <" << (keys ? keys->toString(): "key not selected") << ", "
+ << (elements ? elements->toString(): "value not selected") << " with "
<< numElements << " of " << capacity << ">";
return buffer.str();
}
@@ -316,8 +316,8 @@ namespace orc {
uint64_t MapVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
- + keys->getMemoryUsage()
- + elements->getMemoryUsage();
+ + (keys ? keys->getMemoryUsage() : 0)
+ + (elements ? elements->getMemoryUsage() : 0);
}
bool MapVectorBatch::hasVariableLength() {
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index fc4f6f9..8ba3079 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -1085,6 +1085,28 @@ TEST(TestMatch, selectColumns) {
<< "\"887336a7\"}}]}";
EXPECT_EQ(expectedMapWithColumnId.str(), line);
+ // Map column #12 again, to test map key is automatically included
+ // two subtypes with column id:
+ // map<string(20),struct(21)<int1(22):int,string1(23):string>
+ cols.clear();
+ cols.push_back(22);
+ cols.push_back(23);
+ rowReaderOpts.includeTypes(cols);
+ rowReader = reader->createRowReader(rowReaderOpts);
+ c = rowReader->getSelectedColumns();
+ for (unsigned int i=1; i < c.size(); i++) {
+ if (i==19 || (i>=21 && i<=23))
+ EXPECT_TRUE(c[i]);
+ else
+ EXPECT_TRUE(!c[i]);
+ }
+ batch = rowReader->createRowBatch(1);
+ std::ostringstream expectedMapSchema;
+ expectedMapSchema << "Struct vector <0 of 1; Map vector <key not selected, "
+ << "Struct vector <0 of 1; Long vector <0 of 1>; Byte vector <0 of 1>; > with 0 of 1>; >";
+ EXPECT_EQ(expectedMapSchema.str(), batch->toString());
+ EXPECT_EQ(45, batch->getMemoryUsage());
+
// Struct column #10, with field name: middle
std::list<std::string> colNames;
colNames.push_back("middle.list.int1");