This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new be6e456  ORC-800: [C++] Add `key` and `value` nullness check in `MapVectorBatch`
be6e456 is described below

commit be6e456c221058ddef58636b3811f83e61b1cdd5
Author: Zherui <[email protected]>
AuthorDate: Wed May 19 22:39:09 2021 -0400

    ORC-800: [C++] Add `key` and `value` nullness check in `MapVectorBatch`
    
    Fix toString() and getMemoryUsage() of MapVectorBatch when either key or value column is not selected.
    
    This closes #701
---
 c++/src/Reader.hh       |  1 +
 c++/src/Vector.cc       |  8 ++++----
 tools/test/TestMatch.cc | 22 ++++++++++++++++++++++
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 7240975..c097885 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -96,6 +96,7 @@ namespace orc {
     // For each child of type, select it if one of its children
     // is selected.
     bool selectParents(std::vector<bool>& selectedColumns, const Type& type);
+
    /**
     * Constructor that selects columns.
     * @param contents of the file
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index 37ce67c..fefaaad 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -294,8 +294,8 @@ namespace orc {
 
   std::string MapVectorBatch::toString() const {
     std::ostringstream buffer;
-    buffer << "Map vector <" << keys->toString() << ", "
-           << elements->toString() << " with "
+    buffer << "Map vector <" << (keys ? keys->toString(): "key not selected") << ", "
+           << (elements ? elements->toString(): "value not selected")  << " with "
            << numElements << " of " << capacity << ">";
     return buffer.str();
   }
@@ -316,8 +316,8 @@ namespace orc {
   uint64_t MapVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
            + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
-           + keys->getMemoryUsage()
-           + elements->getMemoryUsage();
+           + (keys ? keys->getMemoryUsage() : 0)
+           + (elements ? elements->getMemoryUsage() : 0);
   }
 
   bool MapVectorBatch::hasVariableLength() {
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index fc4f6f9..8ba3079 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -1085,6 +1085,28 @@ TEST(TestMatch, selectColumns) {
         << "\"887336a7\"}}]}";
     EXPECT_EQ(expectedMapWithColumnId.str(), line);
 
+    // Map column #12 again, to test map key is automatically included
+    // two subtypes with column id:
+    // map<string(20),struct(21)<int1(22):int,string1(23):string>
+    cols.clear();
+    cols.push_back(22);
+    cols.push_back(23);
+    rowReaderOpts.includeTypes(cols);
+    rowReader = reader->createRowReader(rowReaderOpts);
+    c = rowReader->getSelectedColumns();
+    for (unsigned int i=1; i < c.size(); i++) {
+      if (i==19 || (i>=21 && i<=23))
+        EXPECT_TRUE(c[i]);
+      else
+        EXPECT_TRUE(!c[i]);
+    }
+    batch = rowReader->createRowBatch(1);
+    std::ostringstream expectedMapSchema;
+    expectedMapSchema << "Struct vector <0 of 1; Map vector <key not selected, "
+        << "Struct vector <0 of 1; Long vector <0 of 1>; Byte vector <0 of 1>; > with 0 of 1>; >";
+    EXPECT_EQ(expectedMapSchema.str(), batch->toString());
+    EXPECT_EQ(45, batch->getMemoryUsage());
+
     // Struct column #10, with field name: middle
     std::list<std::string> colNames;
     colNames.push_back("middle.list.int1");

Reply via email to