dongjoon-hyun commented on a change in pull request #934:
URL: https://github.com/apache/orc/pull/934#discussion_r728656986



##########
File path: c++/test/TestReader.cc
##########
@@ -101,4 +101,41 @@ namespace orc {
       900, rowsInCurrentStripe, rowIndexStride, includedRowGroups));
   }
 
+  void CheckFileWithSargs(const char* fileName, const char* softwareVersion) {
+    std::stringstream ss;
+    if(const char* example_dir = std::getenv("ORC_EXAMPLE_DIR")) {
+      ss << example_dir;
+    } else {
+      ss << "../../../examples";
+    }
+    // Read a file with bloom filters written by CPP writer in version 1.6.11.
+    ss << "/" << fileName;
+    ReaderOptions readerOpts;
+    std::unique_ptr<Reader> reader =
+      createReader(readLocalFile(ss.str().c_str()), readerOpts);
+    EXPECT_EQ(WriterId::ORC_CPP_WRITER, reader->getWriterId());
+    EXPECT_EQ(softwareVersion, reader->getSoftwareVersion());
+
+    // Create SearchArgument with a EQUALS predicate which can leverage the 
bloom filters.
+    RowReaderOptions rowReaderOpts;
+    std::unique_ptr<SearchArgumentBuilder> sarg = 
SearchArgumentFactory::newBuilder();
+    // Integer value 18000000000 has an inconsistent hash before the fix of 
ORC-1024.
+    sarg->equals(1, 
PredicateDataType::LONG,Literal(static_cast<int64_t>(18000000000L)));
+    std::unique_ptr<SearchArgument> final_sarg = sarg->build();
+    rowReaderOpts.searchArgument(std::move(final_sarg));
+    std::unique_ptr<RowReader> rowReader = 
reader->createRowReader(rowReaderOpts);
+
+    // Make sure bad bloom filters won't affect the results.
+    std::unique_ptr<ColumnVectorBatch> batch =
+      rowReader->createRowBatch(1024);
+    EXPECT_TRUE(rowReader->next(*batch));
+    EXPECT_EQ(5, batch->numElements);
+    EXPECT_FALSE(rowReader->next(*batch));
+  }
+
+  TEST(TestRowReader, testSkipBadBloomFilters) {
+    CheckFileWithSargs("bad_bloom_filter_1.6.11.orc", "ORC C++ 1.6.11");
+    CheckFileWithSargs("bad_bloom_filter_1.6.0.orc", "ORC C++");
+    CheckFileWithSargs("bad_bloom_filter_1.6.11-SNAPSHOT.orc", "ORC C++ 
1.6.11-SNAPSHOT");

Review comment:
       Note that this just duplicates the existing test coverage, except for the 
`-SNAPSHOT` parsing part, @stiga-huang.
   I believe this only causes confusion. If that is the case, please simply 
remove it. I'm explicitly -1 on adding it like this.
   
   > For 1.6.12-SNAPSHOT vs 1.6.11-SNAPSHOT, I think we will backport the fix 
to 1.6 branch. The next release is 1.6.12. So 1.6.12 won't have the issue.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to