http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 6d1955d..f159eef 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -19,9 +19,11 @@ package org.apache.orc.impl; import static junit.framework.Assert.assertEquals; +import static junit.framework.TestCase.fail; import static org.hamcrest.core.Is.is; -import static org.junit.Assert.*; -import static org.mockito.Mockito.any; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; @@ -33,9 +35,9 @@ import java.io.IOException; import java.io.InputStream; import java.sql.Timestamp; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; @@ -46,7 +48,7 @@ import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl; -import org.apache.orc.BloomFilterIO; +import org.apache.orc.util.BloomFilter; import org.apache.orc.DataReader; import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; @@ -62,6 +64,7 @@ import org.apache.orc.OrcFile; import org.apache.orc.Reader; import org.apache.orc.OrcProto; +import org.junit.Assert; import org.junit.Test; import org.mockito.MockSettings; import 
org.mockito.Mockito; @@ -375,23 +378,23 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN)); pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN)); pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", false, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, 
OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN)); } @Test @@ -399,34 +402,34 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // Stats gets converted to column type. "15" is outside of "10" and "100" pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // Integer stats will not be converted date because of days/seconds/millis ambiguity pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, - 
RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -434,39 +437,39 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // Stats gets converted to column type. 
"15.0" is outside of "10.0" and "100.0" pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE)); // Double is not converted to date type because of days/seconds/millis ambiguity pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, 
null)); + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE)); } @Test @@ -474,33 +477,33 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 100.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "100", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // IllegalArgumentException is thrown when converting String to Date, hence YES_NO pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, 
"x", new HiveDecimalWritable("100"), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -509,69 +512,69 @@ public class TestRecordReaderImpl { PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); // Date to Integer conversion is not possible. assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // Date to Float conversion is also not possible. 
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "1970-01-11", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15.1", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "__a15__1", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", 
"2000-01-16", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "1970-01-16", null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // Date to Decimal conversion is also not possible. 
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -579,39 +582,39 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", 
"100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // "15" out of range of "10.0" and "100.0" pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // Decimal to Date not possible. pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null); assertEquals(TruthValue.NO, - 
RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -619,46 +622,46 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", new Timestamp(15).toString(), null); assertEquals(TruthValue.YES_NO, - 
RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10 * 24L * 60L * 60L * 1000L, - 100 * 24L * 60L * 60L * 1000L), pred, null)); + 100 * 24L * 60L * 60L * 1000L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, 
TypeDescription.Category.LONG)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -667,17 +670,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null, null, 
OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -686,17 +689,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -705,15 +708,15 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.NO_NULL, - 
RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -722,15 +725,15 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, 
TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -742,13 +745,13 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG, "x", null, args); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null, null, 
OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -760,19 +763,19 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG, "x", null, args); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null, 
null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -781,7 +784,7 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG, "x", null, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @@ -791,17 +794,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", "c", null); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before + RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max + RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle + 
RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same } @Test @@ -810,17 +813,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "c", null); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before + RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max + RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle + 
RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same } @Test @@ -829,17 +832,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x", "c", null); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before + RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max + RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle + 
RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle assertEquals(TruthValue.NO_NULL, // min, same stats - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -848,17 +851,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x", "c", null); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before + RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max + RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle + 
RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same } @Test @@ -870,17 +873,17 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING, "x", null, args); assertEquals(TruthValue.NO_NULL, // before & after - RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null)); // max + RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle + 
RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle assertEquals(TruthValue.YES_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same } @Test @@ -892,31 +895,31 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING, "x", null, args); assertEquals(TruthValue.YES_NULL, // before & after - RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, // before & max - RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, // before & before - RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, // before & min - RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, // before & middle - RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null)); + 
RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, // min & after - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NULL, // min & max - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.YES_NO_NULL, // min & middle - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null)); // max + RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max assertEquals(TruthValue.YES_NO_NULL, - RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle + RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle assertEquals(TruthValue.YES_NULL, 
// min & after, same stats - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -925,9 +928,9 @@ public class TestRecordReaderImpl { (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, "x", null, null); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); assertEquals(TruthValue.NO, - RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); } @Test @@ -1304,7 +1307,7 @@ public class TestRecordReaderImpl { public void testIntNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong(i); } @@ -1319,7 +1322,7 @@ public class TestRecordReaderImpl { public void testIntEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong(i); } @@ -1338,7 +1341,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new 
BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong(i); } @@ -1356,7 +1359,7 @@ public class TestRecordReaderImpl { public void testDoubleNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addDouble(i); } @@ -1371,7 +1374,7 @@ public class TestRecordReaderImpl { public void testDoubleEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addDouble(i); } @@ -1390,7 +1393,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.FLOAT, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addDouble(i); } @@ -1408,7 +1411,7 @@ public class TestRecordReaderImpl { public void testStringNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString("str_" + i); } @@ -1423,7 +1426,7 @@ public class TestRecordReaderImpl { public void testStringEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString("str_" + i); } @@ -1442,7 +1445,7 @@ public class 
TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString("str_" + i); } @@ -1461,7 +1464,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new DateWritable(i)).getDays()); } @@ -1477,7 +1480,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new DateWritable(i)).getDays()); } @@ -1496,7 +1499,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new DateWritable(i)).getDays()); } @@ -1516,7 +1519,7 @@ public class TestRecordReaderImpl { PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new Timestamp(i)).getTime()); } @@ -1531,7 +1534,7 @@ public class TestRecordReaderImpl { public void testTimestampEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); - BloomFilterIO bf = new 
BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new Timestamp(i)).getTime()); } @@ -1550,7 +1553,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.TIMESTAMP, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new Timestamp(i)).getTime()); } @@ -1570,7 +1573,7 @@ public class TestRecordReaderImpl { PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString(HiveDecimal.create(i).toString()); } @@ -1587,7 +1590,7 @@ public class TestRecordReaderImpl { PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString(HiveDecimal.create(i).toString()); } @@ -1606,7 +1609,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString(HiveDecimal.create(i).toString()); } @@ -1629,7 +1632,7 @@ public class TestRecordReaderImpl { PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL, "x", null, args); - BloomFilterIO bf = new BloomFilterIO(10000); + BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { bf.addString(HiveDecimal.create(i).toString()); } @@ -1692,4 +1695,171 @@ public class TestRecordReaderImpl { recordReader.close(); } + + @Test + public void TestOldBloomFilters() throws 
Exception { + OrcProto.StripeFooter footer = + OrcProto.StripeFooter.newBuilder() + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(1).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(1).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(2).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .build(); + TypeDescription schema = TypeDescription.fromString("struct<x:int,y:decimal(10,2),z:string>"); + OrcProto.Stream.Kind[] bloomFilterKinds = new OrcProto.Stream.Kind[4]; + + // normal read + DiskRangeList ranges = RecordReaderUtils.planIndexReading(schema, footer, + false, new boolean[]{true, true, false, true}, + new boolean[]{false, true, false, true}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[3]); + assertEquals("range start: 0 end: 2000", ranges.toString()); + assertEquals("range start: 4000 end: 6000", ranges.next.toString()); + assertEquals(null, ranges.next.next); + + // ignore non-utf8 bloom filter + Arrays.fill(bloomFilterKinds, null); + ranges = RecordReaderUtils.planIndexReading(schema, footer, + true, new boolean[]{true, true, false, true}, + new boolean[]{false, true, false, true}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(null, bloomFilterKinds[3]); + assertEquals("range 
start: 0 end: 2000", ranges.toString()); + assertEquals("range start: 4000 end: 5000", ranges.next.toString()); + assertEquals(null, ranges.next.next); + + // check that we are handling the post hive-12055 strings correctly + Arrays.fill(bloomFilterKinds, null); + ranges = RecordReaderUtils.planIndexReading(schema, footer, + true, null, new boolean[]{false, true, true, true}, + OrcFile.WriterVersion.HIVE_12055, bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(null, bloomFilterKinds[2]); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[3]); + assertEquals("range start: 0 end: 3000", ranges.toString()); + assertEquals("range start: 4000 end: 6000", ranges.next.toString()); + assertEquals(null, ranges.next.next); + + // ignore non-utf8 bloom filter on decimal + Arrays.fill(bloomFilterKinds, null); + ranges = RecordReaderUtils.planIndexReading(schema, footer, + true, null, + new boolean[]{false, false, true, false}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(null, bloomFilterKinds[2]); + assertEquals("range start: 0 end: 1000", ranges.toString()); + assertEquals("range start: 2000 end: 3000", ranges.next.toString()); + assertEquals("range start: 4000 end: 5000", ranges.next.next.toString()); + assertEquals(null, ranges.next.next.next); + } + + @Test + public void TestCompatibleBloomFilters() throws Exception { + OrcProto.StripeFooter footer = + OrcProto.StripeFooter.newBuilder() + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(1).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(1).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(2).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + 
.setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build()) + .build(); + TypeDescription schema = TypeDescription.fromString("struct<x:int,y:decimal(10,2),z:string>"); + OrcProto.Stream.Kind[] bloomFilterKinds = new OrcProto.Stream.Kind[4]; + + // normal read + DiskRangeList ranges = RecordReaderUtils.planIndexReading(schema, footer, + false, new boolean[]{true, true, false, true}, + new boolean[]{false, true, false, true}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[3]); + assertEquals("range start: 0 end: 2000", ranges.toString()); + assertEquals("range start: 5000 end: 6000", ranges.next.toString()); + assertEquals("range start: 7000 end: 8000", ranges.next.next.toString()); + assertEquals(null, ranges.next.next.next); + + // + Arrays.fill(bloomFilterKinds, null); + ranges = RecordReaderUtils.planIndexReading(schema, footer, + true, null, + new boolean[]{false, true, true, false}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[2]); + assertEquals("range start: 0 end: 3000", ranges.toString()); + assertEquals("range start: 4000 end: 6000", ranges.next.toString()); + assertEquals(null, ranges.next.next); + } + + @Test + public void 
TestNewBloomFilters() throws Exception { + OrcProto.StripeFooter footer = + OrcProto.StripeFooter.newBuilder() + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(1).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(1).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(2).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build()) + .addStreams(OrcProto.Stream.newBuilder() + .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build()) + .build(); + TypeDescription schema = TypeDescription.fromString("struct<x:int,y:decimal(10,2),z:string>"); + OrcProto.Stream.Kind[] bloomFilterKinds = new OrcProto.Stream.Kind[4]; + + // normal read + DiskRangeList ranges = RecordReaderUtils.planIndexReading(schema, footer, + false, new boolean[]{true, true, false, true}, + new boolean[]{false, true, false, true}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[3]); + assertEquals("range start: 0 end: 2000", ranges.toString()); + assertEquals("range start: 4000 end: 6000", ranges.next.toString()); + assertEquals(null, ranges.next.next); + + // + Arrays.fill(bloomFilterKinds, null); + ranges = RecordReaderUtils.planIndexReading(schema, footer, + true, null, + new boolean[]{false, true, true, false}, + OrcFile.WriterVersion.HIVE_4243, + bloomFilterKinds); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]); + assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[2]); + 
assertEquals("range start: 0 end: 5000", ranges.toString()); + assertEquals(null, ranges.next); + } }
// Patch context, kept verbatim from the commit e-mail (not Java source):
// http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/core/src/test/org/apache/orc/util/TestMurmur3.java
// ----------------------------------------------------------------------
// diff --git a/java/core/src/test/org/apache/orc/util/TestMurmur3.java b/java/core/src/test/org/apache/orc/util/TestMurmur3.java
// new file mode 100644
// index 0000000..575e250
// --- /dev/null
// +++ b/java/core/src/test/org/apache/orc/util/TestMurmur3.java
// @@ -0,0 +1,225 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.orc.util;

import static org.junit.Assert.assertEquals;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

import org.apache.orc.util.Murmur3;
import org.junit.Test;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;

/**
 * Tests for Murmur3 variants.
 *
 * <p>The 32-bit and 128-bit variants are cross-checked against Guava's
 * murmur3 hash functions; the 64-bit variant is checked against a fixed
 * expected value. Strings are always encoded as UTF-8 so that the hashed
 * bytes (and therefore the fixed expected value) do not depend on the
 * platform default charset.
 */
public class TestMurmur3 {

  /** Seed shared by the hash functions and the random input generators. */
  private static final int SEED = 123;

  /** Number of random values hashed by each randomized test. */
  private static final int ITERATIONS = 1000;

  /** Encodes {@code s} as UTF-8, independent of the platform default charset. */
  private static byte[] utf8(String s) {
    return s.getBytes(StandardCharsets.UTF_8);
  }

  /**
   * Returns a copy of {@code data} preceded by {@code offset} bytes that are
   * all set to {@code filler}.
   */
  private static byte[] prefixWith(byte[] data, int offset, byte filler) {
    byte[] shifted = new byte[data.length + offset];
    Arrays.fill(shifted, filler);
    System.arraycopy(data, 0, shifted, offset, data.length);
    return shifted;
  }

  /**
   * Hashes {@code data} with Guava and returns the 128-bit result as two
   * longs, in the order {first 8 bytes, last 8 bytes}.
   */
  private static long[] guavaHash128(HashFunction hf, byte[] data) {
    // guava stores the hashcodes in little endian order
    ByteBuffer buf = ByteBuffer.wrap(hf.hashBytes(data).asBytes())
        .order(ByteOrder.LITTLE_ENDIAN);
    return new long[] {buf.getLong(0), buf.getLong(8)};
  }

  /** Asserts that {@code Murmur3.hash32} agrees with Guava for {@code data}. */
  private static void assertHash32MatchesGuava(HashFunction hf, byte[] data) {
    int expected = hf.hashBytes(data).asInt();
    int actual = Murmur3.hash32(data, data.length, SEED);
    assertEquals(expected, actual);
  }

  /**
   * Asserts that {@code Murmur3.hash128} agrees with Guava for {@code data}
   * when the data is read starting {@code offset} bytes into a buffer whose
   * leading bytes are {@code filler}.
   */
  private static void assertHash128MatchesGuava(HashFunction hf, byte[] data,
                                                int offset, byte filler) {
    long[] expected = guavaHash128(hf, data);
    byte[] input = offset == 0 ? data : prefixWith(data, offset, filler);
    long[] actual = Murmur3.hash128(input, offset, data.length, SEED);
    assertEquals(expected[0], actual[0]);
    assertEquals(expected[1], actual[1]);
  }

  /** Asserts that {@code Murmur3.hash128} agrees with Guava at offset 0. */
  private static void assertHash128MatchesGuava(HashFunction hf, byte[] data) {
    assertHash128MatchesGuava(hf, data, 0, (byte) 0);
  }

  @Test
  public void testHashCodesM3_32_string() {
    HashFunction hf = Hashing.murmur3_32(SEED);
    assertHash32MatchesGuava(hf, utf8("test"));
    assertHash32MatchesGuava(hf, utf8("testkey"));
  }

  @Test
  public void testHashCodesM3_32_ints() {
    Random rand = new Random(SEED);
    HashFunction hf = Hashing.murmur3_32(SEED);
    for (int i = 0; i < ITERATIONS; i++) {
      int val = rand.nextInt();
      assertHash32MatchesGuava(hf, ByteBuffer.allocate(4).putInt(val).array());
    }
  }

  @Test
  public void testHashCodesM3_32_longs() {
    Random rand = new Random(SEED);
    HashFunction hf = Hashing.murmur3_32(SEED);
    for (int i = 0; i < ITERATIONS; i++) {
      long val = rand.nextLong();
      assertHash32MatchesGuava(hf, ByteBuffer.allocate(8).putLong(val).array());
    }
  }

  @Test
  public void testHashCodesM3_32_double() {
    Random rand = new Random(SEED);
    HashFunction hf = Hashing.murmur3_32(SEED);
    for (int i = 0; i < ITERATIONS; i++) {
      double val = rand.nextDouble();
      assertHash32MatchesGuava(hf, ByteBuffer.allocate(8).putDouble(val).array());
    }
  }

  @Test
  public void testHashCodesM3_128_string() {
    HashFunction hf = Hashing.murmur3_128(SEED);

    assertHash128MatchesGuava(hf, utf8("test"));

    byte[] keyBytes = utf8("testkey128_testkey128");
    assertHash128MatchesGuava(hf, keyBytes);

    // The same key must hash identically when it starts 35 bytes into a
    // buffer whose leading bytes are all 0xFF.
    assertHash128MatchesGuava(hf, keyBytes, 35, (byte) -1);
  }

  @Test
  public void testHashCodeM3_64() {
    byte[] origin = utf8("It was the best of times, it was the worst of times," +
        " it was the age of wisdom, it was the age of foolishness," +
        " it was the epoch of belief, it was the epoch of incredulity," +
        " it was the season of Light, it was the season of Darkness," +
        " it was the spring of hope, it was the winter of despair," +
        " we had everything before us, we had nothing before us," +
        " we were all going direct to Heaven," +
        " we were all going direct the other way.");
    long hash = Murmur3.hash64(origin, 0, origin.length);
    assertEquals(305830725663368540L, hash);

    // Same text read from offset 150 of a buffer pre-filled with 123.
    byte[] originOffset = prefixWith(origin, 150, (byte) 123);
    hash = Murmur3.hash64(originOffset, 150, origin.length);
    assertEquals(305830725663368540L, hash);
  }

  @Test
  public void testHashCodesM3_128_ints() {
    Random rand = new Random(SEED);
    HashFunction hf = Hashing.murmur3_128(SEED);
    for (int i = 0; i < ITERATIONS; i++) {
      int val = rand.nextInt();
      byte[] data = ByteBuffer.allocate(4).putInt(val).array();
      assertHash128MatchesGuava(hf, data);
      // Same value read from offset 50 of a zero-filled buffer.
      assertHash128MatchesGuava(hf, data, 50, (byte) 0);
    }
  }

  @Test
  public void testHashCodesM3_128_longs() {
    Random rand = new Random(SEED);
    HashFunction hf = Hashing.murmur3_128(SEED);
    for (int i = 0; i < ITERATIONS; i++) {
      long val = rand.nextLong();
      assertHash128MatchesGuava(hf, ByteBuffer.allocate(8).putLong(val).array());
    }
  }

  @Test
  public void testHashCodesM3_128_double() {
    Random rand = new Random(SEED);
    HashFunction hf = Hashing.murmur3_128(SEED);
    for (int i = 0; i < ITERATIONS; i++) {
      double val = rand.nextDouble();
      assertHash128MatchesGuava(hf, ByteBuffer.allocate(8).putDouble(val).array());
    }
  }
}

// The patch hunks below shared a physical line with the end of this class in
// the commit e-mail; they are kept verbatim (as comments) so nothing is lost.
//
// http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/core/src/test/resources/log4j.properties
// ----------------------------------------------------------------------
// diff --git a/java/core/src/test/resources/log4j.properties b/java/core/src/test/resources/log4j.properties
// index d2c063d..fae44b6 100644
// --- a/java/core/src/test/resources/log4j.properties
// +++ b/java/core/src/test/resources/log4j.properties
// @@ -15,3 +15,6 @@ log4j.rootLogger=WARN,stdout
//  log4j.appender.stdout=org.apache.log4j.ConsoleAppender
//  log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
//  log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n
// +
// +# Suppress the warnings about native io not being available
// +log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
// \ No newline at end of file
//
// http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/mapreduce/src/test/resources/log4j.properties
// ----------------------------------------------------------------------
// diff --git a/java/mapreduce/src/test/resources/log4j.properties b/java/mapreduce/src/test/resources/log4j.properties
// index d2c063d..fae44b6 100644
// --- a/java/mapreduce/src/test/resources/log4j.properties
// +++ b/java/mapreduce/src/test/resources/log4j.properties
// @@ -15,3 +15,6 @@ log4j.rootLogger=WARN,stdout
//  log4j.appender.stdout=org.apache.log4j.ConsoleAppender
//  log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
//  log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n
// +
// +# Suppress the warnings about native io not being available
// +log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
// \ No newline at end of file
//
// http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
// Patch context, kept verbatim from the commit e-mail (not Java source):
// ----------------------------------------------------------------------
// diff --git a/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java b/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
// deleted file mode 100644
// index e60690d..0000000
// --- a/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
// +++ /dev/null
// @@ -1,313 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.common.util;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

/**
 * BloomFilter is a probabilistic data structure for set membership check. BloomFilters are
 * highly space efficient when compared to using a HashSet. Because of the probabilistic nature of
 * bloom filter false positive (element not present in bloom filter but test() says true) are
 * possible but false negatives are not possible (if element is present then test() will never
 * say false). The false positive probability is configurable (default: 5%) depending on which
 * storage requirement may increase or decrease. Lower the false positive probability greater
 * is the space requirement.
 * Bloom filters are sensitive to number of elements that will be inserted in the bloom filter.
 * During the creation of bloom filter expected number of entries must be specified. If the number
 * of insertions exceed the specified initial number of entries then false positive probability will
 * increase accordingly.
 *
 * Internally, this implementation of bloom filter uses Murmur3 fast non-cryptographic hash
 * algorithm. Although Murmur2 is slightly faster than Murmur3 in Java, it suffers from hash
 * collisions for specific sequence of repeating bytes. Check the following link for more info
 * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw
 *
 * Strings are hashed over their UTF-8 encoding, so the bits set for a given string do not
 * depend on the default charset of the JVM that built or probes the filter.
 */
public class BloomFilter {
  public static final double DEFAULT_FPP = 0.05;
  protected BitSet bitSet;
  protected int numBits;
  protected int numHashFunctions;

  /**
   * Creates an uninitialized filter: no bit set is allocated and both sizes are zero.
   */
  public BloomFilter() {
  }

  /**
   * Creates a filter sized for {@code expectedEntries} insertions at {@link #DEFAULT_FPP}.
   *
   * @param expectedEntries expected number of insertions, must be &gt; 0
   */
  public BloomFilter(long expectedEntries) {
    this(expectedEntries, DEFAULT_FPP);
  }

  /**
   * Throws {@link IllegalArgumentException} with {@code message} unless {@code expression} holds.
   */
  static void checkArgument(boolean expression, String message) {
    if (!expression) {
      throw new IllegalArgumentException(message);
    }
  }

  /**
   * Creates a filter sized for {@code expectedEntries} insertions at the given false positive
   * probability.
   *
   * @param expectedEntries expected number of insertions, must be &gt; 0
   * @param fpp desired false positive probability, must be in (0.0, 1.0)
   * @throws IllegalArgumentException if an argument is out of range, or if the combination
   *         needs more bits than fit in an {@code int} after rounding
   */
  public BloomFilter(long expectedEntries, double fpp) {
    checkArgument(expectedEntries > 0, "expectedEntries should be > 0");
    checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");
    int nb = optimalNumOfBits(expectedEntries, fpp);
    // The rounding below adds up to Long.SIZE bits. optimalNumOfBits saturates at
    // Integer.MAX_VALUE, so without this guard the addition wraps to a negative size and
    // the allocation fails with NegativeArraySizeException.
    checkArgument(nb <= Integer.MAX_VALUE - Long.SIZE,
        "expectedEntries and fpp require more bits than a BloomFilter can address");
    // make 'm' multiple of 64 (an exact multiple still grows by 64; kept as is so that
    // filter sizes stay the same as before)
    this.numBits = nb + (Long.SIZE - (nb % Long.SIZE));
    this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, numBits);
    this.bitSet = new BitSet(numBits);
  }

  /**
   * A constructor to support rebuilding the BloomFilter from a serialized representation.
   *
   * @param bits the 64-bit words backing the bit set; copied into a new array
   * @param numBits number of bits used as the modulus when mapping hashes to positions
   * @param numFuncs number of hash functions applied per element
   */
  public BloomFilter(List<Long> bits, int numBits, int numFuncs) {
    long[] copied = new long[bits.size()];
    for (int i = 0; i < copied.length; i++) {
      copied[i] = bits.get(i);
    }
    this.bitSet = new BitSet(copied);
    this.numBits = numBits;
    this.numHashFunctions = numFuncs;
  }

  /**
   * Number of hash functions for {@code n} entries and {@code m} bits: round(m / n * ln 2),
   * but at least 1.
   */
  static int optimalNumOfHashFunctions(long n, long m) {
    return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
  }

  /**
   * Number of bits for {@code n} entries at false positive probability {@code p}:
   * -n * ln(p) / (ln 2)^2, truncated to {@code int}.
   */
  static int optimalNumOfBits(long n, double p) {
    return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
  }

  /**
   * Adds a byte array to the filter; {@code null} is accepted and hashed as
   * {@code Murmur3.NULL_HASHCODE}.
   */
  public void add(byte[] val) {
    if (val == null) {
      addBytes(val, -1, -1);
    } else {
      addBytes(val, 0, val.length);
    }
  }

  /**
   * Adds {@code length} bytes of {@code val} starting at {@code offset}. When {@code val} is
   * {@code null} the offset and length are ignored.
   */
  public void addBytes(byte[] val, int offset, int length) {
    // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter"
    // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively
    // implement a Bloom filter without any loss in the asymptotic false positive probability'

    // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned
    // in the above paper
    long hash64 = val == null ? Murmur3.NULL_HASHCODE :
        Murmur3.hash64(val, offset, length);
    addHash(hash64);
  }

  /**
   * Maps the i-th combined hash {@code hash1 + i * hash2} to a bit position in
   * {@code [0, numBits)}.
   */
  private int bitPosition(int hash1, int hash2, int i) {
    int combinedHash = hash1 + (i * hash2);
    // hashcode should be positive, flip all the bits if it's negative
    if (combinedHash < 0) {
      combinedHash = ~combinedHash;
    }
    return combinedHash % numBits;
  }

  /** Sets the {@code numHashFunctions} bit positions derived from {@code hash64}. */
  private void addHash(long hash64) {
    int hash1 = (int) hash64;
    int hash2 = (int) (hash64 >>> 32);

    for (int i = 1; i <= numHashFunctions; i++) {
      bitSet.set(bitPosition(hash1, hash2, i));
    }
  }

  /**
   * Adds a string to the filter, hashed over its UTF-8 bytes; {@code null} is treated like a
   * {@code null} byte array.
   */
  public void addString(String val) {
    if (val == null) {
      add(null);
    } else {
      // Explicit charset: getBytes() without one uses the platform default, which would make
      // the stored bits depend on the JVM that wrote them.
      add(val.getBytes(StandardCharsets.UTF_8));
    }
  }

  /** Adds a long to the filter. */
  public void addLong(long val) {
    addHash(getLongHash(val));
  }

  /** Adds a double to the filter via its {@link Double#doubleToLongBits(double)} value. */
  public void addDouble(double val) {
    addLong(Double.doubleToLongBits(val));
  }

  /**
   * Tests a byte array for membership; {@code null} is accepted.
   *
   * @return {@code false} if the value was definitely not added, {@code true} if it may have been
   */
  public boolean test(byte[] val) {
    if (val == null) {
      return testBytes(val, -1, -1);
    }
    return testBytes(val, 0, val.length);
  }

  /**
   * Tests {@code length} bytes of {@code val} starting at {@code offset} for membership. When
   * {@code val} is {@code null} the offset and length are ignored.
   */
  public boolean testBytes(byte[] val, int offset, int length) {
    long hash64 = val == null ? Murmur3.NULL_HASHCODE :
        Murmur3.hash64(val, offset, length);
    return testHash(hash64);
  }

  /** Returns true only if every bit position derived from {@code hash64} is set. */
  private boolean testHash(long hash64) {
    int hash1 = (int) hash64;
    int hash2 = (int) (hash64 >>> 32);

    for (int i = 1; i <= numHashFunctions; i++) {
      if (!bitSet.get(bitPosition(hash1, hash2, i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Tests a string for membership, hashed over its UTF-8 bytes (the same encoding
   * {@link #addString(String)} uses).
   */
  public boolean testString(String val) {
    if (val == null) {
      return test(null);
    } else {
      return test(val.getBytes(StandardCharsets.UTF_8));
    }
  }

  /** Tests a long for membership. */
  public boolean testLong(long val) {
    return testHash(getLongHash(val));
  }

  // Thomas Wang's integer hash function
  // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
  // The shifts are deliberately left exactly as they were (signed >>): any change here would
  // map the same long to different bit positions.
  private long getLongHash(long key) {
    key = (~key) + (key << 21); // key = (key << 21) - key - 1;
    key = key ^ (key >> 24);
    key = (key + (key << 3)) + (key << 8); // key * 265
    key = key ^ (key >> 14);
    key = (key + (key << 2)) + (key << 4); // key * 21
    key = key ^ (key >> 28);
    key = key + (key << 31);
    return key;
  }

  /** Tests a double for membership via its {@link Double#doubleToLongBits(double)} value. */
  public boolean testDouble(double val) {
    return testLong(Double.doubleToLongBits(val));
  }

  /** Size of the backing bit set in bytes. */
  public long sizeInBytes() {
    return getBitSize() / 8;
  }

  /** Size of the backing bit set in bits (number of backing words times 64). */
  public int getBitSize() {
    return bitSet.getData().length * Long.SIZE;
  }

  public int getNumHashFunctions() {
    return numHashFunctions;
  }

  /** Returns the backing word array itself, not a copy. */
  public long[] getBitSet() {
    return bitSet.getData();
  }

  @Override
  public String toString() {
    return "m: " + numBits + " k: " + numHashFunctions;
  }

  /**
   * Merge the specified bloom filter with current bloom filter.
   *
   * @param that - bloom filter to merge
   * @throws IllegalArgumentException if {@code that} is this same instance, or differs in
   *         number of bits or number of hash functions
   */
  public void merge(BloomFilter that) {
    if (this != that && this.numBits == that.numBits && this.numHashFunctions == that.numHashFunctions) {
      this.bitSet.putAll(that.bitSet);
    } else {
      throw new IllegalArgumentException("BloomFilters are not compatible for merging." +
          " this - " + this.toString() + " that - " + that.toString());
    }
  }

  /** Clears every bit, keeping the current size and number of hash functions. */
  public void reset() {
    this.bitSet.clear();
  }

  /**
   * Bare metal bit set implementation. For performance reasons, this implementation does not check
   * for index bounds nor expand the bit set size if the specified index is greater than the size.
   */
  public class BitSet {
    private final long[] data;

    /**
     * Allocates a zeroed bit set with room for at least {@code bits} bits.
     *
     * @param bits - number of bits
     */
    public BitSet(long bits) {
      this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]);
    }

    /**
     * Deserialize long array as bit set.
     *
     * @param data - bit array
     */
    public BitSet(long[] data) {
      assert data.length > 0 : "data length is zero!";
      this.data = data;
    }

    /**
     * Sets the bit at specified index.
     *
     * @param index - position
     */
    public void set(int index) {
      // A long shift uses only the low 6 bits of the shift count, so (1L << index) selects
      // the bit inside word (index >>> 6).
      data[index >>> 6] |= (1L << index);
    }

    /**
     * Returns true if the bit is set in the specified index.
     *
     * @param index - position
     * @return - value at the bit position
     */
    public boolean get(int index) {
      return (data[index >>> 6] & (1L << index)) != 0;
    }

    /**
     * Number of bits
     */
    public long bitSize() {
      return (long) data.length * Long.SIZE;
    }

    public long[] getData() {
      return data;
    }

    /**
     * Combines the two BitArrays using bitwise OR.
     */
    public void putAll(BitSet array) {
      assert data.length == array.data.length :
          "BitArrays must be of equal length (" + data.length + "!= " + array.data.length + ")";
      for (int i = 0; i < data.length; i++) {
        data[i] |= array.data[i];
      }
    }

    /**
     * Clear the bit set.
     */
    public void clear() {
      Arrays.fill(data, 0);
    }
  }
}