pavibhai commented on code in PR #1482:
URL: https://github.com/apache/orc/pull/1482#discussion_r1188921953
##########
java/core/src/test/org/apache/orc/TestOrcFilterContext.java:
##########
@@ -225,4 +247,101 @@ public void testRepeatingVector() {
assertTrue(OrcFilterContext.isNull(vectorBranch, 1));
assertTrue(OrcFilterContext.isNull(vectorBranch, 2));
}
+
+ @Test
+ public void testACIDTable() {
+ ColumnVector[] columnVector =
filterContextACID.findColumnVector("string1");
+ assertEquals(2, columnVector.length);
+ assertTrue(columnVector[1] instanceof BytesColumnVector, "Expected a
BytesColumnVector, but found "+ columnVector[1].getClass());
+ columnVector = filterContextACID.findColumnVector("int1");
+ assertEquals(2, columnVector.length);
+ assertTrue(columnVector[1] instanceof LongColumnVector, "Expected a
LongColumnVector, but found "+ columnVector[1].getClass());
+ }
+
+
+ @Test
+ public void testRowFilterWithACIDTable() throws IOException {
+ createAcidORCFile();
+ readSingleRowWithFilter(1);
+ fileSystem.delete(filePath, false);
+
+ }
+ private void createAcidORCFile() throws IOException {
+ configuration = new Configuration();
+ fileSystem = FileSystem.get(configuration);
+
+ try (Writer writer = OrcFile.createWriter(filePath,
+ OrcFile.writerOptions(configuration)
+ .fileSystem(fileSystem)
+ .overwrite(true)
+ .rowIndexStride(8192)
+ .setSchema(acidSchema))) {
+
+ Random random = new Random(1024);
+ VectorizedRowBatch b = acidSchema.createRowBatch();
+ for (int rowId = 0; rowId < RowCount; rowId++) {
+ long v = random.nextLong();
+ populateColumnValues(acidSchema, b.cols,b.size, v);
+ // Populate the rowId
+ ((LongColumnVector) b.cols[3]).vector[b.size] = rowId;
+ StructColumnVector row = (StructColumnVector) b.cols[5];
+ ((LongColumnVector) row.fields[0]).vector[b.size] = rowId;
+ b.size += 1;
+ if (b.size == b.getMaxSize()) {
+ writer.addRowBatch(b);
+ b.reset();
+ }
+ }
+ if (b.size > 0) {
+ writer.addRowBatch(b);
+ b.reset();
+ }
+ }
+ }
+
+ private void populateColumnValues(TypeDescription typeDescription,
ColumnVector[] columnVectors, int size, long value) {
+ for (int columnId = 0; columnId < typeDescription.getChildren().size() ;
columnId++) {
+ switch (typeDescription.getChildren().get(columnId).getCategory()) {
+ case INT:
+ ((LongColumnVector)columnVectors[columnId]).vector[size] = value;
+ break;
+ case LONG:
+ ((LongColumnVector)columnVectors[columnId]).vector[size] = value;
+ break;
+ case DECIMAL:
+ HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable();
+ hiveDecimalWritable.setFromLongAndScale(value, scale);
+ ((DecimalColumnVector) columnVectors[columnId]).vector[size] =
hiveDecimalWritable;
+ break;
+ case STRING:
+ ((BytesColumnVector) columnVectors[columnId]).setVal(size,
+ String.valueOf(value)
+ .getBytes(StandardCharsets.UTF_8));
+ break;
+ case STRUCT:
+ populateColumnValues(typeDescription.getChildren().get(columnId),
((StructColumnVector)columnVectors[columnId]).fields, size, value);
+ break;
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
+ }
+ private void readSingleRowWithFilter(long id) throws IOException {
+ Reader reader = OrcFile.createReader(filePath,
OrcFile.readerOptions(configuration).filesystem(fileSystem));
+ SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
+ .in("int1", PredicateLeaf.Type.LONG, id)
+ .build();
+ Reader.Options readerOptions = reader.options()
+ .searchArgument(searchArgument, new String[] {"int1"})
+ .useSelected(true)
+ .allowSARGToFilter(true);
+ VectorizedRowBatch vectorizedRowBatch = acidSchema.createRowBatch();
+ long rowCount = 0;
+ try (RecordReader recordReader = reader.rows(readerOptions)) {
+ assertTrue(recordReader.nextBatch(vectorizedRowBatch));
+ rowCount += vectorizedRowBatch.size;
Review Comment:
Can we please validate that the returned entry matches, i.e. that the value
in the LongColumnVector is the `id` that was passed in?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]