Jackie-Jiang commented on a change in pull request #8148: URL: https://github.com/apache/pinot/pull/8148#discussion_r806346742
########## File path: pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIteratorTest.java ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.buffer.MutableRoaringBitmap; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + + +public class NotDocIdIteratorTest { + @Test + public void testNotDocIdIterator() { + // OR result: [0, 1, 2, 4, 5, 6, 8, 10, 13, 15, 16, 17, 18, 19, 20] + int[] docIds1 = new int[]{1, 4, 6, 10, 15, 17, 18, 20}; + int[] docIds2 = new int[]{0, 1, 5, 8, 15, 18}; + int[] docIds3 = new int[]{1, 2, 6, 13, 16, 19}; + int[] docIds4 = new int[]{0, 1, 2, 3, 4, 5}; + + MutableRoaringBitmap bitmap1 = new MutableRoaringBitmap(); + bitmap1.add(docIds1); + MutableRoaringBitmap bitmap2 = new MutableRoaringBitmap(); + bitmap2.add(docIds2); + MutableRoaringBitmap bitmap3 = new MutableRoaringBitmap(); + bitmap3.add(docIds3); + MutableRoaringBitmap bitmap4 = new MutableRoaringBitmap(); + bitmap4.add(docIds4); + OrDocIdIterator orDocIdIterator = new OrDocIdIterator(new BlockDocIdIterator[]{ Review comment: Why do we need to build this `OrDocIdIterator`? We don't want to test `OrDocIdIterator` in this test ########## File path: pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIteratorTest.java ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.buffer.MutableRoaringBitmap; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + + +public class NotDocIdIteratorTest { + @Test + public void testNotDocIdIterator() { + // OR result: [0, 1, 2, 4, 5, 6, 8, 10, 13, 15, 16, 17, 18, 19, 20] + int[] docIds1 = new int[]{1, 4, 6, 10, 15, 17, 18, 20}; + int[] docIds2 = new int[]{0, 1, 5, 8, 15, 18}; + int[] docIds3 = new int[]{1, 2, 6, 13, 16, 19}; + int[] docIds4 = new int[]{0, 1, 2, 3, 4, 5}; + + MutableRoaringBitmap bitmap1 = new MutableRoaringBitmap(); + bitmap1.add(docIds1); + MutableRoaringBitmap bitmap2 = new MutableRoaringBitmap(); + bitmap2.add(docIds2); + MutableRoaringBitmap bitmap3 = new MutableRoaringBitmap(); + bitmap3.add(docIds3); + MutableRoaringBitmap bitmap4 = new MutableRoaringBitmap(); + bitmap4.add(docIds4); + OrDocIdIterator orDocIdIterator = new OrDocIdIterator(new BlockDocIdIterator[]{ + new RangelessBitmapDocIdIterator(bitmap1), new RangelessBitmapDocIdIterator( + bitmap2), new RangelessBitmapDocIdIterator(bitmap3) + }); + NotDocIdIterator notDocIdIterator = new NotDocIdIterator(new RangelessBitmapDocIdIterator(bitmap1), 25); + + assertEquals(notDocIdIterator.advance(1), 2); + assertEquals(notDocIdIterator.next(), 3); + assertEquals(notDocIdIterator.next(), 5); + assertEquals(notDocIdIterator.advance(7), 8); Review comment: (major) Per the contract, this should return 7 ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIterator.java ########## @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; + + +/** + * The iterator performs a linear pass through the underlying child iterator and returns + * the complement of the result set. + */ +public class NotDocIdIterator implements BlockDocIdIterator { + private final BlockDocIdIterator _childDocIdIterator; + private final int _numDocs; + private int _nextDocId; + private int _nextNonMatchingDocId; + + public NotDocIdIterator(BlockDocIdIterator childDocIdIterator, int numDocs) { + _childDocIdIterator = childDocIdIterator; + _nextDocId = 0; + + int currentDocIdFromChildIterator = childDocIdIterator.next(); + _nextNonMatchingDocId = currentDocIdFromChildIterator == Constants.EOF ? numDocs : currentDocIdFromChildIterator; + _numDocs = numDocs; + } + + @Override + public int next() { + while (_nextDocId == _nextNonMatchingDocId) { + _nextDocId = _nextNonMatchingDocId + 1; + + int nextMatchingDocId = _childDocIdIterator.next(); + + if (nextMatchingDocId == Constants.EOF) { + _nextNonMatchingDocId = _numDocs; + } else { + _nextNonMatchingDocId = nextMatchingDocId; + } + } + + if (_nextDocId >= _numDocs) { + return Constants.EOF; + } + + return _nextDocId++; + } + + @Override + public int advance(int targetDocId) { + if (targetDocId == Constants.EOF || targetDocId > _numDocs) { + return Constants.EOF; + } + + if (targetDocId < _nextDocId) { + return _nextDocId; + } + + _nextDocId = targetDocId + 1; + + int upperLimit = findUpperLimitGreaterThanDocId(targetDocId); + + if (upperLimit == Constants.EOF) { + _nextNonMatchingDocId = _numDocs; + } else { + _nextNonMatchingDocId = upperLimit; + } + + return _nextDocId++; + } + + private int findUpperLimitGreaterThanDocId(int currentDocId) { + int result = _childDocIdIterator.advance(currentDocId); Review comment: (major) need to check `_nextNonMatchingDocId < currentDocId` before advancing the pointer ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIterator.java ########## @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; + + +/** + * The iterator performs a linear pass through the underlying child iterator and returns + * the complement of the result set. + */ +public class NotDocIdIterator implements BlockDocIdIterator { + private final BlockDocIdIterator _childDocIdIterator; + private final int _numDocs; + private int _nextDocId; + private int _nextNonMatchingDocId; + + public NotDocIdIterator(BlockDocIdIterator childDocIdIterator, int numDocs) { + _childDocIdIterator = childDocIdIterator; + _nextDocId = 0; + + int currentDocIdFromChildIterator = childDocIdIterator.next(); + _nextNonMatchingDocId = currentDocIdFromChildIterator == Constants.EOF ? numDocs : currentDocIdFromChildIterator; + _numDocs = numDocs; + } + + @Override + public int next() { + while (_nextDocId == _nextNonMatchingDocId) { + _nextDocId = _nextNonMatchingDocId + 1; + + int nextMatchingDocId = _childDocIdIterator.next(); + + if (nextMatchingDocId == Constants.EOF) { + _nextNonMatchingDocId = _numDocs; + } else { + _nextNonMatchingDocId = nextMatchingDocId; + } + } + + if (_nextDocId >= _numDocs) { + return Constants.EOF; + } + + return _nextDocId++; + } + + @Override + public int advance(int targetDocId) { + if (targetDocId == Constants.EOF || targetDocId > _numDocs) { + return Constants.EOF; + } + + if (targetDocId < _nextDocId) { + return _nextDocId; + } Review comment: These checks are redundant because by contract, `targetDocId >= _nextDocId && targetDocId < _numDocs` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIterator.java ########## @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; + + +/** + * The iterator performs a linear pass through the underlying child iterator and returns + * the complement of the result set. + */ +public class NotDocIdIterator implements BlockDocIdIterator { + private final BlockDocIdIterator _childDocIdIterator; + private final int _numDocs; + private int _nextDocId; + private int _nextNonMatchingDocId; + + public NotDocIdIterator(BlockDocIdIterator childDocIdIterator, int numDocs) { + _childDocIdIterator = childDocIdIterator; + _nextDocId = 0; + + int currentDocIdFromChildIterator = childDocIdIterator.next(); + _nextNonMatchingDocId = currentDocIdFromChildIterator == Constants.EOF ? numDocs : currentDocIdFromChildIterator; + _numDocs = numDocs; + } + + @Override + public int next() { + while (_nextDocId == _nextNonMatchingDocId) { + _nextDocId = _nextNonMatchingDocId + 1; + + int nextMatchingDocId = _childDocIdIterator.next(); + + if (nextMatchingDocId == Constants.EOF) { + _nextNonMatchingDocId = _numDocs; + } else { + _nextNonMatchingDocId = nextMatchingDocId; + } + } + + if (_nextDocId >= _numDocs) { + return Constants.EOF; + } + + return _nextDocId++; + } + + @Override + public int advance(int targetDocId) { + if (targetDocId == Constants.EOF || targetDocId > _numDocs) { + return Constants.EOF; + } + + if (targetDocId < _nextDocId) { + return _nextDocId; + } + + _nextDocId = targetDocId + 1; Review comment: (major) This will skip the `targetDocId` ########## File path: pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIteratorTest.java ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.buffer.MutableRoaringBitmap; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + + +public class NotDocIdIteratorTest { + @Test + public void testNotDocIdIterator() { + // OR result: [0, 1, 2, 4, 5, 6, 8, 10, 13, 15, 16, 17, 18, 19, 20] + int[] docIds1 = new int[]{1, 4, 6, 10, 15, 17, 18, 20}; + int[] docIds2 = new int[]{0, 1, 5, 8, 15, 18}; + int[] docIds3 = new int[]{1, 2, 6, 13, 16, 19}; + int[] docIds4 = new int[]{0, 1, 2, 3, 4, 5}; + + MutableRoaringBitmap bitmap1 = new MutableRoaringBitmap(); + bitmap1.add(docIds1); + MutableRoaringBitmap bitmap2 = new MutableRoaringBitmap(); + bitmap2.add(docIds2); + MutableRoaringBitmap bitmap3 = new MutableRoaringBitmap(); + bitmap3.add(docIds3); + MutableRoaringBitmap bitmap4 = new MutableRoaringBitmap(); + bitmap4.add(docIds4); + OrDocIdIterator orDocIdIterator = new OrDocIdIterator(new BlockDocIdIterator[]{ + new RangelessBitmapDocIdIterator(bitmap1), new RangelessBitmapDocIdIterator( + bitmap2), new RangelessBitmapDocIdIterator(bitmap3) + }); + NotDocIdIterator notDocIdIterator = new NotDocIdIterator(new RangelessBitmapDocIdIterator(bitmap1), 25); + + assertEquals(notDocIdIterator.advance(1), 2); + assertEquals(notDocIdIterator.next(), 3); + assertEquals(notDocIdIterator.next(), 5); + assertEquals(notDocIdIterator.advance(7), 8); + assertEquals(notDocIdIterator.advance(13), 14); Review comment: (major) This should return 13, same for other places ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java ########## @@ -165,6 +179,9 @@ int getPriority(BaseFilterOperator filterOperator) { if (filterOperator instanceof OrFilterOperator) { return 4; } + if (filterOperator instanceof NotFilterOperator) { + return getPriority((BaseFilterOperator) filterOperator.getChildOperators().get(0)); Review comment: (minor) We can add a `getChildFilterOperator()` in `NotFilterOperator` which can save an unnecessary array allocation ########## File path: pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/NotDocIdIteratorTest.java ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.core.common.BlockDocIdIterator; +import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.buffer.MutableRoaringBitmap; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + + +public class NotDocIdIteratorTest { + @Test + public void testNotDocIdIterator() { + // OR result: [0, 1, 2, 4, 5, 6, 8, 10, 13, 15, 16, 17, 18, 19, 20] + int[] docIds1 = new int[]{1, 4, 6, 10, 15, 17, 18, 20}; + int[] docIds2 = new int[]{0, 1, 5, 8, 15, 18}; + int[] docIds3 = new int[]{1, 2, 6, 13, 16, 19}; + int[] docIds4 = new int[]{0, 1, 2, 3, 4, 5}; + + MutableRoaringBitmap bitmap1 = new MutableRoaringBitmap(); + bitmap1.add(docIds1); + MutableRoaringBitmap bitmap2 = new MutableRoaringBitmap(); + bitmap2.add(docIds2); + MutableRoaringBitmap bitmap3 = new MutableRoaringBitmap(); + bitmap3.add(docIds3); + MutableRoaringBitmap bitmap4 = new MutableRoaringBitmap(); + bitmap4.add(docIds4); + OrDocIdIterator orDocIdIterator = new OrDocIdIterator(new BlockDocIdIterator[]{ Review comment: Understood. What I'm saying is that we should directly set the `docIds` (similar to `docIds1`) instead of creating an `OrDocIdIterator` because that is more clear, and not mixing the scope of this test. ########## File path: pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java ########## @@ -0,0 +1,225 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.queries; + +import java.io.File; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; +import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; +import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.FSTType; +import org.apache.pinot.spi.config.table.FieldConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.GenericRow; +import org.apache.pinot.spi.data.readers.RecordReader; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +public class NotOperatorQueriesTest extends BaseQueriesTest { + private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "NotOperatorQueriesTest"); + private static final String TABLE_NAME = "MyTable"; + private static final String SEGMENT_NAME = "testSegment"; + private static final String FIRST_INT_COL_NAME = "FIRST_INT_COL"; + private static final String SECOND_INT_COL_NAME = "SECOND_INT_COL"; + private static final String DOMAIN_NAMES_COL = "DOMAIN_NAMES"; + private static final Integer INT_BASE_VALUE = 1000; + private static final Integer NUM_ROWS = 1024; + + private IndexSegment _indexSegment; + private List<IndexSegment> _indexSegments; + + @Override + protected String getFilter() { + return ""; + } + + @Override + protected IndexSegment getIndexSegment() { + return _indexSegment; + } + + @Override + protected List<IndexSegment> getIndexSegments() { + return _indexSegments; + } + + @BeforeClass + public void setUp() + throws Exception { + FileUtils.deleteQuietly(INDEX_DIR); + + List<IndexSegment> segments = new ArrayList<>(); + buildSegment(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + Set<String> invertedIndexCols = new HashSet<>(); + invertedIndexCols.add(FIRST_INT_COL_NAME); + indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols); + Set<String> fstIndexCols = new HashSet<>(); + fstIndexCols.add(DOMAIN_NAMES_COL); + indexLoadingConfig.setFSTIndexColumns(fstIndexCols); + indexLoadingConfig.setFSTIndexType(FSTType.LUCENE); + ImmutableSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); + segments.add(segment); + + _indexSegment = segment; + _indexSegments = segments; + } + + private List<String> getDomainNames() { + return Arrays.asList("www.domain1.com", "www.domain1.co.ab", "www.domain1.co.bc", "www.domain1.co.cd", + "www.sd.domain1.com", "www.sd.domain1.co.ab", "www.sd.domain1.co.bc", "www.sd.domain1.co.cd", "www.domain2.com", + "www.domain2.co.ab", "www.domain2.co.bc", "www.domain2.co.cd", "www.sd.domain2.com", "www.sd.domain2.co.ab", + "www.sd.domain2.co.bc", "www.sd.domain2.co.cd"); + } + + private List<GenericRow> createTestData(int numRows) { + List<GenericRow> rows = new ArrayList<>(); + List<String> domainNames = getDomainNames(); + for (int i = 0; i < numRows; i++) { + String domain = domainNames.get(i % domainNames.size()); + GenericRow row = new GenericRow(); + row.putField(FIRST_INT_COL_NAME, i); + row.putField(SECOND_INT_COL_NAME, INT_BASE_VALUE + i); + row.putField(DOMAIN_NAMES_COL, domain); + rows.add(row); + } + return rows; + } + + private void buildSegment() + throws Exception { + List<GenericRow> rows = createTestData(NUM_ROWS); + List<FieldConfig> fieldConfigs = new ArrayList<>(); + fieldConfigs.add( + new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null)); + fieldConfigs.add( + new FieldConfig(FIRST_INT_COL_NAME, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.INVERTED, null, + null)); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setInvertedIndexColumns(Arrays.asList(FIRST_INT_COL_NAME)).setFieldConfigList(fieldConfigs).build(); + Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension(FIRST_INT_COL_NAME, FieldSpec.DataType.INT) + .addSingleValueDimension(DOMAIN_NAMES_COL, FieldSpec.DataType.STRING) + .addMetric(SECOND_INT_COL_NAME, FieldSpec.DataType.INT).build(); + SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema); + config.setOutDir(INDEX_DIR.getPath()); + config.setTableName(TABLE_NAME); + config.setSegmentName(SEGMENT_NAME); + + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); + try (RecordReader recordReader = new GenericRowRecordReader(rows)) { + driver.init(config, recordReader); + driver.build(); + } + } + + @AfterClass + public void tearDown() { + _indexSegment.destroy(); + FileUtils.deleteQuietly(INDEX_DIR); + } + + private void testSelectionResults(String query, int expectedResultSize, List<Serializable[]> expectedResults) { + Operator<IntermediateResultsBlock> operator = getOperatorForSqlQuery(query); + IntermediateResultsBlock operatorResult = operator.nextBlock(); + List<Object[]> resultset = (List<Object[]>) operatorResult.getSelectionResult(); + Assert.assertNotNull(resultset); + Assert.assertEquals(resultset.size(), expectedResultSize); + if (expectedResults != null) { + for (int i = 0; i < expectedResultSize; i++) { + Object[] actualRow = resultset.get(i); + Object[] expectedRow = expectedResults.get(i); + Assert.assertEquals(actualRow.length, expectedRow.length); + for (int j = 0; j < actualRow.length; j++) { + Object actualColValue = actualRow[j]; + Object expectedColValue = expectedRow[j]; + Assert.assertEquals(actualColValue, expectedColValue); + } + } + } + } + + @Test + public void testLikeBasedNotOperator() { + String query = + "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*') LIMIT " + + "50000"; + testSelectionResults(query, 768, null); + + query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*') " + + "LIMIT 50000"; + testSelectionResults(query, 768, null); + + query = + "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*') LIMIT " + + "50000"; + testSelectionResults(query, 512, null); + + query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*') LIMIT " + + "50000"; + testSelectionResults(query, 0, null); + + query = + "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, '.*com') LIMIT 50000"; + testSelectionResults(query, 768, null); + } + + @Test + public void testWeirdPredicates() { + String query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT FIRST_INT_COL = 5 LIMIT " + "50000"; Review comment: (minor) Remove the unnecessary `+`, same for other places ```suggestion String query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT FIRST_INT_COL = 5 LIMIT 50000"; ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
