This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 1a701b7 Support NOT LIKE and NOT BETWEEN (#8331)
1a701b7 is described below
commit 1a701b7d52c5d571689d4b2f8159f8c0cdda0acd
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Wed Mar 9 19:30:08 2022 -0800
Support NOT LIKE and NOT BETWEEN (#8331)
---
.../apache/pinot/sql/parsers/CalciteSqlParser.java | 32 ++--
.../core/operator/filter/FilterOperatorUtils.java | 2 +-
.../core/operator/filter/NotFilterOperator.java | 2 +-
.../org/apache/pinot/queries/BaseQueriesTest.java | 9 --
.../pinot/queries/NotOperatorQueriesTest.java | 167 +++++++--------------
5 files changed, 78 insertions(+), 134 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index 5c21ef0..11678f9 100644
---
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -42,7 +42,9 @@ import org.apache.calcite.sql.SqlNumericLiteral;
import org.apache.calcite.sql.SqlOrderBy;
import org.apache.calcite.sql.SqlSelect;
import org.apache.calcite.sql.SqlSelectKeyword;
+import org.apache.calcite.sql.fun.SqlBetweenOperator;
import org.apache.calcite.sql.fun.SqlCase;
+import org.apache.calcite.sql.fun.SqlLikeOperator;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
@@ -55,6 +57,7 @@ import org.apache.pinot.common.request.ExpressionType;
import org.apache.pinot.common.request.Function;
import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.common.utils.request.RequestUtils;
+import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
import org.apache.pinot.segment.spi.AggregationFunctionType;
import org.apache.pinot.spi.utils.Pairs;
import org.apache.pinot.sql.parsers.rewriter.QueryRewriter;
@@ -152,7 +155,7 @@ public class CalciteSqlParser {
if (isAggregateExpression(selectExpression)) {
aggregateExprCount++;
} else if (hasGroupByClause &&
expressionOutsideGroupByList(selectExpression, groupByExprs)) {
- throw new SqlCompilationException(
+ throw new SqlCompilationException(
"'" + RequestUtils.prettyPrint(selectExpression) + "' should
appear in GROUP BY clause.");
}
}
@@ -686,19 +689,21 @@ public class CalciteSqlParser {
private static Expression compileFunctionExpression(SqlBasicCall
functionNode) {
SqlKind functionKind = functionNode.getKind();
+ boolean negated = false;
String functionName;
switch (functionKind) {
case AND:
return compileAndExpression(functionNode);
case OR:
return compileOrExpression(functionNode);
- case COUNT:
- SqlLiteral functionQuantifier = functionNode.getFunctionQuantifier();
- if (functionQuantifier != null &&
functionQuantifier.toValue().equalsIgnoreCase("DISTINCT")) {
- functionName = AggregationFunctionType.DISTINCTCOUNT.name();
- } else {
- functionName = AggregationFunctionType.COUNT.name();
- }
+ // BETWEEN and LIKE might be negated (NOT BETWEEN, NOT LIKE)
+ case BETWEEN:
+ negated = ((SqlBetweenOperator)
functionNode.getOperator()).isNegated();
+ functionName = SqlKind.BETWEEN.name();
+ break;
+ case LIKE:
+ negated = ((SqlLikeOperator) functionNode.getOperator()).isNegated();
+ functionName = SqlKind.LIKE.name();
break;
case OTHER:
case OTHER_FUNCTION:
@@ -740,7 +745,16 @@ public class CalciteSqlParser {
validateFunction(functionName, operands);
Expression functionExpression =
RequestUtils.getFunctionExpression(functionName);
functionExpression.getFunctionCall().setOperands(operands);
- return functionExpression;
+ if (negated) {
+ Expression negatedFunctionExpression =
RequestUtils.getFunctionExpression(FilterKind.NOT.name());
+ // Do not use `Collections.singletonList()` because we might modify the
operand later
+ List<Expression> negatedFunctionOperands = new ArrayList<>(1);
+ negatedFunctionOperands.add(functionExpression);
+
negatedFunctionExpression.getFunctionCall().setOperands(negatedFunctionOperands);
+ return negatedFunctionExpression;
+ } else {
+ return functionExpression;
+ }
}
/**
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
index 4b6feb0..4fa91aa 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
@@ -180,7 +180,7 @@ public class FilterOperatorUtils {
return 4;
}
if (filterOperator instanceof NotFilterOperator) {
- return getPriority((NotFilterOperator) ((NotFilterOperator)
filterOperator).getChildFilterOperator());
+ return getPriority(((NotFilterOperator)
filterOperator).getChildFilterOperator());
}
if (filterOperator instanceof ScanBasedFilterOperator) {
return getScanBasedFilterPriority((ScanBasedFilterOperator)
filterOperator, 5, debugOptions);
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
index 50a03b8..f1aeb50 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
@@ -58,7 +58,7 @@ public class NotFilterOperator extends BaseFilterOperator {
return new FilterBlock(new
NotDocIdSet(_filterOperator.nextBlock().getBlockDocIdSet(), _numDocs));
}
- public Operator getChildFilterOperator() {
+ public BaseFilterOperator getChildFilterOperator() {
return _filterOperator;
}
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
index 0333985..e283978 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
@@ -241,15 +241,6 @@ public abstract class BaseQueriesTest {
return brokerResponse;
}
- /**
- * Run optimized SQL query on multiple index segments.
- * <p>Use this to test the whole flow from server to broker.
- * <p>The result should be equivalent to querying 4 identical index segments.
- */
- protected BrokerResponseNative getBrokerResponseForOptimizedSqlQuery(String
sqlQuery, @Nullable Schema schema) {
- return getBrokerResponseForOptimizedSqlQuery(sqlQuery, null, schema,
PLAN_MAKER);
- }
-
protected BrokerResponseNative getBrokerResponseForOptimizedSqlQuery(String
sqlQuery, @Nullable TableConfig config,
@Nullable Schema schema) {
return getBrokerResponseForOptimizedSqlQuery(sqlQuery, config, schema,
PLAN_MAKER);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
index ca00dd5..ccb9626 100644
---
a/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
@@ -19,42 +19,38 @@
package org.apache.pinot.queries;
import java.io.File;
-import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.common.response.broker.BrokerResponseNative;
import org.apache.pinot.common.response.broker.ResultTable;
-import org.apache.pinot.core.common.Operator;
-import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock;
+import org.apache.pinot.core.operator.query.AggregationOperator;
import
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
import
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
-import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
import org.apache.pinot.segment.spi.ImmutableSegment;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
-import org.apache.pinot.spi.config.table.FSTType;
-import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.data.readers.RecordReader;
+import org.apache.pinot.spi.utils.ReadMode;
import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
-import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+
public class NotOperatorQueriesTest extends BaseQueriesTest {
private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(),
"NotOperatorQueriesTest");
- private static final String TABLE_NAME = "MyTable";
+ private static final String TABLE_NAME = "testTable";
private static final String SEGMENT_NAME = "testSegment";
private static final String FIRST_INT_COL_NAME = "FIRST_INT_COL";
private static final String SECOND_INT_COL_NAME = "SECOND_INT_COL";
@@ -85,21 +81,10 @@ public class NotOperatorQueriesTest extends BaseQueriesTest
{
throws Exception {
FileUtils.deleteQuietly(INDEX_DIR);
- List<IndexSegment> segments = new ArrayList<>();
buildSegment();
- IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- Set<String> invertedIndexCols = new HashSet<>();
- invertedIndexCols.add(FIRST_INT_COL_NAME);
- indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
- Set<String> fstIndexCols = new HashSet<>();
- fstIndexCols.add(DOMAIN_NAMES_COL);
- indexLoadingConfig.setFSTIndexColumns(fstIndexCols);
- indexLoadingConfig.setFSTIndexType(FSTType.LUCENE);
- ImmutableSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME), indexLoadingConfig);
- segments.add(segment);
-
- _indexSegment = segment;
- _indexSegments = segments;
+ ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new
File(INDEX_DIR, SEGMENT_NAME), ReadMode.mmap);
+ _indexSegment = immutableSegment;
+ _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
}
private List<String> getDomainNames() {
@@ -109,15 +94,15 @@ public class NotOperatorQueriesTest extends
BaseQueriesTest {
"www.sd.domain2.co.bc", "www.sd.domain2.co.cd");
}
- private List<GenericRow> createTestData(int numRows) {
+ private List<GenericRow> createTestData() {
List<GenericRow> rows = new ArrayList<>();
List<String> domainNames = getDomainNames();
- for (int i = 0; i < numRows; i++) {
+ for (int i = 0; i < NUM_ROWS; i++) {
String domain = domainNames.get(i % domainNames.size());
GenericRow row = new GenericRow();
- row.putField(FIRST_INT_COL_NAME, i);
- row.putField(SECOND_INT_COL_NAME, INT_BASE_VALUE + i);
- row.putField(DOMAIN_NAMES_COL, domain);
+ row.putValue(FIRST_INT_COL_NAME, i);
+ row.putValue(SECOND_INT_COL_NAME, INT_BASE_VALUE + i);
+ row.putValue(DOMAIN_NAMES_COL, domain);
rows.add(row);
}
return rows;
@@ -125,15 +110,8 @@ public class NotOperatorQueriesTest extends
BaseQueriesTest {
private void buildSegment()
throws Exception {
- List<GenericRow> rows = createTestData(NUM_ROWS);
- List<FieldConfig> fieldConfigs = new ArrayList<>();
- fieldConfigs.add(
- new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY,
FieldConfig.IndexType.FST, null, null));
- fieldConfigs.add(
- new FieldConfig(FIRST_INT_COL_NAME,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.INVERTED, null,
- null));
- TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-
.setInvertedIndexColumns(Arrays.asList(FIRST_INT_COL_NAME)).setFieldConfigList(fieldConfigs).build();
+ List<GenericRow> rows = createTestData();
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
.addSingleValueDimension(FIRST_INT_COL_NAME, FieldSpec.DataType.INT)
.addSingleValueDimension(DOMAIN_NAMES_COL, FieldSpec.DataType.STRING)
@@ -156,92 +134,53 @@ public class NotOperatorQueriesTest extends
BaseQueriesTest {
FileUtils.deleteQuietly(INDEX_DIR);
}
- private void testSelectionResults(String query, int expectedResultSize,
List<Serializable[]> expectedResults) {
- Operator<IntermediateResultsBlock> operator =
getOperatorForSqlQuery(query);
- IntermediateResultsBlock operatorResult = operator.nextBlock();
- List<Object[]> resultset = (List<Object[]>)
operatorResult.getSelectionResult();
- Assert.assertNotNull(resultset);
- Assert.assertEquals(resultset.size(), expectedResultSize);
- if (expectedResults != null) {
- for (int i = 0; i < expectedResultSize; i++) {
- Object[] actualRow = resultset.get(i);
- Object[] expectedRow = expectedResults.get(i);
- Assert.assertEquals(actualRow.length, expectedRow.length);
- for (int j = 0; j < actualRow.length; j++) {
- Object actualColValue = actualRow[j];
- Object expectedColValue = expectedRow[j];
- Assert.assertEquals(actualColValue, expectedColValue);
- }
- }
- }
- }
-
- private void testOptimizedQueryHelper(String query, int expectedResultSize,
List<Serializable[]> expectedResults) {
- BrokerResponseNative brokerResponseNative =
getBrokerResponseForOptimizedSqlQuery(query, null);
- ResultTable resultTable = brokerResponseNative.getResultTable();
- List<Object[]> results = resultTable.getRows();
- Assert.assertEquals(results.size(), expectedResultSize);
-
- if (expectedResults != null) {
- for (int i = 0; i < expectedResults.size(); i++) {
- Object[] actualRow = results.get(i);
- Serializable[] expectedRow = expectedResults.get(i);
- Assert.assertEquals(actualRow, expectedRow);
- }
- }
+ private void testNotOperator(String filter, long expectedSegmentResult) {
+ String query = "SELECT COUNT(*) FROM testTable WHERE " + filter;
+ AggregationOperator aggregationOperator = getOperatorForSqlQuery(query);
+ List<Object> segmentResults =
aggregationOperator.nextBlock().getAggregationResult();
+ assertNotNull(segmentResults);
+ assertEquals((long) segmentResults.get(0), expectedSegmentResult);
+
+ BrokerResponseNative brokerResponse = getBrokerResponseForSqlQuery(query);
+ ResultTable resultTable = brokerResponse.getResultTable();
+ Object[] brokerResults = resultTable.getRows().get(0);
+ assertEquals((long) brokerResults[0], 4 * expectedSegmentResult);
}
@Test
- public void testLikeBasedNotOperator() {
- String query =
- "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*') LIMIT "
- + "50000";
- testSelectionResults(query, 768, null);
- testOptimizedQueryHelper(query, 1536, null);
-
- query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*') "
- + "LIMIT 50000";
- testSelectionResults(query, 768, null);
- testOptimizedQueryHelper(query, 1536, null);
-
- query =
- "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*') LIMIT "
- + "50000";
- testSelectionResults(query, 512, null);
- testOptimizedQueryHelper(query, 1024, null);
-
- query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*') LIMIT "
- + "50000";
- testSelectionResults(query, 0, null);
- testOptimizedQueryHelper(query, 0, null);
-
- query =
- "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
REGEXP_LIKE(DOMAIN_NAMES, '.*com') LIMIT 50000";
- testSelectionResults(query, 768, null);
- testOptimizedQueryHelper(query, 1536, null);
+ public void testLikePredicates() {
+ testNotOperator("DOMAIN_NAMES NOT LIKE 'www.domain1%'", 768);
+ testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*')", 768);
+
+ testNotOperator("DOMAIN_NAMES NOT LIKE 'www.sd.domain1%'", 768);
+ testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*')", 768);
+
+ testNotOperator("DOMAIN_NAMES NOT LIKE '%domain1%'", 512);
+ testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*')", 512);
+
+ testNotOperator("DOMAIN_NAMES NOT LIKE '%domain%'", 0);
+ testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*')", 0);
+
+ testNotOperator("DOMAIN_NAMES NOT LIKE '%com'", 768);
+ testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*com')", 768);
}
@Test
- public void testWeirdPredicates() {
- String query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE
NOT FIRST_INT_COL = 5 LIMIT 50000";
- testSelectionResults(query, 1023, null);
+ public void testRangePredicates() {
+ testNotOperator("NOT FIRST_INT_COL = 5", 1023);
+ testNotOperator("NOT FIRST_INT_COL < 5", 1019);
+ testNotOperator("NOT FIRST_INT_COL > 5", 6);
- query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
FIRST_INT_COL < 5 LIMIT " + "50000";
- testSelectionResults(query, 1019, null);
-
- query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
FIRST_INT_COL > 5 LIMIT " + "50000";
- testSelectionResults(query, 6, null);
+ testNotOperator("FIRST_INT_COL NOT BETWEEN 10 AND 20", 1013);
+ testNotOperator("NOT FIRST_INT_COL BETWEEN 10 AND 20", 1013);
}
@Test
public void testCompositePredicates() {
- String query =
- "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
(FIRST_INT_COL > 5 AND SECOND_INT_COL < 1009) "
- + "LIMIT " + "50000";
- testSelectionResults(query, 1021, null);
-
- query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT
(FIRST_INT_COL < 5 OR SECOND_INT_COL > 2000) "
- + "LIMIT " + "50000";
- testSelectionResults(query, 996, null);
+ testNotOperator("NOT (FIRST_INT_COL > 5 AND SECOND_INT_COL < 1009)", 1021);
+ testNotOperator("NOT FIRST_INT_COL > 5 OR NOT SECOND_INT_COL < 1009",
1021);
+
+ testNotOperator("NOT (FIRST_INT_COL < 5 OR SECOND_INT_COL > 2000)", 996);
+ testNotOperator("NOT FIRST_INT_COL < 5 AND NOT SECOND_INT_COL > 2000",
996);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]