This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 22c81de538 Enhance range index rule and flag invalid queries (#8956)
22c81de538 is described below
commit 22c81de5383b19671213e8377445e1b3c79342ec
Author: Jia Guo <[email protected]>
AuthorDate: Wed Jun 22 23:55:10 2022 -0700
Enhance range index rule and flag invalid queries (#8956)
* Enhance range index rule and flag invalid queries
* Enhance range index rule and flag invalid queries
---
.../controller/recommender/io/InputManager.java | 17 +++
.../recommender/rules/impl/RangeIndexRule.java | 1 +
.../rules/io/params/RangeIndexRuleParams.java | 11 ++
.../rules/io/params/RecommenderConstants.java | 4 +
.../controller/recommender/TestConfigEngine.java | 14 ++
.../InvalidColumnInFilterInput.json | 170 +++++++++++++++++++++
6 files changed, 217 insertions(+)
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
index c6bbea84e4..fb74b85a48 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
@@ -66,6 +66,7 @@ import org.slf4j.LoggerFactory;
import static java.lang.Math.max;
import static
org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.*;
+import static
org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.FlagQueryRuleParams.ERROR_INVALID_COLUMN;
import static
org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.FlagQueryRuleParams.ERROR_INVALID_QUERY;
@@ -170,6 +171,22 @@ public class InputManager {
PinotQuery pinotQuery =
CalciteSqlParser.compileToPinotQuery(queryString);
_queryOptimizer.optimize(pinotQuery, _schema);
QueryContext queryContext =
QueryContextConverterUtils.getQueryContext(pinotQuery);
+
+ // Flag queries whose filter references columns that do not appear in the schema,
+ // to exclude user input like select i from tableName where a = xyz
and t > 500
+ Set<String> filterColumns = new HashSet<>();
+ if (queryContext.getFilter() != null) {
+ // collect the column names used in the filter (excluding literals, etc.)
+ queryContext.getFilter().getColumns(filterColumns);
+ // remove those that appear in the schema
+ filterColumns.removeAll(_colNameToIntMap.keySet());
+ // flag if there are columns left
+ if (!filterColumns.isEmpty()) {
+ invalidQueries.add(queryString);
+ _overWrittenConfigs.getFlaggedQueries().add(queryString,
ERROR_INVALID_COLUMN + filterColumns);
+ }
+ }
+
_parsedQueries.put(queryString,
Triple.of(_queryWeightMap.get(queryString),
CalciteSqlCompiler.convertToBrokerRequest(pinotQuery),
queryContext));
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
index e393f5f4fe..17424923cb 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
@@ -71,6 +71,7 @@ public class RangeIndexRule extends AbstractRule {
// As currently, only numeric columns are selected in range index
creation, we will skip non numeric columns
if (((weights[i] / totalWeight.get()) >
_params._thresholdMinPercentRangeIndex) && !_output.getIndexConfig()
.getSortedColumn().equals(colName) &&
!_output.getIndexConfig().getInvertedIndexColumns().contains(colName)
+ && _input.getCardinality(colName) >
_params._thresholdMinCardinalityRangeIndex
&& _input.getFieldType(colName).isNumeric()) {
_output.getIndexConfig().getRangeIndexColumns().add(colName);
}
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
index 5831930982..da9abcb213 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
@@ -28,6 +28,17 @@ import com.fasterxml.jackson.annotation.Nulls;
public class RangeIndexRuleParams {
public Double _thresholdMinPercentRangeIndex =
RecommenderConstants.RangeIndexRule.DEFAULT_THRESHOLD_MIN_PERCENT_RANGE_INDEX;
+ public Double _thresholdMinCardinalityRangeIndex =
+
RecommenderConstants.RangeIndexRule.DEFAULT_THRESHOLD_MIN_CARDINALITY_RANGE_INDEX;
+
+ public Double getThresholdMinCardinalityRangeIndex() {
+ return _thresholdMinCardinalityRangeIndex;
+ }
+
+ @JsonSetter(value = "THRESHOLD_MIN_CARDINALITY_RANGE_INDEX", nulls =
Nulls.SKIP)
+ public void setThresholdMinCardinalityRangeIndex(Double
thresholdMinCardinalityRangeIndex) {
+ _thresholdMinCardinalityRangeIndex = thresholdMinCardinalityRangeIndex;
+ }
public Double getThresholdMinPercentRangeIndex() {
return _thresholdMinPercentRangeIndex;
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
index 05e3d0219a..2cda521399 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
@@ -71,6 +71,7 @@ public class RecommenderConstants {
public static class RangeIndexRule {
public static final double DEFAULT_THRESHOLD_MIN_PERCENT_RANGE_INDEX = 0.4;
+ public static final double DEFAULT_THRESHOLD_MIN_CARDINALITY_RANGE_INDEX =
2;
}
public static class NoDictionaryOnHeapDictionaryJointRule {
@@ -94,6 +95,9 @@ public class RecommenderConstants {
public static final String WARNING_TOO_LONG_LIMIT =
"Warning: Please verify if you need to pull out huge number of records
for this query. Consider using smaller"
+ " limit than " + DEFAULT_THRESHOLD_MAX_LIMIT_SIZE;
+
+ public static final String ERROR_INVALID_COLUMN =
+ "ERROR: Query is filtering on columns not appearing in schema: ";
public static final String ERROR_INVALID_QUERY = "Error: Invalid query
syntax. Please fix the query";
}
diff --git
a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
index 7f96d8378b..26d33570b0 100644
---
a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
+++
b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
@@ -132,6 +132,20 @@ public class TestConfigEngine {
assertEquals(output.getIndexConfig().getSortedColumn(), "c");
}
+ @Test
+ void testInvalidColumnInFilterRule()
+ throws InvalidInputException, IOException {
+ loadInput("recommenderInput/InvalidColumnInFilterInput.json");
+ ConfigManager output = new ConfigManager();
+ AbstractRule abstractRule =
+
RulesToExecute.RuleFactory.getRule(RulesToExecute.Rule.InvertedSortedIndexJointRule,
_input, output);
+ abstractRule.run();
+ assertEquals(output.getIndexConfig().getInvertedIndexColumns().toString(),
"[]");
+
assertEquals(_input.getOverWrittenConfigs().getFlaggedQueries().getFlaggedQueries().toString(),
+ "{select i from tableName where a = xyz and t > 500=ERROR: "
+ + "Query is filtering on columns not appearing in schema: [xyz]}");
+ }
+
@Test
void testSortedInvertedIndexJointRuleWithMetricAndDateTimeColumn()
throws InvalidInputException, IOException {
diff --git
a/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json
b/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json
new file mode 100644
index 0000000000..0e45462f10
--- /dev/null
+++
b/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json
@@ -0,0 +1,170 @@
+{
+ "schema":{
+ "schemaName": "tableSchema",
+ "dimensionFieldSpecs": [
+ {
+ "name": "a",
+ "dataType": "INT",
+ "cardinality":20,
+ "numValuesPerEntry":1
+ },
+ {
+ "name": "b",
+ "dataType": "DOUBLE",
+ "cardinality":6,
+ "singleValueField": false,
+ "numValuesPerEntry":1.5
+ },
+ {
+ "name": "c",
+ "dataType": "FLOAT",
+ "cardinality":7,
+ "numValuesPerEntry":1
+ },
+ {
+ "name": "d",
+ "dataType": "STRING",
+ "cardinality":41,
+ "singleValueField": false,
+ "numValuesPerEntry":2,
+ "averageLength" : 27
+ },
+ {
+ "name": "e",
+ "dataType": "LONG",
+ "cardinality":18,
+ "singleValueField": false,
+ "numValuesPerEntry":4
+ },
+ {
+ "name": "f",
+ "dataType": "DOUBLE",
+ "cardinality":13,
+ "singleValueField": false,
+ "numValuesPerEntry":3
+ },
+ {
+ "name": "g",
+ "dataType": "STRING",
+ "cardinality":6,
+ "singleValueField": false,
+ "numValuesPerEntry":2,
+ "averageLength" : 100
+ },
+ {
+ "name": "h",
+ "dataType": "BYTES",
+ "cardinality":12,
+ "numValuesPerEntry":1,
+ "averageLength" : 10
+ },
+ {
+ "name": "i",
+ "dataType": "STRING",
+ "singleValueField": false,
+ "cardinality":7,
+ "numValuesPerEntry":1,
+ "averageLength" : 25
+ },
+ {
+ "name": "j",
+ "dataType": "DOUBLE",
+ "cardinality":4,
+ "numValuesPerEntry":1.00000001
+ },
+ {
+ "name": "ja",
+ "dataType": "BOOLEAN"
+ },
+ {
+ "name": "jb",
+ "dataType": "BOOLEAN",
+ "numValuesPerEntry": 3
+ }
+ ],
+ "metricFieldSpecs": [
+ {
+ "name": "k",
+ "dataType": "DOUBLE",
+ "cardinality":10000,
+ "numValuesPerEntry":1,
+ "averageLength" : 100
+ },
+ {
+ "name": "l",
+ "dataType": "DOUBLE",
+ "cardinality":10000,
+ "numValuesPerEntry":1,
+ "averageLength" : 10
+ },
+ {
+ "name": "m",
+ "dataType": "BYTES",
+ "cardinality":10000,
+ "numValuesPerEntry":1,
+ "averageLength" : 25
+ },
+ {
+ "name": "n",
+ "dataType": "DOUBLE",
+ "cardinality":10000,
+ "numValuesPerEntry":1
+ },
+ {
+ "name": "o",
+ "dataType": "DOUBLE",
+ "cardinality":10000,
+ "numValuesPerEntry":1,
+ "averageLength" : 25
+ },
+ {
+ "name": "p",
+ "dataType": "DOUBLE",
+ "cardinality":10000,
+ "numValuesPerEntry":1
+ }
+ ],
+ "timeFieldSpec": {
+ "incomingGranularitySpec": {
+ "dataType": "INT",
+ "name": "t",
+ "timeType": "DAYS",
+ "cardinality":10000,
+ "numValuesPerEntry":1
+ }
+ }
+ },
+ "queriesWithWeights":{
+ "select i from tableName where a = xyz and t > 500": 1,
+ "select i from tableName where a = 43 and t > 500": 1,
+ "select i from tableName where a = 'xyz' and t > 500": 1,
+ "select i from tableName where a = b and t > 500": 1
+ },
+ "qps": 15000,
+ "numMessagesPerSecInKafkaTopic":1000,
+ "numRecordsPerPush":1000000000,
+ "tableType": "HYBRID",
+ "latencySLA": 500,
+
+ "rulesToExecute": {
+ "recommendInvertedSortedIndexJoint": true
+ },
+ "partitionRuleParams": {
+ "THRESHOLD_MAX_LATENCY_SLA_PARTITION": 1001
+ },
+ "bloomFilterRuleParams": {
+ "THRESHOLD_MIN_PERCENT_EQ_BLOOMFILTER" : 0.51
+ },
+ "invertedSortedIndexJointRuleParams": {
+ "THRESHOLD_RATIO_MIN_GAIN_DIFF_BETWEEN_ITERATION" : 0.06
+ },
+ "noDictionaryOnHeapDictionaryJointRuleParams": {
+ "THRESHOLD_MIN_PERCENT_DICTIONARY_STORAGE_SAVE" : 0.96
+ },
+ "overWrittenConfigs": {
+ "indexConfig": {
+ "invertedIndexColumns": ["a","b"],
+ "rangeIndexColumns": ["f"]
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]