This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 5c914f4786e51cc58c6659544541f9b230ec2066 Author: Ali Alsuliman <[email protected]> AuthorDate: Thu Nov 6 10:52:31 2025 -0800 [ASTERIXDB-3670][COMP] Fix ArrayIndexOutOfBoundsException with array index - user model changes: no - storage format changes: no - interface changes: no Ext-ref: MB-69201 Change-Id: Ic1f94be46ec12ad88422e455296cd20508b1f541 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20553 Tested-by: Ali Alsuliman <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- .../optimizer/rules/am/AccessMethodUtils.java | 3 + .../optimizer/rules/am/BTreeAccessMethod.java | 3 + .../optimizer/rules/am/RTreeAccessMethod.java | 4 +- .../use-case-5/query1.sqlpp | 41 +++++++++++++ .../use-case-5/query2.sqlpp | 42 +++++++++++++ .../use-case-5/query1.plan | 69 ++++++++++++++++++++++ .../use-case-5/query2.plan | 69 ++++++++++++++++++++++ 7 files changed, 230 insertions(+), 1 deletion(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java index b66a0fd199..5669e63839 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java @@ -725,6 +725,9 @@ public class AccessMethodUtils { // We are optimizing a join query. Determine which variable feeds the secondary index. OptimizableOperatorSubTree opSubTree0 = optFuncExpr.getOperatorSubTree(0); int probeVarIndex = opSubTree0 == null || opSubTree0 == probeSubTree ? 0 : 1; + if (probeVarIndex >= optFuncExpr.getNumLogicalVars()) { + return null; + } LogicalVariable probeVar = optFuncExpr.getLogicalVar(probeVarIndex); VariableReferenceExpression probeExpr = new VariableReferenceExpression(probeVar); probeExpr.setSourceLocation(sourceLoc); diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java index 2a94359e1a..e6342c104d 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java @@ -413,6 +413,9 @@ public class BTreeAccessMethod implements IAccessMethod { Triple<ILogicalExpression, ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils.createSearchKeyExpr(chosenIndex, optFuncExpr, indexedFieldType, probeSubTree, SEARCH_KEY_ROUNDING_FUNCTION_COMPUTER); + if (returnedSearchKeyExpr == null) { + return null; + } ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first; ILogicalExpression searchKeyEQExpr = null; boolean realTypeConvertedToIntegerType = returnedSearchKeyExpr.third; diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java index 3de78f7432..a20f4989c0 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java @@ -270,7 +270,9 @@ public class RTreeAccessMethod implements IAccessMethod { Triple<ILogicalExpression, ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils.createSearchKeyExpr(chosenIndex, optFuncExpr, optFieldType, probeSubTree, SEARCH_KEY_ROUNDING_FUNCTION_PROVIDER); - + if (returnedSearchKeyExpr == null) { + return null; + } for (int i = 0; i < numSecondaryKeys; i++) { // The create MBR function "extracts" one field of an MBR around the given spatial object. AbstractFunctionCallExpression createMBR = diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/array-index/select-quantified-queries/use-case-5/query1.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/array-index/select-quantified-queries/use-case-5/query1.sqlpp new file mode 100644 index 0000000000..221b4a1717 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/array-index/select-quantified-queries/use-case-5/query1.sqlpp @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// testing select-quantified with JOIN on other fields and making sure the query does not fail +SET `compiler.arrayindex` "true"; + +DROP DATAVERSE TestYelp IF EXISTS; +CREATE DATAVERSE TestYelp; +USE TestYelp; + +CREATE TYPE OpenType AS { + id: int +}; + +CREATE DATASET sales(OpenType) PRIMARY KEY id; +CREATE DATASET temp (OpenType) PRIMARY KEY id; +CREATE INDEX idx_fv ON temp(UNNEST forecast SELECT fv : string) EXCLUDE UNKNOWN KEY; + +SELECT + sales.date, + sales.sid, + temp.forecast +FROM sales JOIN temp ON temp.x = sales.sid +WHERE (ANY ff IN temp.forecast SATISFIES ff.fv = "aaaa" END ) +LIMIT 10; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/array-index/select-quantified-queries/use-case-5/query2.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/array-index/select-quantified-queries/use-case-5/query2.sqlpp new file mode 100644 index 0000000000..2ea664d2e1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/array-index/select-quantified-queries/use-case-5/query2.sqlpp @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// testing select-quantified with JOIN on other fields and making sure the query does not fail. +// the array index is currently not used due to the JOIN. +SET `compiler.arrayindex` "true"; + +DROP DATAVERSE TestYelp IF EXISTS; +CREATE DATAVERSE TestYelp; +USE TestYelp; + +CREATE TYPE OpenType AS { + id: int +}; + +CREATE DATASET sales(OpenType) PRIMARY KEY id; +CREATE DATASET temp (OpenType) PRIMARY KEY id; +CREATE INDEX idx_fv ON temp(UNNEST forecast SELECT fv : string) EXCLUDE UNKNOWN KEY; + +SELECT + sales.date, + sales.sid, + temp.forecast +FROM sales JOIN temp ON temp.x = sales.sid +WHERE (ANY ff IN temp.forecast SATISFIES ff.fv /*+ indexnl */ = "aaaa" END ) +LIMIT 10; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/array-index/select-quantified-queries/use-case-5/query1.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/array-index/select-quantified-queries/use-case-5/query1.plan new file mode 100644 index 0000000000..44e3886609 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/array-index/select-quantified-queries/use-case-5/query1.plan @@ -0,0 +1,69 @@ +distribute result [$$44] +-- DISTRIBUTE_RESULT |UNPARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + limit 10 + -- STREAM_LIMIT |UNPARTITIONED| + exchange + -- RANDOM_MERGE_EXCHANGE |PARTITIONED| + project ([$$44]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$44] <- [{"date": $$51, "sid": $$45, "forecast": $$49}] + -- ASSIGN |PARTITIONED| + limit 10 + -- STREAM_LIMIT |PARTITIONED| + project ([$$51, $$45, $$49]) + -- STREAM_PROJECT |PARTITIONED| + select ($$39) + -- STREAM_SELECT |PARTITIONED| + subplan { + aggregate [$$39] <- [non-empty-stream()] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- AGGREGATE |LOCAL| + select (eq($$50, "aaaa")) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_SELECT |LOCAL| + assign [$$50] <- [$$ff.getField("fv")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |LOCAL| + unnest $$ff <- scan-collection($$49) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- UNNEST |LOCAL| + nested tuple source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- SUBPLAN |PARTITIONED| + project ([$$51, $$45, $$49]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$48, $$45)) + -- HYBRID_HASH_JOIN [$$45][$$48] |PARTITIONED| + exchange + -- HASH_PARTITION_EXCHANGE [$$45] |PARTITIONED| + project ([$$51, $$45]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$51, $$45] <- [$$sales.getField("date"), $$sales.getField("sid")] + -- ASSIGN |PARTITIONED| + project ([$$sales]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$46, $$sales] <- TestYelp.sales + -- DATASOURCE_SCAN |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange + -- HASH_PARTITION_EXCHANGE [$$48] |PARTITIONED| + project ([$$49, $$48]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$49, $$48] <- [$$temp.getField("forecast"), $$temp.getField("x")] + -- ASSIGN |PARTITIONED| + project ([$$temp]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$47, $$temp] <- TestYelp.temp + -- DATASOURCE_SCAN |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |PARTITIONED| \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/array-index/select-quantified-queries/use-case-5/query2.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/array-index/select-quantified-queries/use-case-5/query2.plan new file mode 100644 index 0000000000..44e3886609 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/array-index/select-quantified-queries/use-case-5/query2.plan @@ -0,0 +1,69 @@ +distribute result [$$44] +-- DISTRIBUTE_RESULT |UNPARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + limit 10 + -- STREAM_LIMIT |UNPARTITIONED| + exchange + -- RANDOM_MERGE_EXCHANGE |PARTITIONED| + project ([$$44]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$44] <- [{"date": $$51, "sid": $$45, "forecast": $$49}] + -- ASSIGN |PARTITIONED| + limit 10 + -- STREAM_LIMIT |PARTITIONED| + project ([$$51, $$45, $$49]) + -- STREAM_PROJECT |PARTITIONED| + select ($$39) + -- STREAM_SELECT |PARTITIONED| + subplan { + aggregate [$$39] <- [non-empty-stream()] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- AGGREGATE |LOCAL| + select (eq($$50, "aaaa")) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_SELECT |LOCAL| + assign [$$50] <- [$$ff.getField("fv")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |LOCAL| + unnest $$ff <- scan-collection($$49) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- UNNEST |LOCAL| + nested tuple source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- SUBPLAN |PARTITIONED| + project ([$$51, $$45, $$49]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$48, $$45)) + -- HYBRID_HASH_JOIN [$$45][$$48] |PARTITIONED| + exchange + -- HASH_PARTITION_EXCHANGE [$$45] |PARTITIONED| + project ([$$51, $$45]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$51, $$45] <- [$$sales.getField("date"), $$sales.getField("sid")] + -- ASSIGN |PARTITIONED| + project ([$$sales]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$46, $$sales] <- TestYelp.sales + -- DATASOURCE_SCAN |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange + -- HASH_PARTITION_EXCHANGE [$$48] |PARTITIONED| + project ([$$49, $$48]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$49, $$48] <- [$$temp.getField("forecast"), $$temp.getField("x")] + -- ASSIGN |PARTITIONED| + project ([$$temp]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$47, $$temp] <- TestYelp.temp + -- DATASOURCE_SCAN |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |PARTITIONED| \ No newline at end of file
