This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 1b85c6482e7def7b7a8e00421d05436fc2537f26
Author: Ritik Raj <[email protected]>
AuthorDate: Tue Jan 7 09:14:43 2025 +0530

    [ASTERIXDB-3540][COMP] Fixed calculation of expected schema for pushdown
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    If the getField expression contained a function that needs to be
    evaluated at runtime, the pushdown computer did not visit that
    function's arguments, leading to an incorrect expected schema.
    e.g.:
    1. `field-access-by-name`(t.r.p, x.y.age_field)
    2. `field-access-by-name`(t.r.p, substring(x.y.age_field, 0, 4))
    
    Ext-ref: MB-64730
    Change-Id: Iac55527af143c292557158ca8e47e92538e93970
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19288
    Reviewed-by: Murtadha Hubail <[email protected]>
    Tested-by: Murtadha Hubail <[email protected]>
    Integration-Tests: Murtadha Hubail <[email protected]>
---
 asterixdb/NOTICE                                   |  2 +-
 .../rules/pushdown/ExpectedSchemaBuilder.java      | 67 ++++++++++++++++++++++
 .../asterix-app/data/hdfs/parquet/friends.json     |  1 +
 .../external_dataset/ExternalDatasetTestUtils.java |  1 +
 .../ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp     | 41 +++++++++++++
 .../ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp   | 26 +++++++++
 .../ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp   | 25 ++++++++
 .../parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan  |  1 +
 .../parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm   |  1 +
 .../runtimets/testsuite_external_dataset_s3.xml    |  6 ++
 hyracks-fullstack/NOTICE                           |  2 +-
 11 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/asterixdb/NOTICE b/asterixdb/NOTICE
index 06d538de68..5118782978 100644
--- a/asterixdb/NOTICE
+++ b/asterixdb/NOTICE
@@ -1,5 +1,5 @@
 Apache AsterixDB
-Copyright 2015-2024 The Apache Software Foundation
+Copyright 2015-2025 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
index b7632db0e5..a9937d1af3 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
@@ -22,6 +22,7 @@ import static 
org.apache.asterix.optimizer.rules.pushdown.ExpressionValueAccessP
 import static 
org.apache.asterix.optimizer.rules.pushdown.ExpressionValueAccessPushdownVisitor.SUPPORTED_FUNCTIONS;
 
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.asterix.om.functions.BuiltinFunctions;
@@ -37,6 +38,7 @@ import 
org.apache.asterix.optimizer.rules.pushdown.schema.RootExpectedSchemaNode
 import 
org.apache.asterix.optimizer.rules.pushdown.schema.UnionExpectedSchemaNode;
 import org.apache.asterix.runtime.projection.DataProjectionInfo;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
+import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
@@ -72,6 +74,10 @@ class ExpectedSchemaBuilder {
     }
 
     public boolean setSchemaFromExpression(AbstractFunctionCallExpression 
expr, LogicalVariable producedVar) {
+        return buildExpectedSchemaNodes(expr, producedVar); // walks nested arguments too, not only expr itself
+    }
+
+    public boolean 
setSchemaFromCalculatedExpression(AbstractFunctionCallExpression expr, 
LogicalVariable producedVar) {
         //Parent always nested
         AbstractComplexExpectedSchemaNode parent = 
(AbstractComplexExpectedSchemaNode) buildNestedNode(expr);
         if (parent != null) {
@@ -111,6 +117,67 @@ class ExpectedSchemaBuilder {
         return !varToNode.isEmpty();
     }
 
+    private boolean buildExpectedSchemaNodes(ILogicalExpression expr, 
LogicalVariable producedVar) {
+        return buildNestedNodes(expr, producedVar); // thin wrapper: the recursive buildNestedNodes does all the work
+    }
+
+    private boolean buildNestedNodes(ILogicalExpression expr, LogicalVariable 
producedVar) {
+        // Accumulates the results of the recursive calls and of setSchemaFromCalculatedExpression below
+        boolean changed = false;
+        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+        AbstractFunctionCallExpression myExpr = 
(AbstractFunctionCallExpression) expr;
+        if (!SUPPORTED_FUNCTIONS.contains(myExpr.getFunctionIdentifier()) || 
noArgsOrFirstArgIsConstant(myExpr)) {
+            // Not a supported function (or no args / constant first arg): look for supported functions in its arguments
+            for (Mutable<ILogicalExpression> arg : myExpr.getArguments()) {
+                changed |= buildNestedNodes(arg.getValue(), producedVar);
+            }
+            return changed;
+        }
+        // If the child is not a function expression, then a single node is enough.
+        if (BuiltinFunctions.ARRAY_STAR.equals(myExpr.getFunctionIdentifier())
+                || 
BuiltinFunctions.SCAN_COLLECTION.equals(myExpr.getFunctionIdentifier())) {
+            // these supported functions won't have a second child
+            IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr);
+            if (expectedSchemaNode != null) {
+                changed |= 
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr, 
producedVar);
+            }
+        } else {
+            ILogicalExpression childExpr = 
myExpr.getArguments().get(1).getValue();
+            if (childExpr.getExpressionTag() != 
LogicalExpressionTag.FUNCTION_CALL) {
+                // must be a variable or constant
+                IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr);
+                if (expectedSchemaNode != null) {
+                    changed |= 
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr, 
producedVar);
+                }
+            } else {
+                // Here the childExpr is a function.
+                // If the function could be evaluated at compile time, it 
would have been
+                // evaluated by this stage of compilation.
+                // e.g.: field-access(t.r.p, substring("name",2,4))
+                // this will be evaluated to field-access(t.r.p, "me") at 
compile time itself.
+                // Since the execution reached this branch, this means the 
childExpr
+                // needs to be evaluated at runtime, hence the childExpr should 
also be checked
+                // for possible pushdown.
+                // e.g.: field-access(t.r.p, substring(x.y.age_field, 0, 4))
+                ILogicalExpression parentExpr = 
myExpr.getArguments().get(0).getValue();
+                IExpectedSchemaNode parentExpectedNode = 
buildNestedNode(parentExpr); // NOTE(review): buildNestedNode casts its argument to a function call; confirm parentExpr is never a plain variable here
+                if (parentExpectedNode != null) {
+                    changed |=
+                            
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) parentExpr, 
producedVar);
+                }
+                changed |= buildNestedNodes(childExpr, producedVar);
+            }
+        }
+        return changed;
+    }
+
+    private boolean noArgsOrFirstArgIsConstant(AbstractFunctionCallExpression 
myExpr) {
+        List<Mutable<ILogicalExpression>> args = myExpr.getArguments(); // isEmpty() is tested first below, so get(0) is never called on an empty list
+        return args.isEmpty() || args.get(0).getValue().getExpressionTag() == 
LogicalExpressionTag.CONSTANT;
+    }
+
     private IExpectedSchemaNode buildNestedNode(ILogicalExpression expr) {
         //The current node expression
         AbstractFunctionCallExpression myExpr = 
(AbstractFunctionCallExpression) expr;
diff --git a/asterixdb/asterix-app/data/hdfs/parquet/friends.json 
b/asterixdb/asterix-app/data/hdfs/parquet/friends.json
new file mode 100644
index 0000000000..d708ad9a53
--- /dev/null
+++ b/asterixdb/asterix-app/data/hdfs/parquet/friends.json
@@ -0,0 +1 @@
+{ "id": "1", "name": "Monica", "x": { "y": { "age_field": "age" } }, "t": { 
"r": { "p": { "age": "26" } } } }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index 316d261e14..7963132494 100644
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -272,6 +272,7 @@ public class ExternalDatasetTestUtils {
         loadData(generatedDataBasePath, "", "heterogeneous_1.parquet", 
definition, definitionSegment, false, false);
         loadData(generatedDataBasePath, "", "heterogeneous_2.parquet", 
definition, definitionSegment, false, false);
         loadData(generatedDataBasePath, "", "parquetTypes.parquet", 
definition, definitionSegment, false, false);
+        loadData(generatedDataBasePath, "", "friends.parquet", definition, 
definitionSegment, false, false);
     }
 
     private static void loadData(String fileBasePath, String filePathSegment, 
String filename, String definition,
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
new file mode 100644
index 0000000000..a601a8db19
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Field access pushdown
+* Expected Res : Success
+* Date         : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+  %template%,
+  ("container"="playground"),
+  ("definition"="parquet-data/reviews"),
+  ("include"="*friends.parquet"),
+  ("format" = "parquet")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
new file mode 100644
index 0000000000..e72d4121da
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT t.r.g, `field-access-by-name`(t.r.p, x.y.age_field)
+FROM ParquetDataset;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
new file mode 100644
index 0000000000..d15ba8d766
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT t.r.g, `field-access-by-name`(t.r.p, x.y.age_field)
+FROM ParquetDataset;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
new file mode 100644
index 0000000000..4806a282ee
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
@@ -0,0 +1 @@
+"distribute result [$$24] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 
1000000.0]\n-- DISTRIBUTE_RESULT  |PARTITIONED|\n  exchange [cardinality: 
1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n  -- ONE_TO_ONE_EXCHANGE  
|PARTITIONED|\n    project ([$$24]) [cardinality: 1000000.0, op-cost: 0.0, 
total-cost: 1000000.0]\n    -- STREAM_PROJECT  |PARTITIONED|\n      assign 
[$$24] <- [{\"g\": $$25.getField(\"g\"), \"$1\": 
$$25.getField(\"p\").getField(\"$$ParquetDataset.getField(\"x\").ge [...]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
new file mode 100644
index 0000000000..224633561b
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
@@ -0,0 +1 @@
+{ "$1": "26" }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 724298479e..723c1186d3 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -101,6 +101,12 @@
         <output-dir 
compare="Text">common/parquet/field-access-pushdown</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/parquet/ASTERIXDB-3540">
+        <placeholder name="adapter" value="S3" />
+        <output-dir 
compare="Clean-JSON">common/parquet/ASTERIXDB-3540</output-dir>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/parquet/array-access-pushdown">
         <placeholder name="adapter" value="S3" />
diff --git a/hyracks-fullstack/NOTICE b/hyracks-fullstack/NOTICE
index e9bb9a4535..722db88282 100644
--- a/hyracks-fullstack/NOTICE
+++ b/hyracks-fullstack/NOTICE
@@ -1,5 +1,5 @@
 Apache Hyracks and Algebricks
-Copyright 2015-2024 The Apache Software Foundation
+Copyright 2015-2025 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).

Reply via email to