This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit de0acbf578f9d29361a3c87e20a8f92a96005644
Merge: 474bef9c3b 0ef02534ac
Author: Michael Blow <[email protected]>
AuthorDate: Wed Jan 15 22:59:13 2025 -0500

    Merge branch 'gerrit/trinity' into 'gerrit/goldfish'
    
    Ext-ref: MB-64561
    Change-Id: If535113f5438a9d32420ffd1d35d44a24a217b1e

 asterixdb/NOTICE                                   |  2 +-
 .../pushdown/schema/ExpectedSchemaBuilder.java     | 75 ++++++++++++++++++++--
 .../asterix-app/data/hdfs/parquet/friends.json     |  1 +
 .../external_dataset/ExternalDatasetTestUtils.java |  1 +
 .../ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp     | 41 ++++++++++++
 .../ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp   | 26 ++++++++
 .../ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp   | 25 ++++++++
 .../query-ASTERIXDB-3538.1.ddl.sqlpp               | 67 +++++++++++++++++++
 .../query-ASTERIXDB-3538.2.update.sqlpp            | 31 +++++++++
 .../query-ASTERIXDB-3538.3.query.sqlpp             | 25 ++++++++
 .../query-ASTERIXDB-3538.4.query.sqlpp             | 25 ++++++++
 .../query-ASTERIXDB-3538.5.query.sqlpp             | 25 ++++++++
 .../query-ASTERIXDB-3538.6.query.sqlpp             | 24 +++++++
 .../parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan  |  1 +
 .../parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm   |  1 +
 .../query-ASTERIXDB-3538.3.plan                    | 24 +++++++
 .../query-ASTERIXDB-3538.4.adm                     |  8 +++
 .../query-ASTERIXDB-3538.5.plan                    | 41 ++++++++++++
 .../query-ASTERIXDB-3538.6.adm                     |  4 ++
 .../query-ASTERIXDB-3538.3.plan                    | 24 +++++++
 .../query-ASTERIXDB-3538.5.plan                    | 41 ++++++++++++
 .../src/test/resources/runtimets/sqlpp_queries.xml |  5 ++
 .../runtimets/testsuite_external_dataset_s3.xml    |  6 ++
 hyracks-fullstack/NOTICE                           |  2 +-
 .../rules/RemoveUnusedAssignAndAggregateRule.java  |  4 ++
 .../dataflow/common/utils/FrameDebugUtils.java     |  2 +-
 26 files changed, 523 insertions(+), 8 deletions(-)

diff --cc 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
index e442d64ae9,0000000000..23da417f70
mode 100644,000000..100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
@@@ -1,237 -1,0 +1,302 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *   http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.asterix.optimizer.rules.pushdown.schema;
 +
 +import static org.apache.asterix.metadata.utils.PushdownUtil.ARRAY_FUNCTIONS;
 +import static 
org.apache.asterix.metadata.utils.PushdownUtil.SUPPORTED_FUNCTIONS;
 +
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Map;
 +
 +import org.apache.asterix.metadata.utils.PushdownUtil;
 +import org.apache.asterix.om.functions.BuiltinFunctions;
 +import org.apache.commons.lang3.mutable.Mutable;
 +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 +import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 +import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
 +import 
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 +import 
org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
 +import 
org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
 +import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 +
 +/**
 + * This class takes a value access expression and produces an expected schema 
(given the expression).
 + * Example:
 + * - $$t.getField("hashtags").getItem(0)
 + * We expect:
 + * 1- $$t is OBJECT
 + * 2- the output type of getField("hashtags") is ARRAY
 + * 3- the output type of getItem(0) is ANY node
 + */
 +public class ExpectedSchemaBuilder {
 +    //Registered Variables
 +    private final Map<LogicalVariable, IExpectedSchemaNode> varToNode;
 +
 +    public ExpectedSchemaBuilder() {
 +        varToNode = new HashMap<>();
 +    }
 +
 +    public boolean setSchemaFromExpression(AbstractFunctionCallExpression 
expr, LogicalVariable producedVar,
 +            IVariableTypeEnvironment typeEnv) throws AlgebricksException {
++        return buildExpectedSchemaNodes(expr, producedVar, typeEnv);
++    }
++
++    public boolean 
setSchemaFromCalculatedExpression(AbstractFunctionCallExpression expr, 
LogicalVariable producedVar,
++            IVariableTypeEnvironment typeEnv) throws AlgebricksException {
 +        //Parent always nested
 +        AbstractComplexExpectedSchemaNode parent = 
(AbstractComplexExpectedSchemaNode) buildNestedNode(expr, typeEnv);
 +        if (parent != null) {
 +            IExpectedSchemaNode leaf = new AnyExpectedSchemaNode(parent, 
expr);
 +            IExpectedSchemaNode actualNode = addOrReplaceChild(expr, typeEnv, 
parent, leaf);
 +            if (producedVar != null) {
 +                //Register the node if a variable is produced
 +                varToNode.put(producedVar, actualNode);
 +            }
 +        }
 +
 +        return parent != null;
 +    }
 +
 +    public void registerRoot(LogicalVariable recordVar, 
RootExpectedSchemaNode rootNode) {
 +        varToNode.put(recordVar, rootNode);
 +    }
 +
 +    public void unregisterVariable(LogicalVariable variable) {
 +        //Remove the node so no other expression will pushdown any expression 
in the future
 +        IExpectedSchemaNode node = varToNode.remove(variable);
 +        AbstractComplexExpectedSchemaNode parent = node.getParent();
 +        if (parent == null) {
 +            //It is a root node. Request the entire record
 +            varToNode.put(variable, 
RootExpectedSchemaNode.ALL_FIELDS_ROOT_IRREPLACEABLE_NODE);
 +        } else {
 +            // If it is a nested node, replace it with a LEAF node
 +            // Both expressions are null as the node isn't used 
anymore and the node ANY is not replaceable
 +            AnyExpectedSchemaNode leafNode =
 +                    (AnyExpectedSchemaNode) 
node.replaceIfNeeded(ExpectedSchemaNodeType.ANY, null, null);
 +            // make the leaf node irreplaceable
 +            leafNode.preventReplacing();
 +            varToNode.put(variable, leafNode);
 +        }
 +    }
 +
 +    public boolean isVariableRegistered(LogicalVariable variable) {
 +        return varToNode.containsKey(variable);
 +    }
 +
 +    public boolean isEmpty() {
 +        return varToNode.isEmpty();
 +    }
 +
 +    public IExpectedSchemaNode getNode(LogicalVariable variable) {
 +        return varToNode.get(variable);
 +    }
 +
++    private boolean buildExpectedSchemaNodes(ILogicalExpression expr, 
LogicalVariable producedVar,
++            IVariableTypeEnvironment typeEnv) throws AlgebricksException {
++        return buildNestedNodes(expr, producedVar, typeEnv);
++    }
++
++    private boolean buildNestedNodes(ILogicalExpression expr, LogicalVariable 
producedVar,
++            IVariableTypeEnvironment typeEnv) throws AlgebricksException {
++        //The current node expression
++        boolean changed = false;
++        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
++            return false;
++        }
++        AbstractFunctionCallExpression myExpr = 
(AbstractFunctionCallExpression) expr;
++        if (!SUPPORTED_FUNCTIONS.contains(myExpr.getFunctionIdentifier()) || 
noArgsOrFirstArgIsConstant(myExpr)) {
++            // The function is not a supported pushdown function (or 
its first argument is constant); check its arguments instead
++            for (Mutable<ILogicalExpression> arg : myExpr.getArguments()) {
++                changed |= buildNestedNodes(arg.getValue(), producedVar, 
typeEnv);
++            }
++            return changed;
++        }
++        // if the child is not a function expression, then just one node.
++        if (BuiltinFunctions.ARRAY_STAR.equals(myExpr.getFunctionIdentifier())
++                || 
BuiltinFunctions.SCAN_COLLECTION.equals(myExpr.getFunctionIdentifier())) {
++            // these supported functions won't have a second child
++            IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr, 
typeEnv);
++            if (expectedSchemaNode != null) {
++                changed |=
++                        
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr, 
producedVar, typeEnv);
++            }
++        } else {
++            ILogicalExpression childExpr = 
myExpr.getArguments().get(1).getValue();
++            if (childExpr.getExpressionTag() != 
LogicalExpressionTag.FUNCTION_CALL) {
++                // must be a variable or constant
++                IExpectedSchemaNode expectedSchemaNode = 
buildNestedNode(expr, typeEnv);
++                if (expectedSchemaNode != null) {
++                    changed |= 
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr, 
producedVar,
++                            typeEnv);
++                }
++            } else {
++                // as the childExpr is a function.
++                // if the function had been evaluated at compile time, it 
would have been
++                // evaluated at this stage of compilation.
++                // eg: field-access(t.r.p, substring("name",2,4))
++                // this will be evaluated to field-access(t.r.p, "me") at 
compile time itself.
++                // since the execution reached this branch, this means the 
childExpr
++                // need to be evaluated at runtime, hence the childExpr 
should also be checked
++                // for possible pushdown.
++                // eg: field-access(t.r.p, substring(x.y.age_field, 0, 4))
++                ILogicalExpression parentExpr = 
myExpr.getArguments().get(0).getValue();
++                IExpectedSchemaNode parentExpectedNode = 
buildNestedNode(parentExpr, typeEnv);
++                if (parentExpectedNode != null) {
++                    changed |= 
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) parentExpr,
++                            producedVar, typeEnv);
++                }
++                changed |= buildNestedNodes(childExpr, producedVar, typeEnv);
++            }
++        }
++        return changed;
++    }
++
++    private boolean noArgsOrFirstArgIsConstant(AbstractFunctionCallExpression 
myExpr) {
++        List<Mutable<ILogicalExpression>> args = myExpr.getArguments();
++        return args.isEmpty() || args.get(0).getValue().getExpressionTag() == 
LogicalExpressionTag.CONSTANT;
++    }
++
 +    private IExpectedSchemaNode buildNestedNode(ILogicalExpression expr, 
IVariableTypeEnvironment typeEnv)
 +            throws AlgebricksException {
 +        //The current node expression
 +        AbstractFunctionCallExpression myExpr = 
(AbstractFunctionCallExpression) expr;
 +        if (!SUPPORTED_FUNCTIONS.contains(myExpr.getFunctionIdentifier()) || 
noArgsOrFirstArgIsConstant(myExpr)) {
 +            //Return null if the function is not supported.
 +            return null;
 +        }
 +
 +        //The parent expression
 +        ILogicalExpression parentExpr = 
myExpr.getArguments().get(0).getValue();
 +        if (isVariable(parentExpr)) {
 +            //A variable could be the record's originated from data-scan or 
an expression from assign
 +            LogicalVariable sourceVar = 
VariableUtilities.getVariable(parentExpr);
 +            return changeNodeForVariable(sourceVar, myExpr, myExpr);
 +        }
 +
 +        //Recursively create the parent nodes. Parent is always a nested node
 +        AbstractComplexExpectedSchemaNode newParent =
 +                (AbstractComplexExpectedSchemaNode) 
buildNestedNode(parentExpr, typeEnv);
 +        //newParent could be null if the expression is not supported
 +        if (newParent != null) {
 +            //Parent expression must be a function call (as parent is a 
nested node)
 +            AbstractFunctionCallExpression parentFuncExpr = 
(AbstractFunctionCallExpression) parentExpr;
 +            //Get 'myType' as we will create the child type of the newParent
 +            ExpectedSchemaNodeType myType = getExpectedNestedNodeType(myExpr);
 +            /*
 +             * Create 'myNode'. It is a nested node because the function is 
either getField() or a supported array
 +             * function
 +             */
 +            AbstractComplexExpectedSchemaNode myNode =
 +                    
AbstractComplexExpectedSchemaNode.createNestedNode(myType, newParent, 
parentFuncExpr, myExpr);
 +            // Add (or replace old child with) myNode to the parent
 +            return addOrReplaceChild(parentFuncExpr, typeEnv, newParent, 
myNode);
 +        }
 +        return null;
 +    }
 +
-     private boolean noArgsOrFirstArgIsConstant(AbstractFunctionCallExpression 
myExpr) {
-         List<Mutable<ILogicalExpression>> args = myExpr.getArguments();
-         return args.isEmpty() || args.get(0).getValue().getExpressionTag() == 
LogicalExpressionTag.CONSTANT;
-     }
- 
 +    private IExpectedSchemaNode changeNodeForVariable(LogicalVariable 
sourceVar,
 +            AbstractFunctionCallExpression parentExpression, 
ILogicalExpression expression) {
 +        //Get the associated node with the sourceVar (if any)
 +        IExpectedSchemaNode oldNode = varToNode.get(sourceVar);
 +        if (oldNode == null || !oldNode.allowsReplacing()) {
 +            // Variable is not associated with a node. No pushdown is possible
 +            // Or its associated node cannot be replaced
 +            return null;
 +        }
 +        //What is the expected type of the variable
 +        ExpectedSchemaNodeType varExpectedType = 
getExpectedNestedNodeType(parentExpression);
 +        // Get the node associated with the variable (or change its type if 
needed).
 +        IExpectedSchemaNode newNode = 
oldNode.replaceIfNeeded(varExpectedType, parentExpression, expression);
 +        //Map the sourceVar to the node
 +        varToNode.put(sourceVar, newNode);
 +        return newNode;
 +    }
 +
 +    public static IExpectedSchemaNode 
addOrReplaceChild(AbstractFunctionCallExpression parentExpr,
 +            IVariableTypeEnvironment typeEnv, 
AbstractComplexExpectedSchemaNode parent, IExpectedSchemaNode child)
 +            throws AlgebricksException {
 +        switch (parent.getType()) {
 +            case OBJECT:
 +                return handleObject(parentExpr, typeEnv, parent, child);
 +            case ARRAY:
 +                return handleArray(parent, child);
 +            case UNION:
 +                return handleUnion(parentExpr, parent, child);
 +            default:
 +                throw new IllegalStateException("Node " + parent.getType() + 
" is not nested");
 +
 +        }
 +    }
 +
 +    public static ExpectedSchemaNodeType 
getExpectedNestedNodeType(AbstractFunctionCallExpression funcExpr) {
 +        FunctionIdentifier fid = funcExpr.getFunctionIdentifier();
 +        if (BuiltinFunctions.FIELD_ACCESS_BY_NAME.equals(fid) || 
BuiltinFunctions.FIELD_ACCESS_BY_INDEX.equals(fid)) {
 +            return ExpectedSchemaNodeType.OBJECT;
 +        } else if (ARRAY_FUNCTIONS.contains(fid)) {
 +            return ExpectedSchemaNodeType.ARRAY;
 +        }
 +        throw new IllegalStateException("Function " + fid + " should not be 
pushed down");
 +    }
 +
 +    private static IExpectedSchemaNode 
handleObject(AbstractFunctionCallExpression parentExpr,
 +            IVariableTypeEnvironment typeEnv, 
AbstractComplexExpectedSchemaNode parent, IExpectedSchemaNode child)
 +            throws AlgebricksException {
 +        int fieldNameId = PushdownUtil.getFieldNameId(parentExpr);
 +        String fieldName = PushdownUtil.getFieldName(parentExpr, typeEnv);
 +        ObjectExpectedSchemaNode objectNode = (ObjectExpectedSchemaNode) 
parent;
 +        IExpectedSchemaNode actualChild = 
objectNode.getChildren().get(fieldName);
 +        if (actualChild == null) {
 +            objectNode.addChild(fieldName, fieldNameId, child);
 +            actualChild = child;
 +        } else {
 +            actualChild = objectNode.replaceChild(actualChild, child);
 +        }
 +
 +        return actualChild;
 +    }
 +
 +    private static IExpectedSchemaNode 
handleArray(AbstractComplexExpectedSchemaNode parent,
 +            IExpectedSchemaNode child) {
 +        ArrayExpectedSchemaNode arrayNode = (ArrayExpectedSchemaNode) parent;
 +        IExpectedSchemaNode actualChild = arrayNode.getChild();
 +        if (actualChild == null) {
 +            arrayNode.addChild(child);
 +            actualChild = child;
 +        } else {
 +            actualChild = arrayNode.replaceChild(actualChild, child);
 +        }
 +
 +        return actualChild;
 +    }
 +
 +    private static IExpectedSchemaNode 
handleUnion(AbstractFunctionCallExpression parentExpr,
 +            AbstractComplexExpectedSchemaNode parent, IExpectedSchemaNode 
child) throws AlgebricksException {
 +        UnionExpectedSchemaNode unionNode = (UnionExpectedSchemaNode) parent;
 +        ExpectedSchemaNodeType parentType = 
getExpectedNestedNodeType(parentExpr);
 +        AbstractComplexExpectedSchemaNode actualParent = 
unionNode.getChild(parentType);
 +        child.setParent(actualParent);
 +        return addOrReplaceChild(parentExpr, null, actualParent, child);
 +    }
 +
 +    private static boolean isVariable(ILogicalExpression expr) {
 +        return expr.getExpressionTag() == LogicalExpressionTag.VARIABLE;
 +    }
 +}
diff --cc 
asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index c50b391fa2,7963132494..9e63c446d8
--- 
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ 
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@@ -395,81 -272,7 +395,82 @@@ public class ExternalDatasetTestUtils 
          loadData(generatedDataBasePath, "", "heterogeneous_1.parquet", 
definition, definitionSegment, false, false);
          loadData(generatedDataBasePath, "", "heterogeneous_2.parquet", 
definition, definitionSegment, false, false);
          loadData(generatedDataBasePath, "", "parquetTypes.parquet", 
definition, definitionSegment, false, false);
+         loadData(generatedDataBasePath, "", "friends.parquet", definition, 
definitionSegment, false, false);
 +
 +        Collection<File> files =
 +                IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath + 
"/external-filter"), PARQUET_FILTER);
 +        for (File file : files) {
 +            String fileName = file.getName();
 +            String externalFilterDefinition = 
file.getParent().substring(generatedDataBasePath.length() + 1) + "/";
 +            loadData(file.getParent(), "", fileName, "parquet-data/" + 
externalFilterDefinition, "", false, false);
 +        }
 +    }
 +
 +    private static void loadAvroFiles() {
 +        String generatedDataBasePath = AVRO_GEN_BASEDIR;
 +        String definition = AVRO_DEFINITION;
 +        String definitionSegment = "";
 +
 +        loadData(generatedDataBasePath, "", "dummy_tweet.avro", definition, 
definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "id_age.avro", definition, 
definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "id_age-string.avro", definition, 
definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "id_name.avro", definition, 
definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "id_name_comment.avro", 
definition, definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "heterogeneous_1.avro", 
definition, definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "heterogeneous_2.avro", 
definition, definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "avro_type.avro", definition, 
definitionSegment, false, false);
 +        loadData(generatedDataBasePath, "", "partition_heterogeneous.avro", 
definition, definitionSegment, false,
 +                false);
 +
 +        Collection<File> files =
 +                IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath + 
"/external-filter"), AVRO_FILTER);
 +        for (File file : files) {
 +            String fileName = file.getName();
 +            String externalFilterDefinition = 
file.getParent().substring(generatedDataBasePath.length() + 1) + "/";
 +            loadData(file.getParent(), "", fileName, "avro-data/" + 
externalFilterDefinition, "", false, false);
 +        }
 +    }
 +
 +    private static void loadDeltaTableFiles() {
 +        String generatedDataBasePath = DELTA_GEN_BASEDIR;
 +        loadDeltaDirectory(generatedDataBasePath, "/empty_delta_table", 
PARQUET_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, 
"/empty_delta_table/_delta_log", JSON_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, "/modified_delta_table", 
PARQUET_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, 
"/modified_delta_table/_delta_log", JSON_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, 
"/multiple_file_delta_table", PARQUET_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, 
"/multiple_file_delta_table/_delta_log", JSON_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, 
"/delta_all_type/_delta_log", JSON_FILTER, "delta-data/");
 +        loadDeltaDirectory(generatedDataBasePath, "/delta_all_type", 
PARQUET_FILTER, "delta-data/");
 +    }
 +
 +    private static void loadDeltaDirectory(String dataBasePath, String 
rootPath, FilenameFilter filter,
 +            String definitionPart) {
 +        Collection<File> files = 
IoUtil.getMatchingFiles(Paths.get(dataBasePath + rootPath), filter);
 +        for (File file : files) {
 +            String fileName = file.getName();
 +            String externalFilterDefinition = 
file.getParent().substring(dataBasePath.length() + 1) + "/";
 +            loadData(file.getParent(), "", fileName, definitionPart + 
externalFilterDefinition, "", false, false);
 +        }
 +    }
 +
 +    private static void loadDirectory(String dataBasePath, String rootPath, 
FilenameFilter filter) {
 +        File dir = new File(dataBasePath, rootPath);
 +        if (!dir.exists() || !dir.isDirectory()) {
 +            return;
 +        }
 +
 +        Collection<File> files = IoUtil.getMatchingFiles(dir.toPath(), 
filter);
 +        int size = 0;
 +        for (File file : files) {
 +            String path = file.getPath();
 +            // +1 to remove the leading '/'
 +            int startIndex = path.indexOf(rootPath) + rootPath.length() + 1;
 +            int endIndex = path.lastIndexOf(File.separatorChar);
 +            String definitionSegment = rootPath + File.separator + 
path.substring(startIndex, endIndex);
 +            loadData(path.substring(0, endIndex), "", file.getName(), "", 
definitionSegment, false, false);
 +            size++;
 +        }
 +        LOGGER.info("Loaded {} files from {}", size, dataBasePath + 
File.separator + rootPath);
      }
  
      private static void loadData(String fileBasePath, String filePathSegment, 
String filename, String definition,
diff --cc 
asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.3.plan
index 0000000000,c5f8436831..1f2126c5c8
mode 000000,100644..100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.3.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.3.plan
@@@ -1,0 -1,24 +1,24 @@@
 -distribute result [$$297] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
++distribute result [$$311] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- DISTRIBUTE_RESULT  |PARTITIONED|
 -  exchange [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
++  exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
 -    project ([$$297]) [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
++    project ([$$311]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+     -- STREAM_PROJECT  |PARTITIONED|
 -      assign [$$297] <- [{"id": $$331}] [cardinality: 2.1, op-cost: 0.0, 
total-cost: 2.1]
++      assign [$$311] <- [{"id": $$345}] [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+       -- ASSIGN  |PARTITIONED|
 -        select (not(is-null($$331))) [cardinality: 2.1, op-cost: 0.0, 
total-cost: 2.1]
++        select (not(is-null($$345))) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+         -- STREAM_SELECT  |PARTITIONED|
 -          project ([$$331]) [cardinality: 1000000.0, op-cost: 0.0, 
total-cost: 2.1]
++          project ([$$345]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+           -- STREAM_PROJECT  |PARTITIONED|
 -            assign [$$331] <- [string-default-null($$s.getField("id"))] 
[cardinality: 1000000.0, op-cost: 0.0, total-cost: 2.1]
++            assign [$$345] <- [string-default-null($$s.getField("id"))] 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+             -- ASSIGN  |PARTITIONED|
 -              project ([$$s]) [cardinality: 1000000.0, op-cost: 0.0, 
total-cost: 2.1]
++              project ([$$s]) [cardinality: 0.0, op-cost: 0.0, total-cost: 
0.0]
+               -- STREAM_PROJECT  |PARTITIONED|
 -                exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 
2.1]
++                exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
 -                  data-scan []<-[$$300, $$s] <- test.dat1 [cardinality: 
1000000.0, op-cost: 2.1, total-cost: 2.1]
++                  data-scan []<-[$$314, $$s] <- test.dat1 [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
+                   -- DATASOURCE_SCAN  |PARTITIONED|
+                     exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                       empty-tuple-source [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+                       -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --cc 
asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.5.plan
index 0000000000,302961126a..2f074b771b
mode 000000,100644..100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.5.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.5.plan
@@@ -1,0 -1,41 +1,41 @@@
 -distribute result [$$303] [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
++distribute result [$$317] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- DISTRIBUTE_RESULT  |PARTITIONED|
 -  exchange [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
++  exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
 -    project ([$$303]) [cardinality: 2.1, op-cost: 0.0, total-cost: 2.1]
++    project ([$$317]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+     -- STREAM_PROJECT  |PARTITIONED|
 -      assign [$$303] <- [{"dat3": {"id": $$337, "a": 
string-default-null($$331), "d": int64-default-null($$304)}}] [cardinality: 
2.1, op-cost: 0.0, total-cost: 2.1]
++      assign [$$317] <- [{"dat3": {"id": $$351, "a": 
string-default-null($$345), "d": int64-default-null($$318)}}] [cardinality: 
0.0, op-cost: 0.0, total-cost: 0.0]
+       -- ASSIGN  |PARTITIONED|
 -        project ([$$337, $$331, $$304]) [cardinality: 2.1, op-cost: 0.0, 
total-cost: 2.1]
++        project ([$$351, $$345, $$318]) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+         -- STREAM_PROJECT  |PARTITIONED|
 -          select (le($$304, get-item($$281, 0))) [cardinality: 2.1, op-cost: 
0.0, total-cost: 2.1]
++          select (le($$318, get-item($$295, 0))) [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0]
+           -- STREAM_SELECT  |PARTITIONED|
 -            project ([$$337, $$331, $$304, $$281]) [cardinality: 1000000.0, 
op-cost: 0.0, total-cost: 2.1]
++            project ([$$351, $$345, $$318, $$295]) [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
+             -- STREAM_PROJECT  |PARTITIONED|
+               subplan {
 -                        aggregate [$$281] <- [listify($$316)] [cardinality: 
0.0, op-cost: 0.0, total-cost: 0.0]
++                        aggregate [$$295] <- [listify($$330)] [cardinality: 
0.0, op-cost: 0.0, total-cost: 0.0]
+                         -- AGGREGATE  |LOCAL|
 -                          aggregate [$$316] <- [agg-sql-count(1)] 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
++                          aggregate [$$330] <- [agg-sql-count(1)] 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                           -- AGGREGATE  |LOCAL|
 -                            unnest $$319 <- scan-collection($$329) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
++                            unnest $$333 <- scan-collection($$343) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                             -- UNNEST  |LOCAL|
+                               nested tuple source [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0]
+                               -- NESTED_TUPLE_SOURCE  |LOCAL|
 -                     } [cardinality: 1000000.0, op-cost: 0.0, total-cost: 2.1]
++                     } [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+               -- SUBPLAN  |PARTITIONED|
 -                select (not(is-null($$337))) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
++                select (not(is-null($$351))) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+                 -- STREAM_SELECT  |PARTITIONED|
 -                  project ([$$337, $$331, $$304, $$329]) [cardinality: 
1000000.0, op-cost: 0.0, total-cost: 2.1]
++                  project ([$$351, $$345, $$318, $$343]) [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
+                   -- STREAM_PROJECT  |PARTITIONED|
 -                    assign [$$337, $$331, $$304, $$329] <- 
[string-default-null($$s.getField("id")), $$s.getField("a"), $$s.getField("d"), 
$$s.getField("e")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 2.1]
++                    assign [$$351, $$345, $$318, $$343] <- 
[string-default-null($$s.getField("id")), $$s.getField("a"), $$s.getField("d"), 
$$s.getField("e")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                     -- ASSIGN  |PARTITIONED|
 -                      project ([$$s]) [cardinality: 1000000.0, op-cost: 0.0, 
total-cost: 2.1]
++                      project ([$$s]) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+                       -- STREAM_PROJECT  |PARTITIONED|
 -                        exchange [cardinality: 1000000.0, op-cost: 0.0, 
total-cost: 2.1]
++                        exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 
0.0]
+                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
 -                          data-scan []<-[$$305, $$s] <- test.dat1 
[cardinality: 1000000.0, op-cost: 2.1, total-cost: 2.1]
++                          data-scan []<-[$$319, $$s] <- test.dat1 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                           -- DATASOURCE_SCAN  |PARTITIONED|
+                             exchange [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+                             -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                               empty-tuple-source [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0]
+                               -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --cc 
asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.3.plan
index 0000000000,0000000000..dc43c42f98
new file mode 100644
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.3.plan
@@@ -1,0 -1,0 +1,24 @@@
++distribute result [$$311] [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++-- DISTRIBUTE_RESULT  |PARTITIONED|
++  exchange [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
++    project ([$$311]) [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++    -- STREAM_PROJECT  |PARTITIONED|
++      assign [$$311] <- [{"id": $$345}] [cardinality: 8.0, op-cost: 0.0, 
total-cost: 8.0]
++      -- ASSIGN  |PARTITIONED|
++        select (not(is-null($$345))) [cardinality: 8.0, op-cost: 0.0, 
total-cost: 8.0]
++        -- STREAM_SELECT  |PARTITIONED|
++          project ([$$345]) [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++          -- STREAM_PROJECT  |PARTITIONED|
++            assign [$$345] <- [string-default-null($$s.getField("id"))] 
[cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++            -- ASSIGN  |PARTITIONED|
++              project ([$$s]) [cardinality: 8.0, op-cost: 0.0, total-cost: 
8.0]
++              -- STREAM_PROJECT  |PARTITIONED|
++                exchange [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
++                  data-scan []<-[$$314, $$s] <- test.dat1 [cardinality: 8.0, 
op-cost: 8.0, total-cost: 8.0]
++                  -- DATASOURCE_SCAN  |PARTITIONED|
++                    exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
++                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
++                      empty-tuple-source [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
++                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --cc 
asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.5.plan
index 0000000000,0000000000..b9dda1938f
new file mode 100644
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/subquery/query-ASTERIXDB-3538/query-ASTERIXDB-3538.5.plan
@@@ -1,0 -1,0 +1,41 @@@
++distribute result [$$317] [cardinality: 4.0, op-cost: 0.0, total-cost: 8.0]
++-- DISTRIBUTE_RESULT  |PARTITIONED|
++  exchange [cardinality: 4.0, op-cost: 0.0, total-cost: 8.0]
++  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
++    project ([$$317]) [cardinality: 4.0, op-cost: 0.0, total-cost: 8.0]
++    -- STREAM_PROJECT  |PARTITIONED|
++      assign [$$317] <- [{"dat3": {"id": $$351, "a": 
string-default-null($$345), "d": int64-default-null($$318)}}] [cardinality: 
4.0, op-cost: 0.0, total-cost: 8.0]
++      -- ASSIGN  |PARTITIONED|
++        project ([$$351, $$345, $$318]) [cardinality: 4.0, op-cost: 0.0, 
total-cost: 8.0]
++        -- STREAM_PROJECT  |PARTITIONED|
++          select (le($$318, get-item($$295, 0))) [cardinality: 4.0, op-cost: 
0.0, total-cost: 8.0]
++          -- STREAM_SELECT  |PARTITIONED|
++            project ([$$351, $$345, $$318, $$295]) [cardinality: 8.0, 
op-cost: 0.0, total-cost: 8.0]
++            -- STREAM_PROJECT  |PARTITIONED|
++              subplan {
++                        aggregate [$$295] <- [listify($$330)] [cardinality: 
0.0, op-cost: 0.0, total-cost: 0.0]
++                        -- AGGREGATE  |LOCAL|
++                          aggregate [$$330] <- [agg-sql-count(1)] 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
++                          -- AGGREGATE  |LOCAL|
++                            unnest $$333 <- scan-collection($$343) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
++                            -- UNNEST  |LOCAL|
++                              nested tuple source [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0]
++                              -- NESTED_TUPLE_SOURCE  |LOCAL|
++                     } [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++              -- SUBPLAN  |PARTITIONED|
++                select (not(is-null($$351))) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
++                -- STREAM_SELECT  |PARTITIONED|
++                  project ([$$351, $$345, $$318, $$343]) [cardinality: 8.0, 
op-cost: 0.0, total-cost: 8.0]
++                  -- STREAM_PROJECT  |PARTITIONED|
++                    assign [$$351, $$345, $$318, $$343] <- 
[string-default-null($$s.getField("id")), $$s.getField("a"), $$s.getField("d"), 
$$s.getField("e")] [cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
++                    -- ASSIGN  |PARTITIONED|
++                      project ([$$s]) [cardinality: 8.0, op-cost: 0.0, 
total-cost: 8.0]
++                      -- STREAM_PROJECT  |PARTITIONED|
++                        exchange [cardinality: 8.0, op-cost: 0.0, total-cost: 
8.0]
++                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
++                          data-scan []<-[$$319, $$s] <- test.dat1 
[cardinality: 8.0, op-cost: 8.0, total-cost: 8.0]
++                          -- DATASOURCE_SCAN  |PARTITIONED|
++                            exchange [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
++                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
++                              empty-tuple-source [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0]
++                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|


Reply via email to