This is an automated email from the ASF dual-hosted git repository. alsuliman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit ba91b1cdec9e8ed9fa8c5e5cc8502c987abe2bad Author: Ali Alsuliman <[email protected]> AuthorDate: Wed Feb 22 21:33:55 2023 -0800 [ASTERIXDB-3117][EXT] Allow specifying a subpath for EXTERNAL datasets queries - user model changes: no - storage format changes: no - interface changes: yes Details: This patch is to allow users to specify a subpath in queries scanning external data sources like S3. - add "subpath" hint to allow specifying a subpath to be used in conjunction with the "definition" of external datasets. - capture the "subpath" hint for each term in the FROM clause. - pass the hint to the properties of IDataSource and expose them. - Op Isomorphism: compare IDataSource properties only for EXTERNAL datasets. Change-Id: I3d1ec61fd8aa3275260c1aed6d00fa3c7b408351 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17395 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- .../SqlppExpressionToPlanTranslator.java | 11 ++++ .../asterix/app/function/DatasetRewriter.java | 13 +++++ .../json/json/external_dataset.000.ddl.sqlpp | 23 ++++++++ .../json/json/external_dataset.008.query.sqlpp | 24 ++------- .../json/json/external_dataset.009.query.sqlpp | 24 ++------- .../json/json/external_dataset.010.query.sqlpp | 24 ++------- .../json/json/external_dataset.011.query.sqlpp | 26 ++------- .../json/json/external_dataset.012.query.sqlpp | 26 ++------- .../json/json/external_dataset.013.query.sqlpp | 26 ++------- .../json/json/external_dataset.014.query.sqlpp | 26 ++------- .../common/json/json/external_dataset.008.adm | 25 +++++++++ .../common/json/json/external_dataset.009.adm | 25 +++++++++ .../common/json/json/external_dataset.010.adm | 1 + .../common/json/json/external_dataset.011.plan | 48 +++++++++++++++++ .../common/json/json/external_dataset.012.adm | 1 + .../common/json/json/external_dataset.013.plan | 62 ++++++++++++++++++++++ .../common/json/json/external_dataset.014.adm | 1 + .../annotations/ExternalSubpathAnnotation.java | 26 ++++----- .../external/util/ExternalDataConstants.java | 1 + .../asterix/external/util/ExternalDataUtils.java | 22 +++++++- .../asterix/lang/sqlpp/parser/SqlppHint.java | 1 + .../visitor/VariableCheckAndRewriteVisitor.java | 1 + .../asterix-lang-sqlpp/src/main/javacc/SQLPP.jj | 23 +++++++- .../metadata/declared/DatasetDataSource.java | 20 ++++++- .../core/algebra/metadata/IDataSource.java | 8 +++ .../visitors/IsomorphismOperatorVisitor.java | 7 +++ 26 files changed, 327 insertions(+), 168 deletions(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java index 3c4537f98d..ddabaa0934 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.translator; +import static org.apache.asterix.external.util.ExternalDataConstants.SUBPATH; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -28,6 +30,7 @@ import java.util.Set; import java.util.function.Predicate; import org.apache.asterix.algebra.base.ILangExpressionToPlanTranslator; +import org.apache.asterix.common.annotations.ExternalSubpathAnnotation; import org.apache.asterix.common.exceptions.CompilationException; import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.common.functions.FunctionSignature; @@ -325,6 +328,10 @@ public class SqlppExpressionToPlanTranslator extends LangExpressionToPlanTransla } else { unnestOp = new UnnestOperator(fromVar, new MutableObject<>(pUnnestExpr.first)); } + ExternalSubpathAnnotation hint = ((AbstractExpression) fromExpr).findHint(ExternalSubpathAnnotation.class); + if (hint != null) { + unnestOp.getAnnotations().put(SUBPATH, hint.getSubPath()); + } unnestOp.getInputs().add(pUnnestExpr.second); unnestOp.setSourceLocation(sourceLoc); @@ -576,6 +583,10 @@ public class SqlppExpressionToPlanTranslator extends LangExpressionToPlanTransla outerUnnestMissingValue) : new UnnestOperator(rightVar, new MutableObject<>(pUnnestExpr.first)); } + ExternalSubpathAnnotation hint = ((AbstractExpression) rightExpr).findHint(ExternalSubpathAnnotation.class); + if (hint != null) { + unnestOp.getAnnotations().put(SUBPATH, hint.getSubPath()); + } unnestOp.getInputs().add(pUnnestExpr.second); unnestOp.setSourceLocation(binaryCorrelate.getRightVariable().getSourceLocation()); return new Pair<>(unnestOp, rightVar); diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java index b1c2f0a956..68edc0ddf6 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java @@ -21,10 +21,14 @@ package org.apache.asterix.app.function; import static org.apache.asterix.common.api.IIdentifierMapper.Modifier.PLURAL; import static org.apache.asterix.common.api.IIdentifierMapper.Modifier.SINGULAR; import static org.apache.asterix.common.utils.IdentifierUtil.dataset; +import static org.apache.asterix.external.util.ExternalDataConstants.SUBPATH; +import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import org.apache.asterix.common.config.DatasetConfig; import org.apache.asterix.common.exceptions.CompilationException; import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.common.metadata.DatasetFullyQualifiedName; @@ -102,6 +106,15 @@ public class DatasetRewriter implements IFunctionToDataSourceRewriter, IResultTy } DataSourceScanOperator scan = new DataSourceScanOperator(variables, dataSource); scan.setSourceLocation(unnest.getSourceLocation()); + if (dataset.getDatasetType() == DatasetConfig.DatasetType.EXTERNAL) { + Map<String, Object> unnestAnnotations = unnest.getAnnotations(); + scan.getAnnotations().putAll(unnestAnnotations); + Map<String, Serializable> dataSourceProperties = dataSource.getProperties(); + Object externalSubpath = unnestAnnotations.get(SUBPATH); + if (externalSubpath instanceof String) { + dataSourceProperties.put(SUBPATH, (String) externalSubpath); + } + } List<Mutable<ILogicalOperator>> scanInpList = scan.getInputs(); scanInpList.addAll(unnest.getInputs()); opRef.setValue(scan); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp index f203da0101..1538bf9052 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp @@ -63,4 +63,27 @@ CREATE EXTERNAL DATASET test5(test) USING %adapter% ( ("container"="playground"), ("definition"="json-data/single-line/json-array-of-objects"), ("format"="json") +); + +drop dataset test6 if exists; +CREATE EXTERNAL DATASET test6(test) USING %adapter% ( +%template%, +("container"="playground"), +("definition"="json-data/reviews"), +("format"="json") +); + +drop dataset test7 if exists; +CREATE EXTERNAL DATASET test7(test) USING %adapter% ( +%template%, +("container"="playground"), +("format"="json") +); + +drop dataset test8 if exists; +CREATE EXTERNAL DATASET test8(test) USING %adapter% ( +%template%, +("container"="playground"), +("definition"="json-data"), +("format"="json") ); \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.008.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.008.query.sqlpp index e15b699b2b..1b265f1dce 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.008.query.sqlpp @@ -16,25 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing subpath hint. the result should be = scanning test3 +select value test6 from /*+ subpath /multi-lines-with-arrays/json */ test6 order by id; \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.009.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.009.query.sqlpp index e15b699b2b..9356366ab7 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.009.query.sqlpp @@ -16,25 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing subpath hint. the result should be = scanning test4 +select value test7 from /*+ subpath json-data/reviews/multi-lines-with-nested-objects/json */ test7 order by id; \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.010.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.010.query.sqlpp index e15b699b2b..8425714602 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.010.query.sqlpp @@ -16,25 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing subpath hint. the result should be = scanning test5 +select value count(*) from /*+ subpath single-line/json-array-of-objects */ test8; \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.011.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.011.query.sqlpp index e15b699b2b..d0f08aa748 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.011.query.sqlpp @@ -16,25 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing that test6 a and test6 b are two different data sources and hence no replicate should exist in the plan +explain select count(a.quarter) as cnt +from /*+ subpath /multi-lines/json */ test6 a +join test6 b on a.quarter = b.quarter; \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.012.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.012.query.sqlpp index e15b699b2b..3a675ef117 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.012.query.sqlpp @@ -16,25 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing that test6 a and test6 b are two different data sources and hence no replicate should exist in the plan +select count(a.quarter) as cnt +from /*+ subpath /multi-lines/json */ test6 a +join test6 b on a.quarter = b.quarter; \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.013.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.013.query.sqlpp index e15b699b2b..e886adb898 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.013.query.sqlpp @@ -16,25 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing that test6 a and test6 b are the same data sources and hence replicate should exist in the plan +explain select count(a.quarter) as cnt +from /*+ subpath /multi-lines/json */ test6 a +join /*+ subpath /multi-lines/json */ test6 b on a.quarter = b.quarter; \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.014.query.sqlpp similarity index 52% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.014.query.sqlpp index e15b699b2b..5b39d4f239 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.014.query.sqlpp @@ -16,25 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; -import java.util.List; - -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; - -public interface IDataSource<T> { - public T getId(); - - public Object[] getSchemaTypes(); - - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); -} +use test; +// testing that test6 a and test6 b are the same data sources and hence replicate should exist in the plan +select count(a.quarter) as cnt +from /*+ subpath /multi-lines/json */ test6 a +join /*+ subpath /multi-lines/json */ test6 b on a.quarter = b.quarter; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.008.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.008.adm new file mode 100644 index 0000000000..7660e7e5e6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.008.adm @@ -0,0 +1,25 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.009.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.009.adm new file mode 100644 index 0000000000..764398637a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.009.adm @@ -0,0 +1,25 @@ +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } +{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.010.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.010.adm new file mode 100644 index 0000000000..86babba1b3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.010.adm @@ -0,0 +1 @@ +50128 \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.011.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.011.plan new file mode 100644 index 0000000000..6ec7195319 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.011.plan @@ -0,0 +1,48 @@ +distribute result [$$48] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] +-- DISTRIBUTE_RESULT |UNPARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + project ([$$48]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- STREAM_PROJECT |UNPARTITIONED| + assign [$$48] <- [{"cnt": $$51}] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- ASSIGN |UNPARTITIONED| + aggregate [$$51] <- [agg-sql-sum($$53)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- AGGREGATE |UNPARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- RANDOM_MERGE_EXCHANGE |PARTITIONED| + aggregate [$$53] <- [agg-sql-count($$49)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- AGGREGATE |PARTITIONED| + project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$49, $$50)) [cardinality: 1000000.0, op-cost: 2000000.0, total-cost: 6000000.0] + -- HYBRID_HASH_JOIN [$$49][$$50] |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- HASH_PARTITION_EXCHANGE [$$49] |PARTITIONED| + project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$49] <- [$$a.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$a] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- HASH_PARTITION_EXCHANGE [$$50] |PARTITIONED| + project ([$$50]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$50] <- [$$b.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$b] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.012.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.012.adm new file mode 100644 index 0000000000..6cd35abbe2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.012.adm @@ -0,0 +1 @@ +{ "cnt": 15600 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.013.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.013.plan new file mode 100644 index 0000000000..22d5bd44f5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.013.plan @@ -0,0 +1,62 @@ +distribute result [$$47] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] +-- DISTRIBUTE_RESULT |UNPARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + project ([$$47]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- STREAM_PROJECT |UNPARTITIONED| + assign [$$47] <- [{"cnt": $$50}] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- ASSIGN |UNPARTITIONED| + aggregate [$$50] <- [agg-sql-sum($$52)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- AGGREGATE |UNPARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- RANDOM_MERGE_EXCHANGE |PARTITIONED| + aggregate [$$52] <- [agg-sql-count($$48)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- AGGREGATE |PARTITIONED| + project ([$$48]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$48, $$49)) [cardinality: 1000000.0, op-cost: 2000000.0, total-cost: 6000000.0] + -- HYBRID_HASH_JOIN [$$48][$$49] |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$48]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$48] <- [$$49] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + replicate [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- REPLICATE |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- HASH_PARTITION_EXCHANGE [$$49] |PARTITIONED| + project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$49] <- [$$b.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$b] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + replicate [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- REPLICATE |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- HASH_PARTITION_EXCHANGE [$$49] |PARTITIONED| + project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$49] <- [$$b.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$b] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.014.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.014.adm new file mode 100644 index 0000000000..8006c78129 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.014.adm @@ -0,0 +1 @@ +{ "cnt": 1800 } \ No newline at end of file diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/annotations/ExternalSubpathAnnotation.java similarity index 53% copy from hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java copy to asterixdb/asterix-common/src/main/java/org/apache/asterix/common/annotations/ExternalSubpathAnnotation.java index e15b699b2b..e3e5ea3d30 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/annotations/ExternalSubpathAnnotation.java @@ -16,25 +16,19 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.hyracks.algebricks.core.algebra.metadata; +package org.apache.asterix.common.annotations; -import java.util.List; +import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation; -import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; -import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain; +public final class ExternalSubpathAnnotation implements IExpressionAnnotation { -public interface IDataSource<T> { - public T getId(); + private final String subPath; - public Object[] getSchemaTypes(); + public ExternalSubpathAnnotation(String subPath) { + this.subPath = subPath == null ? "" : subPath.trim(); + } - public IDataSourcePropertiesProvider getPropertiesProvider(); - - public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList); - - // https://issues.apache.org/jira/browse/ASTERIXDB-1619 - public boolean isScanAccessPathALeaf(); - - public INodeDomain getDomain(); + public String getSubPath() { + return subPath; + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java index 2097b4bcff..050a080622 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java @@ -297,6 +297,7 @@ public class ExternalDataConstants { public static final String DEFINITION_FIELD_NAME = "definition"; public static final String CONTAINER_NAME_FIELD_NAME = "container"; + public static final String SUBPATH = "subpath"; public static class ParquetOptions { private ParquetOptions() { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index ba07629b05..29e04e9904 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -738,10 +738,28 @@ public class ExternalDataUtils { public static String getPrefix(Map<String, String> configuration, boolean appendSlash) { String definition = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME); - if (definition != null && !definition.isEmpty()) { + String subPath = configuration.get(ExternalDataConstants.SUBPATH); + boolean hasDefinition = definition != null && !definition.isEmpty(); + boolean hasSubPath = subPath != null && !subPath.isEmpty(); + if (hasDefinition && !hasSubPath) { return appendSlash ? definition + (!definition.endsWith("/") ? "/" : "") : definition; } - return ""; + String fullPath = ""; + if (hasSubPath) { + if (!hasDefinition) { + fullPath = subPath.startsWith("/") ? subPath.substring(1) : subPath; + } else { + // concatenate definition + subPath: + if (definition.endsWith("/") && subPath.startsWith("/")) { + subPath = subPath.substring(1); + } else if (!definition.endsWith("/") && !subPath.startsWith("/")) { + definition = definition + "/"; + } + fullPath = definition + subPath; + } + fullPath = appendSlash ? fullPath + (!fullPath.endsWith("/") ? "/" : "") : fullPath; + } + return fullPath; } /** diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java index f565fcd494..da3504d448 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java @@ -49,6 +49,7 @@ public enum SqlppHint { GEN_FIELDS_HINT("gen-fields"), SINGLE_DATASET_PREDICATE_SELECTIVITY_HINT("selectivity"), JOIN_PREDICATE_PRODUCTIVITY_HINT("productivity"), + SUBPATH_HINT("subpath"), // data generator hints DGEN_HINT("dgen"); diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java index 4b0caca142..8fbad4fba3 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java @@ -203,6 +203,7 @@ public class VariableCheckAndRewriteVisitor extends AbstractSqlppExpressionScopi argList.add(new LiteralExpr(new StringLiteral(datasetName))); } CallExpr callExpr = new CallExpr(new FunctionSignature(BuiltinFunctions.DATASET), argList); + callExpr.addHints(varExpr.getHints()); callExpr.setSourceLocation(sourceLoc); return callExpr; } diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj index 759fe9043c..e03a3e31bb 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj +++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj @@ -52,6 +52,7 @@ import org.apache.asterix.common.annotations.AutoDataGen; import org.apache.asterix.common.annotations.DateBetweenYearsDataGen; import org.apache.asterix.common.annotations.DatetimeAddRandHoursDataGen; import org.apache.asterix.common.annotations.DatetimeBetweenYearsDataGen; +import org.apache.asterix.common.annotations.ExternalSubpathAnnotation; import org.apache.asterix.common.annotations.FieldIntervalDataGen; import org.apache.asterix.common.annotations.FieldValFileDataGen; import org.apache.asterix.common.annotations.FieldValFileSameIndexDataGen; @@ -4982,7 +4983,16 @@ FromTerm FromTerm() throws ParseException : List<AbstractBinaryCorrelateClause> correlateClauses = new ArrayList<AbstractBinaryCorrelateClause>(); } { - leftExpr = Expression() ((<AS>)? leftVar = Variable())? (<AT> posVar = Variable())? + leftExpr = Expression() + { + if (leftExpr.getKind() == Expression.Kind.VARIABLE_EXPRESSION) { + Token hintToken = fetchHint(token, SqlppHint.SUBPATH_HINT); + if (hintToken != null) { + String subPath = hintToken.hintParams; + ((VariableExpr) leftExpr).addHint(new ExternalSubpathAnnotation(subPath)); + } + } + } ((<AS>)? leftVar = Variable())? (<AT> posVar = Variable())? ( ( correlateClause = JoinOrUnnestClause(JoinType.INNER, UnnestType.INNER) @@ -5053,7 +5063,16 @@ Triple<Expression, VariableExpr, VariableExpr> JoinClauseRightInput() throws Par VariableExpr posVar = null; } { - rightExpr = Expression() ((<AS>)? rightVar = Variable())? (<AT> posVar = Variable())? + rightExpr = Expression() + { + if (rightExpr.getKind() == Expression.Kind.VARIABLE_EXPRESSION) { + Token hintToken = fetchHint(token, SqlppHint.SUBPATH_HINT); + if (hintToken != null) { + String subPath = hintToken.hintParams; + ((VariableExpr) rightExpr).addHint(new ExternalSubpathAnnotation(subPath)); + } + } + } ((<AS>)? rightVar = Variable())? (<AT> posVar = Variable())? { if (rightVar == null) { rightVar = ExpressionToVariableUtil.getGeneratedVariable(rightExpr, true); diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java index 661b954813..66ea5a7313 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java @@ -19,8 +19,10 @@ package org.apache.asterix.metadata.declared; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXTERNAL_SCAN_BUFFER_SIZE; +import static org.apache.asterix.external.util.ExternalDataConstants.SUBPATH; import java.io.IOException; +import java.io.Serializable; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -120,7 +122,8 @@ public class DatasetDataSource extends DataSource { IProjectionInfo<?> projectionInfo) throws AlgebricksException { switch (dataset.getDatasetType()) { case EXTERNAL: - Dataset externalDataset = ((DatasetDataSource) dataSource).getDataset(); + DatasetDataSource externalDataSource = (DatasetDataSource) dataSource; + Dataset externalDataset = externalDataSource.getDataset(); String itemTypeName = externalDataset.getItemTypeName(); IAType itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), externalDataset.getItemTypeDataverseName(), itemTypeName).getDatatype(); @@ -129,6 +132,7 @@ public class DatasetDataSource extends DataSource { PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig(); int externalScanBufferSize = physicalOptimizationConfig.getExternalScanBufferSize(); Map<String, String> properties = addExternalProjectionInfo(projectionInfo, edd.getProperties()); + properties = addSubPath(externalDataSource.getProperties(), properties); properties.put(KEY_EXTERNAL_SCAN_BUFFER_SIZE, String.valueOf(externalScanBufferSize)); ITypedAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(externalDataset, edd.getAdapter(), properties, (ARecordType) itemType, null, context.getWarningCollector()); @@ -168,6 +172,16 @@ public class DatasetDataSource extends DataSource { return propertiesCopy; } + private Map<String, String> addSubPath(Map<String, Serializable> dataSourceProps, Map<String, String> properties) { + Serializable subPath = dataSourceProps.get(SUBPATH); + if (!(subPath instanceof String)) { + return properties; + } + Map<String, String> propertiesCopy = new HashMap<>(properties); + propertiesCopy.put(SUBPATH, (String) subPath); + return propertiesCopy; + } + private int[] createFilterIndexes(List<LogicalVariable> filterVars, IOperatorSchema opSchema) { if (filterVars != null && !filterVars.isEmpty()) { final int size = filterVars.size(); @@ -185,4 +199,8 @@ public class DatasetDataSource extends DataSource { return dataset.getDatasetType() == DatasetType.EXTERNAL; } + @Override + public boolean compareProperties() { + return dataset.getDatasetType() == DatasetType.EXTERNAL; + } } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java index e15b699b2b..8b4e56d3fc 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java @@ -18,7 +18,9 @@ */ package org.apache.hyracks.algebricks.core.algebra.metadata; +import java.io.Serializable; import java.util.List; +import java.util.Map; import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency; @@ -37,4 +39,10 @@ public interface IDataSource<T> { public boolean isScanAccessPathALeaf(); public INodeDomain getDomain(); + + public Map<String, Serializable> getProperties(); + + default boolean compareProperties() { + return false; + } } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java index b44607513d..9e2e87c197 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java @@ -34,6 +34,7 @@ import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan; import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; +import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; @@ -476,6 +477,12 @@ public class IsomorphismOperatorVisitor implements ILogicalOperatorVisitor<Boole if (!isomorphic) { return Boolean.FALSE; } + IDataSource<?> dataSource = op.getDataSource(); + IDataSource<?> argDataSource = argScan.getDataSource(); + if (dataSource.compareProperties() && argDataSource.compareProperties() + && !Objects.equals(dataSource.getProperties(), argDataSource.getProperties())) { + return Boolean.FALSE; + } DataSourceScanOperator scanOpArg = (DataSourceScanOperator) copyAndSubstituteVar(op, arg); ILogicalExpression opCondition = op.getSelectCondition() != null ? op.getSelectCondition().getValue() : null; ILogicalExpression argCondition =
