This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 18f750a6be57041bc2fa608652cc98c71c94efc7 Author: Wail Alkowaileet <[email protected]> AuthorDate: Tue Dec 5 12:49:14 2023 -0800 [ASTERIXDB-3328][COMP] Preserve x-products of external datasets - user model changes: no - storage format changes: no - interface changes: no Details: Cross-products of external datasets are eliminated (incorrectly) by the compiler Change-Id: I75fa9893a58daddb84c82bcb4b02e3cebc03e67b Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17998 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Wail Alkowaileet <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Tested-by: Jenkins <[email protected]> --- .../rules/RemoveUnusedOneToOneEquiJoinRule.java | 11 ++++-- .../queries/external-cross-product.sqlpp | 34 ++++++++++++++++ .../results/external-cross-product.plan | 45 ++++++++++++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java index 61361f6e10..265791e3b5 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.asterix.common.config.DatasetConfig; import org.apache.asterix.metadata.declared.DataSource; import org.apache.asterix.metadata.declared.DatasetDataSource; import org.apache.asterix.metadata.entities.Dataset; @@ -165,10 +166,14 @@ public class RemoveUnusedOneToOneEquiJoinRule implements IAlgebraicRewriteRule { // Check that all datascans scan the same dataset, and that the join condition // only used primary key variables of those datascans. for (int i = 0; i < dataScans.size(); i++) { + DatasetDataSource currentDataSource = (DatasetDataSource) dataScans.get(i).getDataSource(); + if (currentDataSource.getDataset().getDatasetType() == DatasetConfig.DatasetType.EXTERNAL) { + // The PK condition is not satisfied when external datasets are involved (no PKs) + return -1; + } if (i > 0) { - DatasetDataSource prevAqlDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource(); - DatasetDataSource currAqlDataSource = (DatasetDataSource) dataScans.get(i).getDataSource(); - if (!prevAqlDataSource.getDataset().equals(currAqlDataSource.getDataset())) { + DatasetDataSource previousDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource(); + if (!previousDataSource.getDataset().equals(currentDataSource.getDataset())) { return -1; } } diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/external-cross-product.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/external-cross-product.sqlpp new file mode 100644 index 0000000000..ec96012755 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/external-cross-product.sqlpp @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; +USE test; + +CREATE TYPE OpenType AS { +}; + +CREATE EXTERNAL DATASET Orders(OpenType) USING localfs +( + ("path"="asterix_nc1://data/json/double-150-11.json"), + ("format"="json") +); + +SELECT COUNT(*) +FROM Orders o1, Orders o2, Orders o3; diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/external-cross-product.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/external-cross-product.plan new file mode 100644 index 0000000000..fd95cc163c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/external-cross-product.plan @@ -0,0 +1,45 @@ +-- DISTRIBUTE_RESULT |UNPARTITIONED| + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + -- STREAM_PROJECT |UNPARTITIONED| + -- ASSIGN |UNPARTITIONED| + -- AGGREGATE |UNPARTITIONED| + -- RANDOM_MERGE_EXCHANGE |PARTITIONED| + -- AGGREGATE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.Orders) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.Orders) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.Orders) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED|
