This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 18f750a6be57041bc2fa608652cc98c71c94efc7
Author: Wail Alkowaileet <[email protected]>
AuthorDate: Tue Dec 5 12:49:14 2023 -0800

    [ASTERIXDB-3328][COMP] Preserve x-products of external datasets
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
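    Cross-products of external datasets are incorrectly eliminated
    by the compiler. External datasets have no primary keys, so
    RemoveUnusedOneToOneEquiJoinRule now bails out when any of the
    data scans reads an external dataset.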
    
    Change-Id: I75fa9893a58daddb84c82bcb4b02e3cebc03e67b
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17998
    Integration-Tests: Jenkins <[email protected]>
    Reviewed-by: Wail Alkowaileet <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../rules/RemoveUnusedOneToOneEquiJoinRule.java    | 11 ++++--
 .../queries/external-cross-product.sqlpp           | 34 ++++++++++++++++
 .../results/external-cross-product.plan            | 45 ++++++++++++++++++++++
 3 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java
index 61361f6e10..265791e3b5 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveUnusedOneToOneEquiJoinRule.java
@@ -24,6 +24,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.asterix.common.config.DatasetConfig;
 import org.apache.asterix.metadata.declared.DataSource;
 import org.apache.asterix.metadata.declared.DatasetDataSource;
 import org.apache.asterix.metadata.entities.Dataset;
@@ -165,10 +166,14 @@ public class RemoveUnusedOneToOneEquiJoinRule implements IAlgebraicRewriteRule {
         // Check that all datascans scan the same dataset, and that the join condition
         // only used primary key variables of those datascans.
         for (int i = 0; i < dataScans.size(); i++) {
+            DatasetDataSource currentDataSource = (DatasetDataSource) dataScans.get(i).getDataSource();
+            if (currentDataSource.getDataset().getDatasetType() == DatasetConfig.DatasetType.EXTERNAL) {
+                // The PK condition is not satisfied when external datasets are involved (no PKs)
+                return -1;
+            }
             if (i > 0) {
-                DatasetDataSource prevAqlDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource();
-                DatasetDataSource currAqlDataSource = (DatasetDataSource) dataScans.get(i).getDataSource();
-                if (!prevAqlDataSource.getDataset().equals(currAqlDataSource.getDataset())) {
+                DatasetDataSource previousDataSource = (DatasetDataSource) dataScans.get(i - 1).getDataSource();
+                if (!previousDataSource.getDataset().equals(currentDataSource.getDataset())) {
                     return -1;
                 }
             }
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/external-cross-product.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/external-cross-product.sqlpp
new file mode 100644
index 0000000000..ec96012755
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/external-cross-product.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE OpenType AS {
+};
+
+CREATE EXTERNAL DATASET Orders(OpenType) USING localfs
+(
+   ("path"="asterix_nc1://data/json/double-150-11.json"),
+   ("format"="json")
+);
+
+SELECT COUNT(*)
+FROM Orders o1, Orders o2, Orders o3;
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/external-cross-product.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/external-cross-product.plan
new file mode 100644
index 0000000000..fd95cc163c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/external-cross-product.plan
@@ -0,0 +1,45 @@
+-- DISTRIBUTE_RESULT  |UNPARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+    -- STREAM_PROJECT  |UNPARTITIONED|
+      -- ASSIGN  |UNPARTITIONED|
+        -- AGGREGATE  |UNPARTITIONED|
+          -- RANDOM_MERGE_EXCHANGE  |PARTITIONED|
+            -- AGGREGATE  |PARTITIONED|
+              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                -- NESTED_LOOP  |PARTITIONED|
+                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                    -- NESTED_LOOP  |PARTITIONED|
+                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                        -- STREAM_PROJECT  |PARTITIONED|
+                          -- ASSIGN  |PARTITIONED|
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              -- REPLICATE  |PARTITIONED|
+                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                  -- STREAM_PROJECT  |PARTITIONED|
+                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                      -- DATASOURCE_SCAN (test.Orders)  |PARTITIONED|
+                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                        -- REPLICATE  |PARTITIONED|
+                          -- BROADCAST_EXCHANGE  |PARTITIONED|
+                            -- REPLICATE  |PARTITIONED|
+                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                -- STREAM_PROJECT  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- DATASOURCE_SCAN (test.Orders)  |PARTITIONED|
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                    -- STREAM_PROJECT  |PARTITIONED|
+                      -- ASSIGN  |PARTITIONED|
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          -- REPLICATE  |PARTITIONED|
+                            -- BROADCAST_EXCHANGE  |PARTITIONED|
+                              -- REPLICATE  |PARTITIONED|
+                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                  -- STREAM_PROJECT  |PARTITIONED|
+                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                      -- DATASOURCE_SCAN (test.Orders)  |PARTITIONED|
+                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|

Reply via email to