This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new e48b12ea build: Add Spark SQL test pipeline with ANSI mode enabled (#321)
e48b12ea is described below

commit e48b12ead988bdb842728881181d49eca399982a
Author: Parth Chandra <[email protected]>
AuthorDate: Fri May 3 09:36:11 2024 -0700

    build: Add Spark SQL test pipeline with ANSI mode enabled (#321)
    
    * build: Add Spark SQL test pipeline with ANSI mode enabled
    
    * add ENABLE_COMET_ANSI_MODE to actual run of tests
    
    * fix diff and rat exclusion
    
    * fix diff
    
    * Make workflow manual run only
    
    * fix diff
---
 .github/workflows/spark_sql_test_ansi.yml | 81 +++++++++++++++++++++++++++++++
 dev/diffs/3.4.2.diff                      | 40 ++++++++++++---
 pom.xml                                   |  2 +-
 3 files changed, 114 insertions(+), 9 deletions(-)
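
For readers skimming the patch: the new workflow exports ENABLE_COMET_ANSI_MODE=true into the test environment, and the patched test harness (see the SQLTestUtils and SharedSparkSession hunks below) turns that variable into Spark confs. As a minimal, self-contained sketch of the boolean env-var gate the patch relies on (the object and method names here are illustrative only; the real helper added below is enableCometAnsiMode):

    // Sketch of the env-var gate used throughout this patch. An unset
    // variable means false; otherwise the value is parsed by Scala's
    // String#toBoolean, which accepts "true"/"false" case-insensitively
    // and throws on anything else.
    object EnvGateSketch {
      def envFlag(name: String): Boolean = {
        val v = System.getenv(name)
        v != null && v.toBoolean
      }

      def main(args: Array[String]): Unit = {
        // Prints "true" only when ENABLE_COMET_ANSI_MODE=true is exported.
        println(envFlag("ENABLE_COMET_ANSI_MODE"))
      }
    }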

diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml
new file mode 100644
index 00000000..5c5d2858
--- /dev/null
+++ b/.github/workflows/spark_sql_test_ansi.yml
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Spark SQL Tests (ANSI mode)
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+on:
+  # enable the following once ANSI support is completed
+  #  push:
+  #    paths-ignore:
+  #      - "doc/**"
+  #      - "**.md"
+  #  pull_request:
+  #    paths-ignore:
+  #      - "doc/**"
+  #      - "**.md"
+
+  # manual trigger ONLY
+  # 
https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+
+env:
+  RUST_VERSION: nightly
+
+jobs:
+  spark-sql-catalyst:
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        java-version: [11]
+        spark-version: [{short: '3.4', full: '3.4.2'}]
+        module:
+          - {name: "catalyst", args1: "catalyst/test", args2: ""}
+          - {name: "sql/core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
+          - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
+          - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
+      fail-fast: false
+    name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java-version }}
+      - name: Setup Spark
+        uses: ./.github/actions/setup-spark-builder
+        with:
+          spark-version: ${{ matrix.spark-version.full }}
+          spark-short-version: ${{ matrix.spark-version.short }}
+          comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
+      - name: Run Spark tests
+        run: |
+          cd apache-spark
+          ENABLE_COMET=true ENABLE_COMET_ANSI_MODE=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+        env:
+          LC_ALL: "C.UTF-8"
+
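The dev/diffs changes below wire that environment variable into the Spark test session. As a rough sketch of the effect (assuming Spark on the classpath; this is not the literal patch, which lives inside SharedSparkSessionBase, but the confs set are the same):

    import org.apache.spark.SparkConf

    // When the gate is on, layer the ANSI confs on top of whatever Comet
    // settings are already present. SparkConf.set mutates and returns the
    // same instance, so the chained style below is safe.
    def withAnsiIfEnabled(conf: SparkConf): SparkConf = {
      val v = System.getenv("ENABLE_COMET_ANSI_MODE")
      if (v != null && v.toBoolean) {
        conf
          .set("spark.sql.ansi.enabled", "true")
          .set("spark.comet.ansi.enabled", "true")
      }
      conf
    }
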
diff --git a/dev/diffs/3.4.2.diff b/dev/diffs/3.4.2.diff
index 7c7323d3..4154a705 100644
--- a/dev/diffs/3.4.2.diff
+++ b/dev/diffs/3.4.2.diff
@@ -1327,7 +1327,7 @@ index abe606ad9c1..2d930b64cca 100644
      val tblTargetName = "tbl_target"
      val tblSourceQualified = s"default.$tblSourceName"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-index dd55fcfe42c..cc18147d17a 100644
+index dd55fcfe42c..b4776c50e49 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 @@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -1351,7 +1351,7 @@ index dd55fcfe42c..cc18147d17a 100644
      }
    }
  
-@@ -242,6 +247,23 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +247,32 @@ private[sql] trait SQLTestUtilsBase
      protected override def _sqlContext: SQLContext = self.spark.sqlContext
    }
  
@@ -1371,11 +1371,20 @@ index dd55fcfe42c..cc18147d17a 100644
 +    val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
 +    v != null && v.toBoolean
 +  }
++
++  /**
++   * Whether to enable ANSI mode. This is only effective when
++   * [[isCometEnabled]] returns true.
++   */
++  protected def enableCometAnsiMode: Boolean = {
++    val v = System.getenv("ENABLE_COMET_ANSI_MODE")
++    v != null && v.toBoolean
++  }
 +
    protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
      SparkSession.setActiveSession(spark)
      super.withSQLConf(pairs: _*)(f)
-@@ -434,6 +456,8 @@ private[sql] trait SQLTestUtilsBase
+@@ -434,6 +465,8 @@ private[sql] trait SQLTestUtilsBase
      val schema = df.schema
      val withoutFilters = df.queryExecution.executedPlan.transform {
        case FilterExec(_, child) => child
@@ -1385,10 +1394,10 @@ index dd55fcfe42c..cc18147d17a 100644
  
      spark.internalCreateDataFrame(withoutFilters.execute(), schema)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-index ed2e309fa07..4cfe0093da7 100644
+index ed2e309fa07..f64cc283903 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -74,6 +74,21 @@ trait SharedSparkSessionBase
+@@ -74,6 +74,28 @@ trait SharedSparkSessionBase
       // this rule may potentially block testing of other optimization rules such as
        // ConstantPropagation etc.
       .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
@@ -1406,6 +1415,13 @@ index ed2e309fa07..4cfe0093da7 100644
 +            "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
 +          .set("spark.comet.exec.shuffle.enabled", "true")
 +      }
++
++      if (enableCometAnsiMode) {
++        conf
++          .set("spark.sql.ansi.enabled", "true")
++          .set("spark.comet.ansi.enabled", "true")
++      }
++
 +    }
      conf.set(
        StaticSQLConf.WAREHOUSE_PATH,
@@ -1447,10 +1463,10 @@ index 1966e1e64fd..cde97a0aafe 100644
        spark.sql(
          """
 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-index 07361cfdce9..c5d94c92e32 100644
+index 07361cfdce9..1763168a808 100644
 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
 +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -55,25 +55,46 @@ object TestHive
+@@ -55,25 +55,54 @@ object TestHive
      new SparkContext(
        System.getProperty("spark.sql.test.master", "local[1]"),
        "TestSQLContext",
@@ -1507,8 +1523,16 @@ index 07361cfdce9..c5d94c92e32 100644
 +                "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
 +              .set("spark.comet.exec.shuffle.enabled", "true")
 +          }
-+        }
++
++          val a = System.getenv("ENABLE_COMET_ANSI_MODE")
++          if (a != null && a.toBoolean) {
++            conf
++              .set("spark.sql.ansi.enabled", "true")
++              .set("spark.comet.ansi.enabled", "true")
++          }
  
++        }
++
 +        conf
 +      }
 +    ))
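
One way to sanity-check the wiring locally before dispatching the workflow: a small, hypothetical helper (not part of the patch; needs only a JDK and Scala) that reports which gates the harness would see. Run it under the same environment the workflow sets, e.g. ENABLE_COMET=true ENABLE_COMET_ANSI_MODE=true.

    // Hypothetical local check, mirroring the gates read by the patched
    // test harness above. Names here are illustrative, not from the patch.
    object CometEnvCheck {
      private def flag(name: String): Boolean = {
        val v = System.getenv(name)
        v != null && v.toBoolean
      }

      def main(args: Array[String]): Unit = {
        println(s"ENABLE_COMET           -> ${flag("ENABLE_COMET")}")
        println(s"ENABLE_COMET_ANSI_MODE -> ${flag("ENABLE_COMET_ANSI_MODE")}")
      }
    }
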
diff --git a/pom.xml b/pom.xml
index 6d28c816..d47953fa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -886,7 +886,7 @@ under the License.
             <exclude>rust-toolchain</exclude>
             <exclude>Makefile</exclude>
             <exclude>dev/Dockerfile*</exclude>
-            <exclude>dev/diff/**</exclude>
+            <exclude>dev/diffs/**</exclude>
             <exclude>dev/deploy-file</exclude>
             <exclude>**/test/resources/**</exclude>
             <exclude>**/benchmarks/*.txt</exclude>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
