This is an automated email from the ASF dual-hosted git repository. zjffdu pushed a commit to branch branch-0.10 in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/branch-0.10 by this push: new 2128a8d [ZEPPELIN-5565] Support Spark 3.2 2128a8d is described below commit 2128a8d42c21c2c36b1253e552bce1ed290fc43f Author: Jeff Zhang <zjf...@apache.org> AuthorDate: Mon Oct 25 15:53:13 2021 +0800 [ZEPPELIN-5565] Support Spark 3.2 Simple PR to support Spark 3.2. Not many code changes are needed in the Spark module, because Spark 3.2 is compatible with Spark 3.1, which we already support. Only netty is excluded in pom.xml, because it conflicts with the netty that Spark 3.2 uses. This only affects the Spark unit tests. The integration tests still pass even without this netty change, because the unit tests use a different classpath. [ Improvement ] * [ ] - Task * https://issues.apache.org/jira/browse/ZEPPELIN-5565 * CI pass * Do the license files need an update? No * Are there breaking changes for older versions? No * Does this need documentation? No Author: Jeff Zhang <zjf...@apache.org> Closes #4257 from zjffdu/ZEPPELIN-5565 and squashes the following commits: 9b5b6c42d7 [Jeff Zhang] Update doc 7b8924a850 [Jeff Zhang] Update scala version 1b20f934b1 [Jeff Zhang] [ZEPPELIN-5565] Support Spark 3.2 (cherry picked from commit 2f55fe8ed277b28d71f858633f9c9d76fd18f0c3) Signed-off-by: Jeff Zhang <zjf...@apache.org> --- .github/workflows/core.yml | 43 +++++++++++++++- .github/workflows/frontend.yml | 2 +- docs/setup/basics/how_to_build.md | 4 +- spark/interpreter/pom.xml | 12 +++++ .../org/apache/zeppelin/spark/SparkShimsTest.java | 6 +-- spark/pom.xml | 14 +++++- spark/scala-2.12/pom.xml | 2 +- .../org/apache/zeppelin/spark/SparkVersion.java | 8 ++- .../integration/SparkIntegrationTest32.java | 58 ++++++++++++++++++++++ .../integration/ZeppelinSparkClusterTest32.java | 40 +++++++++++++++ 10 files changed, 174 insertions(+), 15 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index e33fe18..29c2a0b 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -217,7 +217,7 
@@ jobs: mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B mvn clean package -pl zeppelin-plugins -amd -DskipTests -B - name: run tests - run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest30,SparkIntegrationTest31 -DfailIfNoTests=false + run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest30,SparkIntegrationTest31,ZeppelinSparkClusterTest32,SparkIntegrationTest3 [...] 
jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11: runs-on: ubuntu-20.04 @@ -377,6 +377,47 @@ jobs: run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B - name: run tests with ${{ matrix.python }} run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false + + spark-3-2-and-scala-2-12-and-other-interpreter: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python: [ 3.7, 3.8 ] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Set up JDK 8 + uses: actions/setup-java@v2 + with: + distribution: 'adopt' + java-version: 8 + - name: Cache local Maven repository + uses: actions/cache@v2 + with: + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/zeppelin/ + key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-zeppelin- + - name: Setup conda environment with python ${{ matrix.python }} and R + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: python_3_with_R + environment-file: testing/env_python_${{ matrix.python }}_with_R.yml + python-version: ${{ matrix.python }} + auto-activate-base: false + - name: Make IRkernel available to Jupyter + run: | + R -e "IRkernel::installspec()" + - name: install environment + run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.2 -Pspark-scala-2.12 -Phadoop2 -B + - name: run tests with ${{ matrix.python }} + run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.2 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false + 
test-livy-0-5-with-spark-2-2-0-under-python3: runs-on: ubuntu-20.04 steps: diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml index 9fb1f8b..5a59dcd 100644 --- a/.github/workflows/frontend.yml +++ b/.github/workflows/frontend.yml @@ -71,7 +71,7 @@ jobs: - name: Run headless test run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" mvn package -DskipRat -pl zeppelin-web-angular -Pweb-angular -B - test-selenium-with-spark-module-for-spark-2-3: + test-selenium-with-spark-module-for-spark-2-4: runs-on: ubuntu-20.04 defaults: run: diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md index 7f70c33..4e43915 100644 --- a/docs/setup/basics/how_to_build.md +++ b/docs/setup/basics/how_to_build.md @@ -106,6 +106,8 @@ Set spark major version Available profiles are ``` +-Pspark-3.2 +-Pspark-3.1 -Pspark-3.0 -Pspark-2.4 -Pspark-2.3 @@ -120,7 +122,7 @@ minor version can be adjusted by `-Dspark.version=x.x.x` ##### `-Pspark-scala-[version] (optional)` To be noticed, these profiles also only affect the embedded mode (no need to specify `SPARK_HOME`) of Spark interpreter. -Actually Zeppelin supports all the versions of scala (2.10, 2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`. +Actually Zeppelin supports all the versions of scala (2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`. 
Available profiles are diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml index 4c0b918..4f59e9b 100644 --- a/spark/interpreter/pom.xml +++ b/spark/interpreter/pom.xml @@ -112,6 +112,10 @@ <groupId>net.sf.py4j</groupId> <artifactId>py4j</artifactId> </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> @@ -126,6 +130,10 @@ <groupId>net.sf.py4j</groupId> <artifactId>py4j</artifactId> </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> @@ -154,6 +162,10 @@ <groupId>net.sf.py4j</groupId> <artifactId>py4j</artifactId> </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java index d428502..812a981 100644 --- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java +++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java @@ -131,11 +131,7 @@ public class SparkShimsTest { try { sparkShims = SparkShims.getInstance(SparkVersion.SPARK_2_0_0.toString(), new Properties(), null); } catch (Throwable e2) { - try { - sparkShims = SparkShims.getInstance(SparkVersion.SPARK_1_6_0.toString(), new Properties(), null); - } catch (Throwable e3) { - throw new RuntimeException("All SparkShims are tried, but no one can be created."); - } + throw new RuntimeException("All SparkShims are tried, but no one can be created."); } } } diff --git a/spark/pom.xml b/spark/pom.xml index 05e4bee..31ae659 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -169,7 +169,7 @@ <!-- profile spark-x only affect the embedded spark version in zeppelin distribution --> <profile> - <id>spark-3.1</id> + <id>spark-3.2</id> <activation> 
<activeByDefault>true</activeByDefault> </activation> @@ -177,6 +177,18 @@ <datanucleus.core.version>4.1.17</datanucleus.core.version> <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version> <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version> + <spark.version>3.2.0</spark.version> + <protobuf.version>2.5.0</protobuf.version> + <py4j.version>0.10.9.2</py4j.version> + </properties> + </profile> + + <profile> + <id>spark-3.1</id> + <properties> + <datanucleus.core.version>4.1.17</datanucleus.core.version> + <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version> + <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version> <spark.version>3.1.2</spark.version> <protobuf.version>2.5.0</protobuf.version> <py4j.version>0.10.9</py4j.version> diff --git a/spark/scala-2.12/pom.xml b/spark/scala-2.12/pom.xml index b841b44..c8bb59c 100644 --- a/spark/scala-2.12/pom.xml +++ b/spark/scala-2.12/pom.xml @@ -32,7 +32,7 @@ <properties> <spark.version>2.4.5</spark.version> - <spark.scala.version>2.12.10</spark.scala.version> + <spark.scala.version>2.12.15</spark.scala.version> <spark.scala.binary.version>2.12</spark.scala.binary.version> <spark.scala.compile.version>${spark.scala.version}</spark.scala.compile.version> </properties> diff --git a/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java b/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java index 0ad1a35..6b8ab37 100644 --- a/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java +++ b/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java @@ -25,18 +25,16 @@ import org.slf4j.LoggerFactory; public class SparkVersion { private static final Logger logger = LoggerFactory.getLogger(SparkVersion.class); - public static final SparkVersion SPARK_1_6_0 = SparkVersion.fromVersionString("1.6.0"); - public static final SparkVersion SPARK_2_0_0 = SparkVersion.fromVersionString("2.0.0"); public static final SparkVersion 
SPARK_2_2_0 = SparkVersion.fromVersionString("2.2.0"); public static final SparkVersion SPARK_2_3_0 = SparkVersion.fromVersionString("2.3.0"); public static final SparkVersion SPARK_2_3_1 = SparkVersion.fromVersionString("2.3.1"); public static final SparkVersion SPARK_2_4_0 = SparkVersion.fromVersionString("2.4.0"); public static final SparkVersion SPARK_3_1_0 = SparkVersion.fromVersionString("3.1.0"); - public static final SparkVersion SPARK_3_2_0 = SparkVersion.fromVersionString("3.2.0"); + public static final SparkVersion SPARK_3_3_0 = SparkVersion.fromVersionString("3.3.0"); - public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_1_6_0; - public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_3_2_0; + public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_2_0_0; + public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_3_3_0; private int version; private int majorVersion; diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest32.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest32.java new file mode 100644 index 0000000..001ec0f --- /dev/null +++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest32.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.integration; + +import org.apache.zeppelin.interpreter.InterpreterSetting; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.List; + +@RunWith(value = Parameterized.class) +public class SparkIntegrationTest32 extends SparkIntegrationTest { + + public SparkIntegrationTest32(String sparkVersion, String hadoopVersion) { + super(sparkVersion, hadoopVersion); + } + + @Parameterized.Parameters + public static List<Object[]> data() { + return Arrays.asList(new Object[][]{ + {"3.2.0", "2.7"}, + // TODO(zjffdu) Run integration tests under profile hadoop3 + // {"3.2.0", "3.2"} + }); + } + + @Override + protected void setUpSparkInterpreterSetting(InterpreterSetting interpreterSetting) { + // spark3 doesn't support yarn-client and yarn-cluster anymore, use + // spark.master and spark.submit.deployMode instead + String sparkMaster = interpreterSetting.getJavaProperties().getProperty("spark.master"); + if (sparkMaster.equals("yarn-client")) { + interpreterSetting.setProperty("spark.master", "yarn"); + interpreterSetting.setProperty("spark.submit.deployMode", "client"); + } else if (sparkMaster.equals("yarn-cluster")){ + interpreterSetting.setProperty("spark.master", "yarn"); + interpreterSetting.setProperty("spark.submit.deployMode", "cluster"); + } else if (sparkMaster.startsWith("local")) { + interpreterSetting.setProperty("spark.submit.deployMode", "client"); + } + } +} diff --git 
a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest32.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest32.java new file mode 100644 index 0000000..2b6fa13 --- /dev/null +++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest32.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.integration; + +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.List; + +@RunWith(value = Parameterized.class) +public class ZeppelinSparkClusterTest32 extends ZeppelinSparkClusterTest { + + public ZeppelinSparkClusterTest32(String sparkVersion, String hadoopVersion) throws Exception { + super(sparkVersion, hadoopVersion); + } + + @Parameterized.Parameters + public static List<Object[]> data() { + return Arrays.asList(new Object[][]{ + {"3.2.0", "2.7"}, + {"3.2.0", "3.2"} + }); + } +}