This is an automated email from the ASF dual-hosted git repository.
ggal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git
The following commit(s) were added to refs/heads/master by this push:
new 826ec291 [LIVY-1011] Upgrade CI image, fix Spark 3 tests
826ec291 is described below
commit 826ec291fa83c26fd715b5f043629af2a48014cb
Author: György Gál <[email protected]>
AuthorDate: Wed Apr 2 13:23:42 2025 +0200
[LIVY-1011] Upgrade CI image, fix Spark 3 tests
## What changes were proposed in this pull request?
The CI environment is broken due to library version conflicts between
the image and the checkout action.
The livy-ci Docker image needs to be upgraded to a more recent version
of Ubuntu, ensuring that the unit tests and integration tests all pass.
Due to a bug in Livy's GitHub workflows, tests had only been executed
with Spark 2: the build step referenced the misspelled matrix variable
"mvn_profile" instead of "maven_profile", so the profile flags were
never passed to Maven and every run fell back to the default Spark 2
profiles. Now that this is fixed, some of the Python-based tests also
need updates to work with Python 3, because Spark 3.2 does not support
Python 2.
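For example, Python 2-only imports in test_python_api.py are now
wrapped in a try/except shim that prefers the Python 3 module (taken
from the diff below):
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse      # Python 2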
Two R-based integration tests are ignored in the Spark 2 integration
test runs because SparkR 2 does not work with R 4, and the last Ubuntu
release shipping R 3.6 is Ubuntu 20.04. If we want to re-enable these
tests, we may need to build R 3.6 for Ubuntu 24.04 ourselves. The skip
mechanism is sketched below.
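The skip is driven by a system property: the Spark 2 matrix entry now
passes -DskipRTests to Maven, and each affected test guards itself with
an assume (see BatchIT.scala and InteractiveIT.scala in the diff):
    assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")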
## How was this patch tested?
CI and unit test runs in a private fork of the repo.
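To reproduce a CI run locally, the same Maven invocation can be used
with one of the matrix profiles, e.g. (command as in the workflows
below):
    mvn -Pthriftserver -Pscala-2.12 -Pspark3 -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify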
---
.github/workflows/build-ci-image.yaml | 6 +-
.github/workflows/integration-tests.yaml | 11 ++--
.github/workflows/unit-tests.yaml | 4 +-
dev/docker/livy-dev-base/Dockerfile | 70 +++++++++++++++++-----
.../src/test/resources/test_python_api.py | 12 +++-
.../test/scala/org/apache/livy/test/BatchIT.scala | 1 +
.../scala/org/apache/livy/test/InteractiveIT.scala | 3 +-
.../apache/livy/repl/PythonInterpreterSpec.scala | 21 +++----
8 files changed, 89 insertions(+), 39 deletions(-)
diff --git a/.github/workflows/build-ci-image.yaml b/.github/workflows/build-ci-image.yaml
index 17437676..828f0a32 100644
--- a/.github/workflows/build-ci-image.yaml
+++ b/.github/workflows/build-ci-image.yaml
@@ -17,7 +17,7 @@
name: 'Build CI images'
on:
push:
- branches: ["main"]
+ branches: ["master"]
paths:
- 'dev/docker/livy-dev-base/Dockerfile'
jobs:
@@ -43,6 +43,6 @@ jobs:
uses: docker/build-push-action@v4
with:
push: true
- context: ./dev/docker
+ context: ./dev/docker/livy-dev-base
tags: |
- ghcr.io/${{ github.repository_owner }}/livy-ci:latest
\ No newline at end of file
+ ghcr.io/${{ github.repository_owner }}/livy-ci:latest
diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml
index 36162dbb..3ba4981f 100644
--- a/.github/workflows/integration-tests.yaml
+++ b/.github/workflows/integration-tests.yaml
@@ -23,14 +23,13 @@ env:
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
jobs:
build:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-24.04
# TODO: Possibly point to the ./build-ci-image.yaml with the "uses" key
container: ghcr.io/${{ github.repository_owner }}/livy-ci:latest
strategy:
matrix:
maven_profile:
- - "-Pscala-2.11 -Pspark2"
- - "-Pscala-2.12 -Pspark2"
+ - "-Pscala-2.11 -Pspark2 -DskipRTests"
- "-Pscala-2.12 -Pspark3"
steps:
-
@@ -46,9 +45,13 @@ jobs:
key: ${{ runner.os }}-maven-${{ hashFiles('pom.xml', '*/pom.xml', 'thriftserver/*/pom.xml', 'core/*/pom.xml', 'repl/*/pom.xml', 'scala-api/*/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ -
+ name: Set Python 3 as default for Spark 3 builds
+ if: ${{ contains(matrix.maven_profile, 'spark3') }}
+ run: pyenv global 3
-
name: Build with Maven
- run: mvn -Pthriftserver ${{ matrix.mvn_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
+ run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
-
name: Upload coverage to codecov
uses: codecov/codecov-action@v3
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
index f3b10cc7..afddbf5d 100644
--- a/.github/workflows/unit-tests.yaml
+++ b/.github/workflows/unit-tests.yaml
@@ -21,7 +21,7 @@ env:
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
jobs:
build:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-24.04
# TODO: Possibly point to the ./build-ci-image.yaml with the "uses" key
container: ghcr.io/${{ github.repository_owner }}/livy-ci:latest
strategy:
@@ -46,7 +46,7 @@ jobs:
${{ runner.os }}-maven-
-
name: Build with Maven
- run: mvn -Pthriftserver ${{ matrix.mvn_profile }} -DskipITs -Dmaven.javadoc.skip=true -B -V -e verify
+ run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipITs -Dmaven.javadoc.skip=true -B -V -e verify
-
name: Upload coverage to codecov
uses: codecov/codecov-action@v3
diff --git a/dev/docker/livy-dev-base/Dockerfile b/dev/docker/livy-dev-base/Dockerfile
index 8d4f101e..25da0872 100644
--- a/dev/docker/livy-dev-base/Dockerfile
+++ b/dev/docker/livy-dev-base/Dockerfile
@@ -15,7 +15,7 @@
# limitations under the License.
#
-FROM ubuntu:xenial
+FROM ubuntu:noble
# configure locale
RUN apt-get update -qq > /dev/null && apt-get install -qq --yes --no-install-recommends \
@@ -23,7 +23,9 @@ RUN apt-get update -qq > /dev/null && apt-get install -qq --yes --no-install-rec
locale-gen en_US.UTF-8
ENV LANG="en_US.UTF-8" \
LANGUAGE="en_US.UTF-8" \
- LC_ALL="en_US.UTF-8"
+ LC_ALL="en_US.UTF-8" \
+ TZ=US \
+ DEBIAN_FRONTEND=noninteractive
# Install necessary dependencies for build/test/debug
# Use `lsof -i -P -n` to find open ports
@@ -32,31 +34,54 @@ RUN apt-get install -qq \
curl \
git \
libkrb5-dev \
+ openjdk-8-jdk-headless \
+ r-base \
maven \
- openjdk-8-jdk \
- python-dev \
- python-pip \
- python3-pip \
software-properties-common \
vim \
wget \
telnet \
lsof
-# R 3.x install - ensure to add the signing key per https://cran.r-project.org/bin/linux/ubuntu/olderreleasesREADME.html
-RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu xenial-cran35/' && \
- apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
- apt-get update && \
- apt-get -qq install r-base
+# This fixes integration tests
+# If setsid is available, signals are sent to containers in MiniYarnCluster using negative pids, however
+# in the Docker container this results in a kill(0) system call which triggers an ExitCodeException in
+# the kill function that breaks test execution. If setsid is removed, pids < 0 are not used.
+# See https://github.com/apache/hadoop/blob/rel/release-2.7.3/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java#L238
+RUN rm /usr/bin/setsid
+
+# python build
+RUN apt-get install -y \
+ build-essential \
+ libbz2-dev \
+ libffi-dev \
+ liblzma-dev \
+ libncurses-dev \
+ libreadline-dev \
+ libsqlite3-dev \
+ libssl-dev \
+ zlib1g-dev
+
+ENV HOME=/root
+
+RUN git clone https://github.com/pyenv/pyenv.git $HOME/pyenv
+
+ENV PYENV_ROOT=$HOME/pyenv
+ENV PATH="$HOME/pyenv/shims:$HOME/pyenv/bin:$HOME/bin:$PATH"
+
+RUN pyenv install -v 2.7.18 && \
+ pyenv install -v 3.9.21 && \
+ pyenv global 2.7.18 3.9.21 && \
+ pyenv rehash
# Add build dependencies for python2
# - First we upgrade pip because that makes a lot of things better
# - Then we remove the provided version of setuptools and install a different version
# - Then we install additional dependencies
-RUN python -m pip install -U "pip < 21.0" && \
- apt-get remove -y python-setuptools && \
- python -m pip install "setuptools < 36" && \
- python -m pip install \
+RUN python2 -m pip install -U "pip < 21.0" && \
+ apt-get remove -y python-setuptools && \
+ python2 -m pip install "setuptools < 36" && \
+ python2 -m pip install \
cloudpickle \
codecov \
flake8 \
@@ -70,7 +95,20 @@ RUN python -m pip install -U "pip < 21.0" && \
"responses >= 0.5.1"
# Now do the same for python3
-RUN python3 -m pip install -U pip
+RUN python3 -m pip install -U pip && pip3 install \
+ cloudpickle \
+ codecov \
+ flake8 \
+ flaky \
+ pytest \
+ pytest-runner \
+ requests-kerberos \
+ requests \
+ responses
+
+RUN pyenv rehash
+
+RUN apt remove -y openjdk-11-jre-headless
WORKDIR /workspace
diff --git a/integration-test/src/test/resources/test_python_api.py b/integration-test/src/test/resources/test_python_api.py
index 0a2d8813..f89f85d8 100644
--- a/integration-test/src/test/resources/test_python_api.py
+++ b/integration-test/src/test/resources/test_python_api.py
@@ -18,12 +18,18 @@ import os
import base64
import json
import time
-from urlparse import urlparse
+try:
+ from urllib.parse import urlparse
+except ImportError:
+ from urlparse import urlparse
import requests
from requests_kerberos import HTTPKerberosAuth, REQUIRED, OPTIONAL
import cloudpickle
import pytest
-import httplib
+try:
+ import httplib
+except ImportError:
+ from http import HTTPStatus as httplib
from flaky import flaky
global session_id, job_id
@@ -144,7 +150,7 @@ def test_error_job():
return "hello" + 1
process_job(error_job,
- "TypeError: cannot concatenate 'str' and 'int' objects", True)
+ "TypeError: ", True)
def test_reconnect():
diff --git a/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala b/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala
index 744a8f53..acfa4a02 100644
--- a/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala
+++ b/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala
@@ -76,6 +76,7 @@ class BatchIT extends BaseIntegrationTestSuite with BeforeAndAfterAll {
}
test("submit a SparkR application") {
+ assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")
val hdfsPath = uploadResource("rtest.R")
withScript(hdfsPath, List.empty) { s =>
s.verifySessionSuccess()
diff --git a/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala b/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala
index 237ee8f3..e5d77cab 100644
--- a/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala
+++ b/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala
@@ -93,12 +93,13 @@ class InteractiveIT extends BaseIntegrationTestSuite {
}
s.run("%table x").verifyResult(".*headers.*type.*name.*data.*")
s.run("abcde").verifyError(ename = "NameError", evalue = "name 'abcde'
is not defined")
- s.run("raise KeyError, 'foo'").verifyError(ename = "KeyError", evalue =
"'foo'")
+ s.run("raise KeyError('foo')").verifyError(ename = "KeyError", evalue =
"'foo'")
s.run("print(1)\r\nprint(1)").verifyResult("1\n1")
}
}
test("R interactive session") {
+ assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")
withNewSession(SparkR) { s =>
// R's output sometimes includes the count of statements, which makes it annoying to test
// things. This helps a bit.
diff --git a/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala b/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
index 52a42918..e2d63e8c 100644
--- a/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
+++ b/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkConf
import org.json4s.{DefaultFormats, JNull, JValue}
import org.json4s.JsonDSL._
import org.scalatest._
+import org.scalatest.Inside.inside
import org.apache.livy.rsc.driver.SparkEntries
import org.apache.livy.sessions._
@@ -228,16 +229,16 @@ abstract class PythonBaseInterpreterSpec extends BaseInterpreterSpec {
|'
""".stripMargin)
- response should equal(Interpreter.ExecuteError(
- "SyntaxError",
- "EOL while scanning string literal (<stdin>, line 2)",
- List(
- " File \"<stdin>\", line 2\n",
- " '\n",
- " ^\n",
- "SyntaxError: EOL while scanning string literal\n"
- )
- ))
+ inside (response) {
+ case Interpreter.ExecuteError(ename, evalue, traceback) => {
+ ename shouldBe "SyntaxError"
+ evalue should (startWith("EOL while scanning string literal")
+ or startWith("unterminated string literal"))
+ traceback.last should (startWith("SyntaxError: EOL while scanning string literal")
+ or startWith("SyntaxError: unterminated string literal"))
+ }
+ case _ => fail()
+ }
response = intp.execute("x")
response should equal(Interpreter.ExecuteError(