This is an automated email from the ASF dual-hosted git repository.
ggal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git
The following commit(s) were added to refs/heads/master by this push:
new 826ec291 [LIVY-1011] Upgrade CI image, fix Spark 3 tests
826ec291 is described below
commit 826ec291fa83c26fd715b5f043629af2a48014cb
Author: György Gál <[email protected]>
AuthorDate: Wed Apr 2 13:23:42 2025 +0200
[LIVY-1011] Upgrade CI image, fix Spark 3 tests
## What changes were proposed in this pull request?
The CI environment is broken due to library version conflicts between
the image and the checkout action.
The livy-ci Docker image needs to be upgraded to a more recent version
of Ubuntu, ensuring that the unit tests and integration tests all pass.
Due to a bug in Livy's GitHub workflows, tests had only been executed
with Spark 2: the build step referenced the misspelled matrix variable
"mvn_profile" instead of "maven_profile", so the profile flags were
never passed to Maven and every run fell back to the default Spark 2
profiles. Now that this is fixed, some of the Python-based tests also
need updates to work with Python 3, because Spark 3.2 does not support
Python 2.
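For example, Python 2-only imports in test_python_api.py are now
wrapped in a try/except shim that prefers the Python 3 module (taken
from the diff below):
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse      # Python 2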
Two R-based integration tests are ignored in the Spark 2 integration
test runs because SparkR 2 does not work with R 4, and the last Ubuntu
release shipping R 3.6 is Ubuntu 20.04. If we want to re-enable these
tests, we may need to build R 3.6 for Ubuntu 24.04 ourselves. The skip
mechanism is sketched below.
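The skip is driven by a system property: the Spark 2 matrix entry now
passes -DskipRTests to Maven, and each affected test guards itself with
an assume (see BatchIT.scala and InteractiveIT.scala in the diff):
    assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")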
## How was this patch tested?
CI and unit test runs in a private fork of the repo.
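To reproduce a CI run locally, the same Maven invocation can be used
with one of the matrix profiles, e.g. (command as in the workflows
below):
    mvn -Pthriftserver -Pscala-2.12 -Pspark3 -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify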
---
.github/workflows/build-ci-image.yaml | 6 +-
.github/workflows/integration-tests.yaml | 11 ++--
.github/workflows/unit-tests.yaml | 4 +-
dev/docker/livy-dev-base/Dockerfile | 70 +++++++++++++++++-----
.../src/test/resources/test_python_api.py | 12 +++-
.../test/scala/org/apache/livy/test/BatchIT.scala | 1 +
.../scala/org/apache/livy/test/InteractiveIT.scala | 3 +-
.../apache/livy/repl/PythonInterpreterSpec.scala | 21 +++----
8 files changed, 89 insertions(+), 39 deletions(-)
diff --git a/.github/workflows/build-ci-image.yaml b/.github/workflows/build-ci-image.yaml
index 17437676..828f0a32 100644
--- a/.github/workflows/build-ci-image.yaml
+++ b/.github/workflows/build-ci-image.yaml
@@ -17,7 +17,7 @@
name: 'Build CI images'
on:
push:
- branches: ["main"]
+ branches: ["master"]
paths:
- 'dev/docker/livy-dev-base/Dockerfile'
jobs:
@@ -43,6 +43,6 @@ jobs:
uses: docker/build-push-action@v4
with:
push: true
- context: ./dev/docker
+ context: ./dev/docker/livy-dev-base
tags: |
- ghcr.io/${{ github.repository_owner }}/livy-ci:latest
\ No newline at end of file
+ ghcr.io/${{ github.repository_owner }}/livy-ci:latest
diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml
index 36162dbb..3ba4981f 100644
--- a/.github/workflows/integration-tests.yaml
+++ b/.github/workflows/integration-tests.yaml
@@ -23,14 +23,13 @@ env:
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
jobs:
build:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-24.04
# TODO: Possibly point to the ./build-ci-image.yaml with the "uses" key
container: ghcr.io/${{ github.repository_owner }}/livy-ci:latest
strategy:
matrix:
maven_profile:
- - "-Pscala-2.11 -Pspark2"
- - "-Pscala-2.12 -Pspark2"
+ - "-Pscala-2.11 -Pspark2 -DskipRTests"
- "-Pscala-2.12 -Pspark3"
steps:
-
@@ -46,9 +45,13 @@ jobs:
key: ${{ runner.os }}-maven-${{ hashFiles('pom.xml', '*/pom.xml', 'thriftserver/*/pom.xml', 'core/*/pom.xml', 'repl/*/pom.xml', 'scala-api/*/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ -
+ name: Set Python 3 as default for Spark 3 builds
+ if: ${{ contains(matrix.maven_profile, 'spark3') }}
+ run: pyenv global 3
-
name: Build with Maven
- run: mvn -Pthriftserver ${{ matrix.mvn_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
+ run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
-
name: Upload coverage to codecov
uses: codecov/codecov-action@v3
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
index f3b10cc7..afddbf5d 100644
--- a/.github/workflows/unit-tests.yaml
+++ b/.github/workflows/unit-tests.yaml
@@ -21,7 +21,7 @@ env:
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
jobs:
build:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-24.04
# TODO: Possibly point to the ./build-ci-image.yaml with the "uses" key
container: ghcr.io/${{ github.repository_owner }}/livy-ci:latest
strategy:
@@ -46,7 +46,7 @@ jobs:
${{ runner.os }}-maven-
-
name: Build with Maven
- run: mvn -Pthriftserver ${{ matrix.mvn_profile }} -DskipITs -Dmaven.javadoc.skip=true -B -V -e verify
+ run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipITs -Dmaven.javadoc.skip=true -B -V -e verify
-
name: Upload coverage to codecov
uses: codecov/codecov-action@v3
diff --git a/dev/docker/livy-dev-base/Dockerfile b/dev/docker/livy-dev-base/Dockerfile
index 8d4f101e..25da0872 100644
--- a/dev/docker/livy-dev-base/Dockerfile
+++ b/dev/docker/livy-dev-base/Dockerfile
@@ -15,7 +15,7 @@
# limitations under the License.
#
-FROM ubuntu:xenial
+FROM ubuntu:noble
# configure locale
RUN apt-get update -qq > /dev/null && apt-get install -qq --yes --no-install-recommends \
@@ -23,7 +23,9 @@ RUN apt-get update -qq > /dev/null && apt-get install -qq --yes --no-install-rec
locale-gen en_US.UTF-8
ENV LANG="en_US.UTF-8" \
LANGUAGE="en_US.UTF-8" \
- LC_ALL="en_US.UTF-8"
+ LC_ALL="en_US.UTF-8" \
+ TZ=US \
+ DEBIAN_FRONTEND=noninteractive
# Install necessary dependencies for build/test/debug
# Use `lsof -i -P -n` to find open ports
@@ -32,31 +34,54 @@ RUN apt-get install -qq \
curl \
git \
libkrb5-dev \
+ openjdk-8-jdk-headless \
+ r-base \
maven \
- openjdk-8-jdk \
- python-dev \
- python-pip \
- python3-pip \
software-properties-common \
vim \
wget \
telnet \
lsof
-# R 3.x install - ensure to add the signing key per https://cran.r-project.org/bin/linux/ubuntu/olderreleasesREADME.html
-RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu xenial-cran35/' && \
- apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
- apt-get update && \
- apt-get -qq install r-base
+# This fixes integration tests
+# If setsid is available, signals are sent to containers in MiniYarnCluster using negative pids, however
+# in the Docker container this results in a kill(0) system call which triggers an ExitCodeException in
+# the kill function that breaks test execution. If setsid is removed, pids < 0 are not used.
+# See https://github.com/apache/hadoop/blob/rel/release-2.7.3/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java#L238
+RUN rm /usr/bin/setsid
+
+# python build
+RUN apt-get install -y \
+ build-essential \
+ libbz2-dev \
+ libffi-dev \
+ liblzma-dev \
+ libncurses-dev \
+ libreadline-dev \
+ libsqlite3-dev \
+ libssl-dev \
+ zlib1g-dev
+
+ENV HOME=/root
+
+RUN git clone https://github.com/pyenv/pyenv.git $HOME/pyenv
+
+ENV PYENV_ROOT=$HOME/pyenv
+ENV PATH="$HOME/pyenv/shims:$HOME/pyenv/bin:$HOME/bin:$PATH"
+
+RUN pyenv install -v 2.7.18 && \
+ pyenv install -v 3.9.21 && \
+ pyenv global 2.7.18 3.9.21 && \
+ pyenv rehash
# Add build dependencies for python2
# - First we upgrade pip because that makes a lot of things better
# - Then we remove the provided version of setuptools and install a different version
# - Then we install additional dependencies
-RUN python -m pip install -U "pip < 21.0" && \
- apt-get remove -y python-setuptools && \
- python -m pip install "setuptools < 36" && \
- python -m pip install \
+RUN python2 -m pip install -U "pip < 21.0" && \
+ apt-get remove -y python-setuptools && \
+ python2 -m pip install "setuptools < 36" && \
+ python2 -m pip install \
cloudpickle \
codecov \
flake8 \
@@ -70,7 +95,20 @@ RUN python -m pip install -U "pip < 21.0" && \
"responses >= 0.5.1"
# Now do the same for python3
-RUN python3 -m pip install -U pip
+RUN python3 -m pip install -U pip && pip3 install \
+ cloudpickle \
+ codecov \
+ flake8 \
+ flaky \
+ pytest \
+ pytest-runner \
+ requests-kerberos \
+ requests \
+ responses
+
+RUN pyenv rehash
+
+RUN apt remove -y openjdk-11-jre-headless
WORKDIR /workspace
diff --git a/integration-test/src/test/resources/test_python_api.py b/integration-test/src/test/resources/test_python_api.py
index 0a2d8813..f89f85d8 100644
--- a/integration-test/src/test/resources/test_python_api.py
+++ b/integration-test/src/test/resources/test_python_api.py
@@ -18,12 +18,18 @@ import os
import base64
import json
import time
-from urlparse import urlparse
+try:
+ from urllib.parse import urlparse
+except ImportError:
+ from urlparse import urlparse
import requests
from requests_kerberos import HTTPKerberosAuth, REQUIRED, OPTIONAL
import cloudpickle
import pytest
-import httplib
+try:
+ import httplib
+except ImportError:
+ from http import HTTPStatus as httplib
from flaky import flaky
global session_id, job_id
@@ -144,7 +150,7 @@ def test_error_job():
return "hello" + 1
process_job(error_job,
- "TypeError: cannot concatenate 'str' and 'int' objects", True)
+ "TypeError: ", True)
def test_reconnect():
diff --git a/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala b/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala
index 744a8f53..acfa4a02 100644
--- a/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala
+++ b/integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala
@@ -76,6 +76,7 @@ class BatchIT extends BaseIntegrationTestSuite with BeforeAndAfterAll {
}
test("submit a SparkR application") {
+ assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")
val hdfsPath = uploadResource("rtest.R")
withScript(hdfsPath, List.empty) { s =>
s.verifySessionSuccess()
diff --git a/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala b/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala
index 237ee8f3..e5d77cab 100644
--- a/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala
+++ b/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala
@@ -93,12 +93,13 @@ class InteractiveIT extends BaseIntegrationTestSuite {
}
s.run("%table x").verifyResult(".*headers.*type.*name.*data.*")
s.run("abcde").verifyError(ename = "NameError", evalue = "name 'abcde'
is not defined")
- s.run("raise KeyError, 'foo'").verifyError(ename = "KeyError", evalue =
"'foo'")
+ s.run("raise KeyError('foo')").verifyError(ename = "KeyError", evalue =
"'foo'")
s.run("print(1)\r\nprint(1)").verifyResult("1\n1")
}
}
test("R interactive session") {
+ assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")
withNewSession(SparkR) { s =>
// R's output sometimes includes the count of statements, which makes it annoying to test
// things. This helps a bit.
diff --git a/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala b/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
index 52a42918..e2d63e8c 100644
--- a/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
+++ b/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkConf
import org.json4s.{DefaultFormats, JNull, JValue}
import org.json4s.JsonDSL._
import org.scalatest._
+import org.scalatest.Inside.inside
import org.apache.livy.rsc.driver.SparkEntries
import org.apache.livy.sessions._
@@ -228,16 +229,16 @@ abstract class PythonBaseInterpreterSpec extends BaseInterpreterSpec {
|'
""".stripMargin)
- response should equal(Interpreter.ExecuteError(
- "SyntaxError",
- "EOL while scanning string literal (<stdin>, line 2)",
- List(
- " File \"<stdin>\", line 2\n",
- " '\n",
- " ^\n",
- "SyntaxError: EOL while scanning string literal\n"
- )
- ))
+ inside (response) {
+ case Interpreter.ExecuteError(ename, evalue, traceback) => {
+ ename shouldBe "SyntaxError"
+ evalue should (startWith("EOL while scanning string literal")
+ or startWith("unterminated string literal"))
+ traceback.last should (startWith("SyntaxError: EOL while scanning string literal")
+ or startWith("SyntaxError: unterminated string literal"))
+ }
+ case _ => fail()
+ }
response = intp.execute("x")
response should equal(Interpreter.ExecuteError(