This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch fix-image-building in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 04b7334d946408bdc216ed3cbbabbd1bc1272238 Author: Jia Yu <[email protected]> AuthorDate: Sun Nov 23 00:28:16 2025 -0800 Update the old dependencies --- .github/workflows/docker-build.yml | 14 +++++++------- docker/build.sh | 2 +- docker/install-sedona.sh | 10 +++++----- docker/install-spark.sh | 4 ---- docker/requirements.txt | 8 ++++---- docker/sedona-docker.dockerfile | 23 +++++++++++------------ docker/zeppelin/conf/interpreter.json | 4 ++-- 7 files changed, 30 insertions(+), 35 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 790daa1778..5802690959 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -46,14 +46,14 @@ jobs: fail-fast: true matrix: os: ['ubuntu-latest'] - spark: ['3.5.1', '3.4.3'] + spark: ['4.0.1'] include: - - spark: 3.5.1 + - spark: 4.0.1 sedona: 'latest' - geotools: '33.1-rc1' - - spark: 3.4.3 - sedona: 1.6.0 - geotools: 28.2 + geotools: '33.1' + - spark: 4.0.1 + sedona: 1.8.0 + geotools: '33.1' runs-on: ${{ matrix.os }} defaults: run: @@ -63,7 +63,7 @@ jobs: - uses: actions/setup-java@v5 with: distribution: 'zulu' - java-version: 11 + java-version: 17 - name: Cache Maven packages uses: actions/cache@v4 with: diff --git a/docker/build.sh b/docker/build.sh index 101295cc08..7cf3d95a8c 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -80,7 +80,7 @@ if [ "$SEDONA_VERSION" = "latest" ]; then echo "Using latest geotools-wrapper version: $GEOTOOLS_WRAPPER_VERSION" # The compilation must take place outside Docker to avoid unnecessary maven packages - mvn clean install -DskipTests -Dspark="${SEDONA_SPARK_VERSION}" -Dscala=2.12 + mvn clean install -DskipTests -Dspark="${SEDONA_SPARK_VERSION}" -Dscala=2.13 fi # -- Building the image diff --git a/docker/install-sedona.sh b/docker/install-sedona.sh index 9e404c6a86..35507f2e57 100755 --- a/docker/install-sedona.sh +++ b/docker/install-sedona.sh @@ -35,21 +35,21 @@ fi if [ "$sedona_version" = "latest" 
]; then # Code to execute when SEDONA_VERSION is "latest" cp "${SEDONA_HOME}"/spark-shaded/target/sedona-spark-shaded-*.jar "${SPARK_HOME}"/jars/ - cd "${SEDONA_HOME}"/python;pip3 install . + cd "${SEDONA_HOME}"/python;pip3 install . --break-system-packages else # Code to execute when SEDONA_VERSION is not "latest" # Download Sedona - curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-"${sedona_spark_version}"_2.12/"${sedona_version}"/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar -o "$SPARK_HOME"/jars/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar + curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-"${sedona_spark_version}"_2.13/"${sedona_version}"/sedona-spark-shaded-"${sedona_spark_version}"_2.13-"${sedona_version}".jar -o "$SPARK_HOME"/jars/sedona-spark-shaded-"${sedona_spark_version}"_2.13-"${sedona_version}".jar # Install Sedona Python - pip3 install apache-sedona=="${sedona_version}" + pip3 install apache-sedona=="${sedona_version}" --break-system-packages fi # Download gresearch spark extension -curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/"${spark_extension_version}"-"${spark_compat_version}"/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar -o "$SPARK_HOME"/jars/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar +curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.13/"${spark_extension_version}"-"${spark_compat_version}"/spark-extension_2.13-"${spark_extension_version}"-"${spark_compat_version}".jar -o "$SPARK_HOME"/jars/spark-extension_2.13-"${spark_extension_version}"-"${spark_compat_version}".jar # Install Spark extension Python -pip3 install 
pyspark-extension=="${spark_extension_version}"."${spark_compat_version}" +pip3 install pyspark-extension=="${spark_extension_version}"."${spark_compat_version}" --break-system-packages # Download GeoTools jar curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/"${geotools_wrapper_version}"/geotools-wrapper-"${geotools_wrapper_version}".jar -o "$SPARK_HOME"/jars/geotools-wrapper-"${geotools_wrapper_version}".jar diff --git a/docker/install-spark.sh b/docker/install-spark.sh index a66945c2fd..0210a89a9d 100755 --- a/docker/install-spark.sh +++ b/docker/install-spark.sh @@ -23,7 +23,6 @@ set -e spark_version=$1 hadoop_s3_version=$2 aws_sdk_version=$3 -spark_xml_version=$4 # Download Spark jar and set up PySpark curl --retry 5 --retry-delay 10 --retry-connrefused https://archive.apache.org/dist/spark/spark-"${spark_version}"/spark-"${spark_version}"-bin-hadoop3.tgz -o spark.tgz @@ -34,9 +33,6 @@ rm spark.tgz && rm -rf spark-"${spark_version}"-bin-hadoop3 curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/"${hadoop_s3_version}"/hadoop-aws-"${hadoop_s3_version}".jar -o "${SPARK_HOME}"/jars/hadoop-aws-"${hadoop_s3_version}".jar curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/"${aws_sdk_version}"/aws-java-sdk-bundle-"${aws_sdk_version}".jar -o "${SPARK_HOME}"/jars/aws-java-sdk-bundle-"${aws_sdk_version}".jar -# Add spark-xml jar -curl --retry 5 --retry-delay 10 --retry-connrefused https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/"${spark_xml_version}"/spark-xml_2.12-"${spark_xml_version}".jar -o "${SPARK_HOME}"/jars/spark-xml_2.12-"${spark_xml_version}".jar - # Set up master IP address and executor memory cp "${SPARK_HOME}"/conf/spark-defaults.conf.template "${SPARK_HOME}"/conf/spark-defaults.conf diff --git a/docker/requirements.txt b/docker/requirements.txt index 
3ed7285f49..f186cefcbd 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -1,15 +1,15 @@ attrs descartes fiona==1.10.1 -geopandas==1.0.1 +geopandas==1.1.1 ipykernel ipywidgets jupyterlab==4.4.9 jupyterlab-widgets==3.0.13 keplergl==0.3.7 matplotlib -numpy<2 -pandas==1.5.3 +numpy==1.26.4 +pandas==2.3.3 pydeck==0.9.1 rasterio==1.4.3 -shapely==2.0.4 +shapely==2.1.2 diff --git a/docker/sedona-docker.dockerfile b/docker/sedona-docker.dockerfile index 3153382c72..2671a18f33 100644 --- a/docker/sedona-docker.dockerfile +++ b/docker/sedona-docker.dockerfile @@ -15,16 +15,15 @@ # limitations under the License. # -FROM ubuntu:22.04 +FROM ubuntu:24.04 ARG shared_workspace=/opt/workspace -ARG spark_version=3.5.5 -ARG hadoop_s3_version=3.3.4 -ARG aws_sdk_version=1.12.402 -ARG spark_xml_version=0.16.0 -ARG sedona_version=1.7.1 -ARG geotools_wrapper_version=1.7.1-28.5 -ARG spark_extension_version=2.11.0 +ARG spark_version=4.0.1 +ARG hadoop_s3_version=3.4.1 +ARG aws_sdk_version=2.38.2 +ARG sedona_version=1.8.0 +ARG geotools_wrapper_version=1.8.1-33.1 +ARG spark_extension_version=2.14.2 ARG zeppelin_version=0.12.0 # Set up envs @@ -43,15 +42,15 @@ ENV PYSPARK_DRIVER_PYTHON=jupyter # Set up OS libraries and PySpark RUN apt-get update -RUN apt-get install -y openjdk-19-jdk-headless curl python3-pip maven -RUN pip3 install --upgrade pip && pip3 install pipenv +RUN apt-get install -y openjdk-17-jdk-headless curl python3-pip maven +RUN pip3 install pipenv --break-system-packages COPY ./docker/install-spark.sh ${SEDONA_HOME}/docker/ RUN chmod +x ${SEDONA_HOME}/docker/install-spark.sh -RUN ${SEDONA_HOME}/docker/install-spark.sh ${spark_version} ${hadoop_s3_version} ${aws_sdk_version} ${spark_xml_version} +RUN ${SEDONA_HOME}/docker/install-spark.sh ${spark_version} ${hadoop_s3_version} ${aws_sdk_version} # Install Python dependencies COPY docker/requirements.txt /opt/requirements.txt -RUN pip3 install -r /opt/requirements.txt +RUN pip3 install -r /opt/requirements.txt 
 --break-system-packages # Copy local compiled jars and python code to the docker environment diff --git a/docker/zeppelin/conf/interpreter.json b/docker/zeppelin/conf/interpreter.json index 41ff251b16..f82f3e67ca 100644 --- a/docker/zeppelin/conf/interpreter.json +++ b/docker/zeppelin/conf/interpreter.json @@ -1638,11 +1638,11 @@ ], "dependencies": [ { - "groupArtifactVersion": "/opt/spark/jars/sedona-spark-shaded-3.5_2.12-1.7.1.jar", + "groupArtifactVersion": "/opt/spark/jars/sedona-spark-shaded-4.0_2.13-1.8.0.jar", "local": false }, { - "groupArtifactVersion": "/opt/spark/jars/geotools-wrapper-1.7.1-28.5.jar", + "groupArtifactVersion": "/opt/spark/jars/geotools-wrapper-1.8.1-33.1.jar", "local": false } ],
