This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4c1448e850 GH-33697: [CI][Python] Nightly test for PySpark 3.2.0 fail
with AttributeError on numpy.bool (#33714)
4c1448e850 is described below
commit 4c1448e85011c24f2dde087dc75035c91be7afcd
Author: Alenka Frim <[email protected]>
AuthorDate: Wed Mar 1 14:05:55 2023 +0100
GH-33697: [CI][Python] Nightly test for PySpark 3.2.0 fail with
AttributeError on numpy.bool (#33714)
### Rationale for this change
Fixes the failure of the nightly integration tests with PySpark 3.2.0 (`AttributeError` on `numpy.bool`).
### What changes are included in this PR?
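A NumPy version pin: a new `numpy` build argument in `docker-compose.yml` and `ci/docker/conda-python-spark.dockerfile`, a new `ci/scripts/install_numpy.sh` script that installs the requested version, and a per-job `NUMPY` value in `dev/tasks/tasks.yml` (`1.23` for the PySpark 3.2.0 job, `latest` otherwise).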
### Are these changes tested?
They will be tested by the CI on the open PR.
### Are there any user-facing changes?
No.
* Closes: #33697
Lead-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
ci/docker/conda-python-spark.dockerfile | 7 +++-
.../install_numpy.sh} | 40 +++++++---------------
dev/tasks/tasks.yml | 7 ++--
docker-compose.yml | 1 +
4 files changed, 24 insertions(+), 31 deletions(-)
diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index 861d83fe60..58e3d5e5d5 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -23,11 +23,16 @@ FROM ${repo}:${arch}-conda-python-${python}
ARG jdk=8
ARG maven=3.5
+ARG numpy=latest
+COPY ci/scripts/install_numpy.sh /arrow/ci/scripts/
+
RUN mamba install -q -y \
openjdk=${jdk} \
maven=${maven} \
pandas && \
- mamba clean --all
+ mamba clean --all && \
+ mamba uninstall -q -y numpy && \
+ /arrow/ci/scripts/install_numpy.sh ${numpy}
# installing specific version of spark
ARG spark=master
diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/scripts/install_numpy.sh
old mode 100644
new mode 100755
similarity index 55%
copy from ci/docker/conda-python-spark.dockerfile
copy to ci/scripts/install_numpy.sh
index 861d83fe60..f04fe81b66
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/scripts/install_numpy.sh
@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -15,33 +17,17 @@
# specific language governing permissions and limitations
# under the License.
-ARG repo
-ARG arch=amd64
-ARG python=3.8
-FROM ${repo}:${arch}-conda-python-${python}
-
-ARG jdk=8
-ARG maven=3.5
+set -e
-RUN mamba install -q -y \
- openjdk=${jdk} \
- maven=${maven} \
- pandas && \
- mamba clean --all
+if [ $# -gt 1 ]; then
+ echo "Usage: $0 <optional numpy version = latest>"
+ exit 1
+fi
-# installing specific version of spark
-ARG spark=master
-COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
-RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
+numpy=${1:-"latest"}
-# build cpp with tests
-ENV CC=gcc \
- CXX=g++ \
- ARROW_BUILD_TESTS=OFF \
- ARROW_COMPUTE=ON \
- ARROW_CSV=ON \
- ARROW_DATASET=ON \
- ARROW_FILESYSTEM=ON \
- ARROW_HDFS=ON \
- ARROW_JSON=ON \
- SPARK_VERSION=${spark}
+if [ "${numpy}" = "latest" ]; then
+ pip install numpy
+else
+ pip install numpy==${numpy}
+fi
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 4c4302a72f..b345bcd48e 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1589,9 +1589,9 @@ tasks:
image: conda-python-hdfs
{% endfor %}
-{% for python_version, spark_version, test_pyarrow_only in [("3.7", "v3.1.2", "false"),
-                                                            ("3.8", "v3.2.0", "false"),
-                                                            ("3.9", "master", "false")] %}
+{% for python_version, spark_version, test_pyarrow_only, numpy_version in [("3.7", "v3.1.2", "false", "latest"),
+                                                                           ("3.8", "v3.2.0", "false", "1.23"),
+                                                                           ("3.9", "master", "false", "latest")] %}
test-conda-python-{{ python_version }}-spark-{{ spark_version }}:
ci: github
template: docker-tests/github.linux.yml
@@ -1600,6 +1600,7 @@ tasks:
PYTHON: "{{ python_version }}"
SPARK: "{{ spark_version }}"
TEST_PYARROW_ONLY: "{{ test_pyarrow_only }}"
+ NUMPY: "{{ numpy_version }}"
# use the branch-3.0 of spark, so prevent reusing any layers
flags: --no-leaf-cache
image: conda-python-spark
diff --git a/docker-compose.yml b/docker-compose.yml
index 12071a57bd..c9b02c45d1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1788,6 +1788,7 @@ services:
# be set to ${MAVEN}
maven: 3.5
spark: ${SPARK}
+ numpy: ${NUMPY}
shm_size: *shm-size
environment:
<<: *ccache