[arrow] branch main updated: GH-33697: [CI][Python] Nightly test for PySpark 3.2.0 fail with AttributeError on numpy.bool (#33714)

raulcd Wed, 01 Mar 2023 05:06:58 -0800

This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 4c1448e850 GH-33697: [CI][Python] Nightly test for PySpark 3.2.0 fail with AttributeError on numpy.bool (#33714)
4c1448e850 is described below

commit 4c1448e85011c24f2dde087dc75035c91be7afcd
Author: Alenka Frim <[email protected]>
AuthorDate: Wed Mar 1 14:05:55 2023 +0100

    GH-33697: [CI][Python] Nightly test for PySpark 3.2.0 fail with AttributeError on numpy.bool (#33714)
    
    ### Rationale for this change
    Fix for nightly integration tests with PySpark 3.2.0 failure.
    
    ### What changes are included in this PR?
    NumPy version pin in `docker-compose.yml`.
    
    ### Are these changes tested?
    Will test on the open PR with the CI.
    
    ### Are there any user-facing changes?
    No.
    * Closes: #33697
    
    Lead-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 ci/docker/conda-python-spark.dockerfile            |  7 +++-
 .../install_numpy.sh}                              | 40 +++++++---------------
 dev/tasks/tasks.yml                                |  7 ++--
 docker-compose.yml                                 |  1 +
 4 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index 861d83fe60..58e3d5e5d5 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -23,11 +23,16 @@ FROM ${repo}:${arch}-conda-python-${python}
 ARG jdk=8
 ARG maven=3.5
 
+ARG numpy=latest
+COPY ci/scripts/install_numpy.sh /arrow/ci/scripts/
+
 RUN mamba install -q -y \
         openjdk=${jdk} \
         maven=${maven} \
         pandas && \
-    mamba clean --all
+    mamba clean --all && \
+    mamba uninstall -q -y numpy && \
+    /arrow/ci/scripts/install_numpy.sh ${numpy}
 
 # installing specific version of spark
 ARG spark=master
diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/scripts/install_numpy.sh
old mode 100644
new mode 100755
similarity index 55%
copy from ci/docker/conda-python-spark.dockerfile
copy to ci/scripts/install_numpy.sh
index 861d83fe60..f04fe81b66
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/scripts/install_numpy.sh
@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -15,33 +17,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
-ARG repo
-ARG arch=amd64
-ARG python=3.8
-FROM ${repo}:${arch}-conda-python-${python}
-
-ARG jdk=8
-ARG maven=3.5
+set -e
 
-RUN mamba install -q -y \
-        openjdk=${jdk} \
-        maven=${maven} \
-        pandas && \
-    mamba clean --all
+if [ $# -gt 1 ]; then
+  echo "Usage: $0 <optional numpy version = latest>"
+  exit 1
+fi
 
-# installing specific version of spark
-ARG spark=master
-COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
-RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
+numpy=${1:-"latest"}
 
-# build cpp with tests
-ENV CC=gcc \
-    CXX=g++ \
-    ARROW_BUILD_TESTS=OFF \
-    ARROW_COMPUTE=ON \
-    ARROW_CSV=ON \
-    ARROW_DATASET=ON \
-    ARROW_FILESYSTEM=ON \
-    ARROW_HDFS=ON \
-    ARROW_JSON=ON \
-    SPARK_VERSION=${spark}
+if [ "${numpy}" = "latest" ]; then
+  pip install numpy
+else
+  pip install numpy==${numpy}
+fi
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 4c4302a72f..b345bcd48e 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1589,9 +1589,9 @@ tasks:
       image: conda-python-hdfs
 {% endfor %}
 
-{% for python_version, spark_version, test_pyarrow_only in [("3.7", "v3.1.2", "false"),
-                                                            ("3.8", "v3.2.0", "false"),
-                                                            ("3.9", "master", "false")] %}
+{% for python_version, spark_version, test_pyarrow_only, numpy_version in [("3.7", "v3.1.2", "false", "latest"),
+                                                                           ("3.8", "v3.2.0", "false", "1.23"),
+                                                                           ("3.9", "master", "false", "latest")] %}
   test-conda-python-{{ python_version }}-spark-{{ spark_version }}:
     ci: github
     template: docker-tests/github.linux.yml
@@ -1600,6 +1600,7 @@ tasks:
         PYTHON: "{{ python_version }}"
         SPARK: "{{ spark_version }}"
         TEST_PYARROW_ONLY: "{{ test_pyarrow_only }}"
+        NUMPY: "{{ numpy_version }}"
       # use the branch-3.0 of spark, so prevent reusing any layers
       flags: --no-leaf-cache
       image: conda-python-spark
diff --git a/docker-compose.yml b/docker-compose.yml
index 12071a57bd..c9b02c45d1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1788,6 +1788,7 @@ services:
         # be set to ${MAVEN}
         maven: 3.5
         spark: ${SPARK}
+        numpy: ${NUMPY}
     shm_size: *shm-size
     environment:
       <<: *ccache

[arrow] branch main updated: GH-33697: [CI][Python] Nightly test for PySpark 3.2.0 fail with AttributeError on numpy.bool (#33714)

Reply via email to