This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 8b889ce1cbf3 [SPARK-52897][PYTHON] Update `pandas` to 2.3.1
8b889ce1cbf3 is described below

commit 8b889ce1cbf3d6b6f39a3a89a46df29ceca89c59
Author: Bjørn Jørgensen <bjornjorgen...@gmail.com>
AuthorDate: Thu Jul 24 13:46:08 2025 +0800

    [SPARK-52897][PYTHON] Update `pandas` to 2.3.1

    ### What changes were proposed in this pull request?
    Upgrading from pandas==2.3.0 to pandas==2.3.1.

    ### Why are the changes needed?
    [GitHub release](https://github.com/pandas-dev/pandas/releases/tag/v2.3.1)
    [Release notes](https://pandas.pydata.org/pandas-docs/version/2.3.1/whatsnew/index.html#release)

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Pass GA (GitHub Actions).

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #51579 from bjornjorgensen/pandas2_3-1.

    Authored-by: Bjørn Jørgensen <bjornjorgen...@gmail.com>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .github/workflows/build_python_connect.yml              | 2 +-
 .github/workflows/pages.yml                             | 2 +-
 .github/workflows/python_hosted_runner_test.yml         | 2 +-
 dev/create-release/spark-rm/Dockerfile                  | 4 ++--
 dev/spark-test-image/docs/Dockerfile                    | 2 +-
 dev/spark-test-image/pypy-310/Dockerfile                | 2 +-
 dev/spark-test-image/python-309/Dockerfile              | 2 +-
 dev/spark-test-image/python-310/Dockerfile              | 2 +-
 dev/spark-test-image/python-311-classic-only/Dockerfile | 2 +-
 dev/spark-test-image/python-311/Dockerfile              | 2 +-
 dev/spark-test-image/python-312/Dockerfile              | 2 +-
 dev/spark-test-image/python-313-nogil/Dockerfile        | 4 ++--
 dev/spark-test-image/python-313/Dockerfile              | 2 +-
 python/pyspark/pandas/supported_api_gen.py              | 2 +-
 14 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index 761c4f9ebdcb..5c61f4372fb2 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -72,7 +72,7 @@ jobs:
           python packaging/client/setup.py sdist
           cd dist
           pip install pyspark*client-*.tar.gz
-          pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.0' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
+          pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.1' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
       - name: List Python packages
         run: python -m pip list
       - name: Run tests (local)
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 760e8117dcc0..fdc225a0a0a7 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -61,7 +61,7 @@ jobs:
       - name: Install Python dependencies
         run: |
           pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.3.0' 'plotly>=4.8' 'docutils<0.18.0' \
+            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.3.1' 'plotly>=4.8' 'docutils<0.18.0' \
             'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
             'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
             'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index a56bbbdd47af..ec7c23f63dfb 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -149,7 +149,7 @@ jobs:
         run: |
           python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
           python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
-          python${{matrix.python}} -m pip install numpy 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.3.0' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
+          python${{matrix.python}} -m pip install numpy 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.3.1' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
           python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
           python${{matrix.python}} -m pip cache purge
       - name: List Python packages
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index 7ef6f7b9ab1d..516359ba0cd5 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -98,10 +98,10 @@ RUN mkdir -p /usr/local/pypy/pypy3.10 && \
     ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3.10 && \
     ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.0' scipy coverage matplotlib lxml
+RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.1' scipy coverage matplotlib lxml
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
index 3ab1430cedd3..311e43bb7b34 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -88,7 +88,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 # See 'ipython_genutils' in SPARK-38517
 # See 'docutils<0.18.0' in SPARK-39421
 RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-    ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
+    ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.3.1' 'plotly>=4.8' 'docutils<0.18.0' \
     'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
     'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
     'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
diff --git a/dev/spark-test-image/pypy-310/Dockerfile b/dev/spark-test-image/pypy-310/Dockerfile
index 64782117866c..73ac6aca04cf 100644
--- a/dev/spark-test-image/pypy-310/Dockerfile
+++ b/dev/spark-test-image/pypy-310/Dockerfile
@@ -68,4 +68,4 @@ RUN mkdir -p /usr/local/pypy/pypy3.10 && \
     ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3.10 && \
     ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.0' scipy coverage matplotlib lxml
+RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.1' scipy coverage matplotlib lxml
diff --git a/dev/spark-test-image/python-309/Dockerfile b/dev/spark-test-image/python-309/Dockerfile
index 305e7ea1973a..2e447ac2b2ff 100644
--- a/dev/spark-test-image/python-309/Dockerfile
+++ b/dev/spark-test-image/python-309/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile
index d0c26aa8c7c6..83994e94c4d1 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-311-classic-only/Dockerfile b/dev/spark-test-image/python-311-classic-only/Dockerfile
index 5c2f8a4f3d38..12705bb7121e 100644
--- a/dev/spark-test-image/python-311-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-311-classic-only/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 pandas==2.3.0 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 pandas==2.3.1 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
 ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
 
 # Install Python 3.11 packages
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 38cba230f4bd..32ff3b40b8d2 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 3f66635009dd..51be9774c0ea 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile b/dev/spark-test-image/python-313-nogil/Dockerfile
index f05d85691346..5fc6adf90419 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
 
@@ -76,5 +76,5 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13t
 # TODO: Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS when it supports Python 3.13 free threaded
 # TODO: Add lxml, grpcio, grpcio-status back when they support Python 3.13 free threaded
 RUN python3.13t -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this
-RUN python3.13t -m pip install numpy>=2.1 pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy coverage matplotlib openpyxl jinja2 && \
+RUN python3.13t -m pip install numpy>=2.1 pyarrow>=19.0.0 six==1.16.0 pandas==2.3.1 scipy coverage matplotlib openpyxl jinja2 && \
     python3.13t -m pip cache purge
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index 1ef08da0f40d..1ee831becc8c 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
index 62a3e23816ef..159996728cd8 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -38,7 +38,7 @@ from pyspark.pandas.exceptions import PandasNotImplementedError
 MAX_MISSING_PARAMS_SIZE = 5
 COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
 MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
-PANDAS_LATEST_VERSION = "2.3.0"
+PANDAS_LATEST_VERSION = "2.3.1"
 
 RST_HEADER = """
 =====================

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org