This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new ff2847e23798 [SPARK-54131][PYTHON][TESTS] Update `Pandas` version to 2.3.3
ff2847e23798 is described below
commit ff2847e237989e5c9e740c5154f05da012b7919c
Author: Bjørn Jørgensen <[email protected]>
AuthorDate: Mon Nov 3 10:56:21 2025 -0800
[SPARK-54131][PYTHON][TESTS] Update `Pandas` version to 2.3.3
### What changes were proposed in this pull request?
Update pandas to 2.3.3
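As a quick, illustrative sanity check (not part of this patch), the resolved pin can be confirmed in an environment before running the test suite:

```python
# Illustrative check only: verify the environment picked up the new pin.
import pandas as pd

EXPECTED = "2.3.3"
assert pd.__version__ == EXPECTED, f"expected pandas {EXPECTED}, got {pd.__version__}"
```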
### Why are the changes needed?
New version with bug fixes and support for Python 3.14.
_Pandas 2.3.3 is the first version of pandas that is generally compatible with the upcoming Python 3.14, and both wheels for free-threaded and normal Python 3.14 will be uploaded for this release._
[Release notes](https://pandas.pydata.org/pandas-docs/version/2.3/whatsnew/index.html#release)
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass CI/CD tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52828 from bjornjorgensen/pandas-2_3_3.
Authored-by: Bjørn Jørgensen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit fc3a590d9847251a34545ecbc2c8b0c4170370d4)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.github/workflows/build_python_connect.yml | 2 +-
.github/workflows/pages.yml | 2 +-
.github/workflows/python_hosted_runner_test.yml | 2 +-
dev/create-release/spark-rm/Dockerfile | 2 +-
dev/infra/Dockerfile | 6 +++---
dev/spark-test-image/docs/Dockerfile | 2 +-
dev/spark-test-image/pypy-310/Dockerfile | 2 +-
dev/spark-test-image/python-310/Dockerfile | 2 +-
dev/spark-test-image/python-311-classic-only/Dockerfile | 2 +-
dev/spark-test-image/python-311/Dockerfile | 2 +-
dev/spark-test-image/python-312/Dockerfile | 2 +-
dev/spark-test-image/python-313-nogil/Dockerfile | 4 ++--
dev/spark-test-image/python-313/Dockerfile | 2 +-
python/pyspark/pandas/supported_api_gen.py | 2 +-
14 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index 5edb54de82b6..cec37af22dd7 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -72,7 +72,7 @@ jobs:
python packaging/client/setup.py sdist
cd dist
pip install pyspark*client-*.tar.gz
- pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
+ pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
- name: List Python packages
run: python -m pip list
- name: Run tests (local)
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 86ef00220b37..e800b40106ee 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -61,7 +61,7 @@ jobs:
- name: Install Python dependencies
run: |
pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
- ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \
+ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index 9a6afc095063..77e85222c29d 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -147,7 +147,7 @@ jobs:
run: |
python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
- python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
+ python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
python${{matrix.python}} -m pip cache purge
- name: List Python packages
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index e53ac6b439c8..86be7e0a8229 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -92,7 +92,7 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', \
# See more in SPARK-39735
ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
-ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index afab64a67b5e..1aa03735ce92 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -92,10 +92,10 @@ RUN mkdir -p /usr/local/pypy/pypy3.10 && \
ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3.10 && \
ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.0' scipy coverage matplotlib lxml
+RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.3' scipy coverage matplotlib lxml
-ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.0 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
@@ -149,7 +149,7 @@ RUN apt-get update && apt-get install -y \
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13
# TODO(SPARK-49862) Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS to Python 3.13 image when it supports Python 3.13
RUN python3.13 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this
-RUN python3.13 -m pip install numpy>=2.1 pyarrow>=18.0.0 six==1.16.0 pandas==2.3.0 scipy coverage matplotlib openpyxl grpcio==1.67.0 grpcio-status==1.67.0 lxml jinja2 && \
+RUN python3.13 -m pip install numpy>=2.1 pyarrow>=18.0.0 six==1.16.0 pandas==2.3.3 scipy coverage matplotlib openpyxl grpcio==1.67.0 grpcio-status==1.67.0 lxml jinja2 && \
python3.13 -m pip cache purge
# Remove unused installation packages to free up disk space
diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
index 4c1e68f72447..1c17ae122d63 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -89,7 +89,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
# See 'ipython_genutils' in SPARK-38517
# See 'docutils<0.18.0' in SPARK-39421
RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
- ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \
+ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
diff --git a/dev/spark-test-image/pypy-310/Dockerfile b/dev/spark-test-image/pypy-310/Dockerfile
index 6f0b938bd199..c8672fc0ec06 100644
--- a/dev/spark-test-image/pypy-310/Dockerfile
+++ b/dev/spark-test-image/pypy-310/Dockerfile
@@ -69,4 +69,4 @@ RUN mkdir -p /usr/local/pypy/pypy3.10 && \
ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3.10 && \
ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.2' scipy coverage matplotlib lxml
+RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.3' scipy coverage matplotlib lxml
diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile
index ef59237afee2..ce2ca23d18a6 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -64,7 +64,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-311-classic-only/Dockerfile b/dev/spark-test-image/python-311-classic-only/Dockerfile
index 72f7ebec686e..c2d48c5ce877 100644
--- a/dev/spark-test-image/python-311-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-311-classic-only/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 pandas==2.3.2 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 pandas==2.3.3 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
# Install Python 3.11 packages
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 25fd065753bd..00fb7be788fd 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 82016bbec860..79cab824a5b2 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile b/dev/spark-test-image/python-313-nogil/Dockerfile
index 7f608caab193..031eb8772b59 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
@@ -77,5 +77,5 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13t
# TODO: Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS when it supports Python 3.13 free threaded
# TODO: Add lxml, grpcio, grpcio-status back when they support Python 3.13 free threaded
RUN python3.13t -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this
-RUN python3.13t -m pip install numpy>=2.1 pyarrow>=19.0.0 six==1.16.0 pandas==2.3.2 scipy coverage matplotlib openpyxl jinja2 && \
+RUN python3.13t -m pip install numpy>=2.1 pyarrow>=19.0.0 six==1.16.0 pandas==2.3.3 scipy coverage matplotlib openpyxl jinja2 && \
python3.13t -m pip cache purge
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index 9fd53d233ac0..abd5a7e01093 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.2 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.5 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
index 595c11c559d0..3f7efa7784ab 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -38,7 +38,7 @@ from pyspark.pandas.exceptions import PandasNotImplementedError
MAX_MISSING_PARAMS_SIZE = 5
COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
-PANDAS_LATEST_VERSION = "2.3.2"
+PANDAS_LATEST_VERSION = "2.3.3"
RST_HEADER = """
=====================
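Note on the last hunk: PANDAS_LATEST_VERSION is the pandas release the supported-API documentation generator targets. Below is a minimal sketch of the kind of guard such a constant can feed; the function name and message are illustrative assumptions, not the actual code in supported_api_gen.py:

    # Illustrative sketch: warn when the installed pandas differs from the
    # version the supported-API docs are generated against.
    import warnings

    import pandas as pd

    PANDAS_LATEST_VERSION = "2.3.3"

    def warn_on_pandas_mismatch() -> None:
        if pd.__version__ != PANDAS_LATEST_VERSION:
            warnings.warn(
                f"Supported-API docs target pandas {PANDAS_LATEST_VERSION}, "
                f"but found {pd.__version__}; generated results may differ.",
                UserWarning,
            )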
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]