This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new a3c2ff18f8 GH-49522: [CI] Update chrome_version for emscripten job to
latest stable (v148) (#49523)
a3c2ff18f8 is described below
commit a3c2ff18f8e1615a96ee047b1c9faeffe8dacf0d
Author: Raúl Cumplido <[email protected]>
AuthorDate: Mon May 11 13:39:39 2026 +0200
GH-49522: [CI] Update chrome_version for emscripten job to latest stable
(v148) (#49523)
### Rationale for this change
Chrome `134` can no longer be installed from the stable release channel
because a newer stable version has been released.
### What changes are included in this PR?
- Update the latest stable Chrome driver version to `148` and change how the
install script looks up the version.
- Update selenium to newer version to work with newer chrome.
- Fixes to cross-compilation build after upgrading Abseil and Protobuf and
moving to FetchContent.
- Updated timeout so Chrome job doesn't time out.
- Skip substrait tests which currently fail on Pyodide.
### Are these changes tested?
Yes, via archery job.
### Are there any user-facing changes?
No
* GitHub Issue: #49522
Authored-by: Raúl Cumplido <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ci/docker/conda-python-emscripten.dockerfile | 2 +-
ci/scripts/install_chromedriver.sh | 21 ++++--
compose.yaml | 4 +-
cpp/cmake_modules/ThirdpartyToolchain.cmake | 108 ++++++++++++++++++++++++++-
python/pyarrow/tests/test_dataset.py | 1 +
python/scripts/run_emscripten_tests.py | 15 +++-
6 files changed, 134 insertions(+), 17 deletions(-)
diff --git a/ci/docker/conda-python-emscripten.dockerfile
b/ci/docker/conda-python-emscripten.dockerfile
index 878f918710..c56bf4f0c5 100644
--- a/ci/docker/conda-python-emscripten.dockerfile
+++ b/ci/docker/conda-python-emscripten.dockerfile
@@ -20,7 +20,7 @@ ARG arch
ARG python="3.12"
FROM ${repo}:${arch}-conda-python-${python}
-ARG selenium_version="4.15.2"
+ARG selenium_version="4.41.0"
ARG pyodide_version="0.26.0"
ARG chrome_version="latest"
ARG required_python_min="(3,12)"
diff --git a/ci/scripts/install_chromedriver.sh
b/ci/scripts/install_chromedriver.sh
index 9167ae70e8..defc1a9e50 100755
--- a/ci/scripts/install_chromedriver.sh
+++ b/ci/scripts/install_chromedriver.sh
@@ -23,15 +23,22 @@ set -e
chrome_version=$1
-if [ "$chrome_version" = "latest" ]; then
- latest_release_path=LATEST_RELEASE_STABLE
-else
- latest_release_path=LATEST_RELEASE_${chrome_version}
+# Look up the Chrome version from the apt repo's Packages file.
+CHROME_DEB_VERSION=$(wget --no-verbose -O - \
+
"https://dl.google.com/linux/chrome/deb/dists/stable/main/binary-amd64/Packages.gz"
\
+ | gunzip \
+ | awk '/^Package: google-chrome-stable$/{found=1} found && /^Version:
/{print $2; exit}')
+CHROME_VERSION_FULL=${CHROME_DEB_VERSION%-*}
+
+# Validate there hasn't been major version bumps since the last time we
updated this script.
+if [ "$chrome_version" != "latest" ] && [ "${CHROME_VERSION_FULL%%.*}" !=
"$chrome_version" ]; then
+ echo "Requested Chrome major ${chrome_version}, but apt repo currently
publishes ${CHROME_VERSION_FULL}" >&2
+ exit 1
fi
-CHROME_VERSION_FULL=$(wget -q --no-verbose -O -
"https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}")
-CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb"
+
+CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_DEB_VERSION}_amd64.deb"
CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip"
-wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
+wget --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
apt-get update
apt install -qqy /tmp/google-chrome.deb
rm -f /tmp/google-chrome.deb
diff --git a/compose.yaml b/compose.yaml
index be32a95dd9..f527a835a3 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -951,8 +951,8 @@ services:
clang_tools: ${CLANG_TOOLS}
llvm: ${LLVM}
pyodide_version: "0.26.0"
- chrome_version: "134"
- selenium_version: "4.15.2"
+ chrome_version: "148"
+ selenium_version: "4.41.0"
required_python_min: "(3,12)"
python: ${PYTHON}
shm_size: *shm-size
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 2f5bbf55a4..9787fabafa 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2059,9 +2059,7 @@ function(build_protobuf)
# Make protobuf_fc depend on the install completion marker
add_custom_target(protobuf_fc DEPENDS
"${PROTOBUF_PREFIX}/.protobuf_installed")
- set(ARROW_BUNDLED_STATIC_LIBS
- ${ARROW_BUNDLED_STATIC_LIBS} protobuf::libprotobuf
- PARENT_SCOPE)
+ list(APPEND ARROW_BUNDLED_STATIC_LIBS protobuf::libprotobuf)
if(CMAKE_CROSSCOMPILING)
# If we are cross compiling, we need to build protoc for the host
@@ -2069,12 +2067,21 @@ function(build_protobuf)
set(PROTOBUF_HOST_PREFIX
"${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install")
set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc")
+ # cross-compiled (PyArrow on emscripten) needs utf8_range bundled
explicitly.
+ list(APPEND ARROW_BUNDLED_STATIC_LIBS utf8_range)
+
set(PROTOBUF_HOST_CMAKE_ARGS
"-DCMAKE_CXX_FLAGS="
"-DCMAKE_C_FLAGS="
"-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}"
-Dprotobuf_BUILD_TESTS=OFF
-Dprotobuf_DEBUG_POSTFIX=)
+ if(ABSL_VENDORED)
+ # Force protobuf to reuse Arrow's already-extracted absl source
+ # so we don't re-download and we don't have issues with multiple abseil.
+ list(APPEND PROTOBUF_HOST_CMAKE_ARGS
-Dprotobuf_FORCE_FETCH_DEPENDENCIES=ON
+ "-DFETCHCONTENT_SOURCE_DIR_ABSL=${absl_SOURCE_DIR}")
+ endif()
# We reuse the FetchContent downloaded source but build it with host
compiler
externalproject_add(protobuf_ep_host
@@ -2089,7 +2096,102 @@ function(build_protobuf)
PROPERTIES IMPORTED_LOCATION
"${PROTOBUF_HOST_COMPILER}")
add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host)
+ # For cross-compilation along with ExternalProject we need to
+ # manually include absl deps to the bundled static libs so that
+ # they are available for the generated code in protobuf v31.
+ list(APPEND
+ ARROW_BUNDLED_STATIC_LIBS
+ absl::bad_any_cast_impl
+ absl::bad_optional_access
+ absl::bad_variant_access
+ absl::base
+ absl::city
+ absl::civil_time
+ absl::cord
+ absl::cord_internal
+ absl::cordz_functions
+ absl::cordz_handle
+ absl::cordz_info
+ absl::cordz_sample_token
+ absl::crc32c
+ absl::crc_cord_state
+ absl::crc_cpu_detect
+ absl::crc_internal
+ absl::debugging_internal
+ absl::decode_rust_punycode
+ absl::demangle_internal
+ absl::demangle_rust
+ absl::die_if_null
+ absl::examine_stack
+ absl::exponential_biased
+ absl::failure_signal_handler
+ absl::flags_commandlineflag
+ absl::flags_commandlineflag_internal
+ absl::flags_config
+ absl::flags_internal
+ absl::flags_marshalling
+ absl::flags_parse
+ absl::flags_private_handle_accessor
+ absl::flags_program_name
+ absl::flags_reflection
+ absl::flags_usage
+ absl::flags_usage_internal
+ absl::graphcycles_internal
+ absl::hash
+ absl::hashtablez_sampler
+ absl::int128
+ absl::kernel_timeout_internal
+ absl::leak_check
+ absl::log_globals
+ absl::log_initialize
+ absl::log_internal_check_op
+ absl::log_internal_conditions
+ absl::log_internal_fnmatch
+ absl::log_internal_format
+ absl::log_internal_globals
+ absl::log_internal_log_sink_set
+ absl::log_internal_message
+ absl::log_internal_nullguard
+ absl::log_internal_proto
+ absl::log_severity
+ absl::log_sink
+ absl::low_level_hash
+ absl::malloc_internal
+ absl::periodic_sampler
+ absl::poison
+ absl::random_distributions
+ absl::random_internal_distribution_test_util
+ absl::random_internal_platform
+ absl::random_internal_pool_urbg
+ absl::random_internal_randen
+ absl::random_internal_randen_hwaes
+ absl::random_internal_randen_hwaes_impl
+ absl::random_internal_randen_slow
+ absl::random_internal_seed_material
+ absl::random_seed_gen_exception
+ absl::random_seed_sequences
+ absl::raw_hash_set
+ absl::raw_logging_internal
+ absl::scoped_set_env
+ absl::spinlock_wait
+ absl::stacktrace
+ absl::status
+ absl::statusor
+ absl::str_format_internal
+ absl::strerror
+ absl::strings
+ absl::strings_internal
+ absl::symbolize
+ absl::synchronization
+ absl::throw_delegate
+ absl::time
+ absl::time_zone
+ absl::utf8_for_code_point
+ absl::vlog_config_internal)
endif()
+ set(ARROW_BUNDLED_STATIC_LIBS
+ "${ARROW_BUNDLED_STATIC_LIBS}"
+ PARENT_SCOPE)
list(POP_BACK CMAKE_MESSAGE_INDENT)
endfunction()
diff --git a/python/pyarrow/tests/test_dataset.py
b/python/pyarrow/tests/test_dataset.py
index d00c0c4b3e..3afe3281cb 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -5907,6 +5907,7 @@ def test_make_write_options_error():
pformat.make_write_options(43)
+@pytest.mark.substrait
def test_scanner_from_substrait(dataset):
try:
import pyarrow.substrait as ps
diff --git a/python/scripts/run_emscripten_tests.py
b/python/scripts/run_emscripten_tests.py
index a4f9ce9d9a..3cd7d5fd67 100644
--- a/python/scripts/run_emscripten_tests.py
+++ b/python/scripts/run_emscripten_tests.py
@@ -35,7 +35,9 @@ from selenium import webdriver
class TemplateOverrider(http.server.SimpleHTTPRequestHandler):
def log_request(self, code="-", size="-"):
- # don't log successful requests
+ # don't log successful requests but log errors
+ if isinstance(code, int) and code >= 400:
+ sys.stderr.write(f"HTTP {code} for {self.path}\n")
return
def do_GET(self) -> bytes | None:
@@ -200,7 +202,8 @@ class BrowserDriver:
def __init__(self, hostname, port, driver):
self.driver = driver
self.driver.get(f"http://{hostname}:{port}/test.html")
- self.driver.set_script_timeout(100)
+ # Chrome on CI takes longer than locally to compile.
+ self.driver.set_script_timeout(1200)
def load_pyodide(self, dist_dir):
pass
@@ -259,7 +262,9 @@ class ChromeDriver(BrowserDriver):
options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
- super().__init__(hostname, port, webdriver.Chrome(options=options))
+ driver = webdriver.Chrome(options=options)
+ driver.command_executor._client_config.timeout = 1200
+ super().__init__(hostname, port, driver)
class FirefoxDriver(BrowserDriver):
@@ -336,7 +341,9 @@ with launch_server(dist_dir) as (hostname, port):
"""
import pyarrow,pathlib
pyarrow_dir = pathlib.Path(pyarrow.__file__).parent
-pytest.main([pyarrow_dir, '-r', 's'])
+# Substrait expression serialization crashes pyodide with a
+# "Cannot convert a BigInt value to a number" error.
+pytest.main([pyarrow_dir, '-r', 's', '-m', 'not substrait'])
""",
wait_for_terminate=False,
)