This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a3c2ff18f8 GH-49522: [CI] Update chrome_version for emscripten job to 
latest stable (v148) (#49523)
a3c2ff18f8 is described below

commit a3c2ff18f8e1615a96ee047b1c9faeffe8dacf0d
Author: Raúl Cumplido <[email protected]>
AuthorDate: Mon May 11 13:39:39 2026 +0200

    GH-49522: [CI] Update chrome_version for emscripten job to latest stable 
(v148) (#49523)
    
    ### Rationale for this change
    
    Chrome version `134` can no longer be installed from the stable channel
    because a newer stable version has been released.
    
    ### What changes are included in this PR?
    
    - Update the latest stable Chrome driver version to `148` and change how the
      install script looks up the version.
    - Update selenium to newer version to work with newer chrome.
    - Fixes to cross-compilation build after upgrading Abseil and Protobuf and 
moving to FetchContent.
    - Updated timeout so Chrome job doesn't time out.
    - Skip substrait tests which currently fail on Pyodide.
    
    ### Are these changes tested?
    
    Yes, via archery job.
    
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #49522
    
    Authored-by: Raúl Cumplido <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ci/docker/conda-python-emscripten.dockerfile |   2 +-
 ci/scripts/install_chromedriver.sh           |  21 ++++--
 compose.yaml                                 |   4 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake  | 108 ++++++++++++++++++++++++++-
 python/pyarrow/tests/test_dataset.py         |   1 +
 python/scripts/run_emscripten_tests.py       |  15 +++-
 6 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/ci/docker/conda-python-emscripten.dockerfile 
b/ci/docker/conda-python-emscripten.dockerfile
index 878f918710..c56bf4f0c5 100644
--- a/ci/docker/conda-python-emscripten.dockerfile
+++ b/ci/docker/conda-python-emscripten.dockerfile
@@ -20,7 +20,7 @@ ARG arch
 ARG python="3.12"
 FROM ${repo}:${arch}-conda-python-${python}
 
-ARG selenium_version="4.15.2"
+ARG selenium_version="4.41.0"
 ARG pyodide_version="0.26.0"
 ARG chrome_version="latest"
 ARG required_python_min="(3,12)"
diff --git a/ci/scripts/install_chromedriver.sh 
b/ci/scripts/install_chromedriver.sh
index 9167ae70e8..defc1a9e50 100755
--- a/ci/scripts/install_chromedriver.sh
+++ b/ci/scripts/install_chromedriver.sh
@@ -23,15 +23,22 @@ set -e
 
 chrome_version=$1
 
-if [ "$chrome_version" = "latest" ]; then
-  latest_release_path=LATEST_RELEASE_STABLE
-else
-  latest_release_path=LATEST_RELEASE_${chrome_version}
+# Look up the Chrome version from the apt repo's Packages file.
+CHROME_DEB_VERSION=$(wget --no-verbose -O - \
+  "https://dl.google.com/linux/chrome/deb/dists/stable/main/binary-amd64/Packages.gz" \
+  | gunzip \
+  | awk '/^Package: google-chrome-stable$/{found=1} found && /^Version: /{print $2; exit}')
+CHROME_VERSION_FULL=${CHROME_DEB_VERSION%-*}
+
+# Validate there hasn't been major version bumps since the last time we updated this script.
+if [ "$chrome_version" != "latest" ] && [ "${CHROME_VERSION_FULL%%.*}" != "$chrome_version" ]; then
+  echo "Requested Chrome major ${chrome_version}, but apt repo currently publishes ${CHROME_VERSION_FULL}" >&2
+  exit 1
 fi
-CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}")
-CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb"
+
+CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_DEB_VERSION}_amd64.deb"
 CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip"
-wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
+wget --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
 apt-get update
 apt install -qqy /tmp/google-chrome.deb
 rm -f /tmp/google-chrome.deb
diff --git a/compose.yaml b/compose.yaml
index be32a95dd9..f527a835a3 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -951,8 +951,8 @@ services:
         clang_tools: ${CLANG_TOOLS}
         llvm: ${LLVM}
         pyodide_version: "0.26.0"
-        chrome_version: "134"
-        selenium_version: "4.15.2"
+        chrome_version: "148"
+        selenium_version: "4.41.0"
         required_python_min: "(3,12)"
         python: ${PYTHON}
     shm_size: *shm-size
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 2f5bbf55a4..9787fabafa 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2059,9 +2059,7 @@ function(build_protobuf)
 
   # Make protobuf_fc depend on the install completion marker
   add_custom_target(protobuf_fc DEPENDS 
"${PROTOBUF_PREFIX}/.protobuf_installed")
-  set(ARROW_BUNDLED_STATIC_LIBS
-      ${ARROW_BUNDLED_STATIC_LIBS} protobuf::libprotobuf
-      PARENT_SCOPE)
+  list(APPEND ARROW_BUNDLED_STATIC_LIBS protobuf::libprotobuf)
 
   if(CMAKE_CROSSCOMPILING)
     # If we are cross compiling, we need to build protoc for the host
@@ -2069,12 +2067,21 @@ function(build_protobuf)
     set(PROTOBUF_HOST_PREFIX 
"${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install")
     set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc")
 
+    # cross-compiled (PyArrow on emscripten) needs utf8_range bundled 
explicitly.
+    list(APPEND ARROW_BUNDLED_STATIC_LIBS utf8_range)
+
     set(PROTOBUF_HOST_CMAKE_ARGS
         "-DCMAKE_CXX_FLAGS="
         "-DCMAKE_C_FLAGS="
         "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}"
         -Dprotobuf_BUILD_TESTS=OFF
         -Dprotobuf_DEBUG_POSTFIX=)
+    if(ABSL_VENDORED)
+      # Force protobuf to reuse Arrow's already-extracted absl source
+      # so we don't re-download and we don't have issues with multiple abseil.
+      list(APPEND PROTOBUF_HOST_CMAKE_ARGS 
-Dprotobuf_FORCE_FETCH_DEPENDENCIES=ON
+           "-DFETCHCONTENT_SOURCE_DIR_ABSL=${absl_SOURCE_DIR}")
+    endif()
 
     # We reuse the FetchContent downloaded source but build it with host 
compiler
     externalproject_add(protobuf_ep_host
@@ -2089,7 +2096,102 @@ function(build_protobuf)
                           PROPERTIES IMPORTED_LOCATION 
"${PROTOBUF_HOST_COMPILER}")
 
     add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host)
+    # For cross-compilation along with ExternalProject we need to
+    # manually include absl deps to the bundled static libs so that
+    # they are available for the generated code in protobuf v31.
+    list(APPEND
+         ARROW_BUNDLED_STATIC_LIBS
+         absl::bad_any_cast_impl
+         absl::bad_optional_access
+         absl::bad_variant_access
+         absl::base
+         absl::city
+         absl::civil_time
+         absl::cord
+         absl::cord_internal
+         absl::cordz_functions
+         absl::cordz_handle
+         absl::cordz_info
+         absl::cordz_sample_token
+         absl::crc32c
+         absl::crc_cord_state
+         absl::crc_cpu_detect
+         absl::crc_internal
+         absl::debugging_internal
+         absl::decode_rust_punycode
+         absl::demangle_internal
+         absl::demangle_rust
+         absl::die_if_null
+         absl::examine_stack
+         absl::exponential_biased
+         absl::failure_signal_handler
+         absl::flags_commandlineflag
+         absl::flags_commandlineflag_internal
+         absl::flags_config
+         absl::flags_internal
+         absl::flags_marshalling
+         absl::flags_parse
+         absl::flags_private_handle_accessor
+         absl::flags_program_name
+         absl::flags_reflection
+         absl::flags_usage
+         absl::flags_usage_internal
+         absl::graphcycles_internal
+         absl::hash
+         absl::hashtablez_sampler
+         absl::int128
+         absl::kernel_timeout_internal
+         absl::leak_check
+         absl::log_globals
+         absl::log_initialize
+         absl::log_internal_check_op
+         absl::log_internal_conditions
+         absl::log_internal_fnmatch
+         absl::log_internal_format
+         absl::log_internal_globals
+         absl::log_internal_log_sink_set
+         absl::log_internal_message
+         absl::log_internal_nullguard
+         absl::log_internal_proto
+         absl::log_severity
+         absl::log_sink
+         absl::low_level_hash
+         absl::malloc_internal
+         absl::periodic_sampler
+         absl::poison
+         absl::random_distributions
+         absl::random_internal_distribution_test_util
+         absl::random_internal_platform
+         absl::random_internal_pool_urbg
+         absl::random_internal_randen
+         absl::random_internal_randen_hwaes
+         absl::random_internal_randen_hwaes_impl
+         absl::random_internal_randen_slow
+         absl::random_internal_seed_material
+         absl::random_seed_gen_exception
+         absl::random_seed_sequences
+         absl::raw_hash_set
+         absl::raw_logging_internal
+         absl::scoped_set_env
+         absl::spinlock_wait
+         absl::stacktrace
+         absl::status
+         absl::statusor
+         absl::str_format_internal
+         absl::strerror
+         absl::strings
+         absl::strings_internal
+         absl::symbolize
+         absl::synchronization
+         absl::throw_delegate
+         absl::time
+         absl::time_zone
+         absl::utf8_for_code_point
+         absl::vlog_config_internal)
   endif()
+  set(ARROW_BUNDLED_STATIC_LIBS
+      "${ARROW_BUNDLED_STATIC_LIBS}"
+      PARENT_SCOPE)
   list(POP_BACK CMAKE_MESSAGE_INDENT)
 endfunction()
 
diff --git a/python/pyarrow/tests/test_dataset.py 
b/python/pyarrow/tests/test_dataset.py
index d00c0c4b3e..3afe3281cb 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -5907,6 +5907,7 @@ def test_make_write_options_error():
         pformat.make_write_options(43)
 
 
+@pytest.mark.substrait
 def test_scanner_from_substrait(dataset):
     try:
         import pyarrow.substrait as ps
diff --git a/python/scripts/run_emscripten_tests.py 
b/python/scripts/run_emscripten_tests.py
index a4f9ce9d9a..3cd7d5fd67 100644
--- a/python/scripts/run_emscripten_tests.py
+++ b/python/scripts/run_emscripten_tests.py
@@ -35,7 +35,9 @@ from selenium import webdriver
 
 class TemplateOverrider(http.server.SimpleHTTPRequestHandler):
     def log_request(self, code="-", size="-"):
-        # don't log successful requests
+        # don't log successful requests but log errors
+        if isinstance(code, int) and code >= 400:
+            sys.stderr.write(f"HTTP {code} for {self.path}\n")
         return
 
     def do_GET(self) -> bytes | None:
@@ -200,7 +202,8 @@ class BrowserDriver:
     def __init__(self, hostname, port, driver):
         self.driver = driver
         self.driver.get(f"http://{hostname}:{port}/test.html")
-        self.driver.set_script_timeout(100)
+        # Chrome on CI takes longer than locally to compile.
+        self.driver.set_script_timeout(1200)
 
     def load_pyodide(self, dist_dir):
         pass
@@ -259,7 +262,9 @@ class ChromeDriver(BrowserDriver):
         options = Options()
         options.add_argument("--headless")
         options.add_argument("--no-sandbox")
-        super().__init__(hostname, port, webdriver.Chrome(options=options))
+        driver = webdriver.Chrome(options=options)
+        driver.command_executor._client_config.timeout = 1200
+        super().__init__(hostname, port, driver)
 
 
 class FirefoxDriver(BrowserDriver):
@@ -336,7 +341,9 @@ with launch_server(dist_dir) as (hostname, port):
         """
 import pyarrow,pathlib
 pyarrow_dir = pathlib.Path(pyarrow.__file__).parent
-pytest.main([pyarrow_dir, '-r', 's'])
+# Substrait expression serialization crashes pyodide with a
+# "Cannot convert a BigInt value to a number" error.
+pytest.main([pyarrow_dir, '-r', 's', '-m', 'not substrait'])
 """,
         wait_for_terminate=False,
     )

Reply via email to