This is an automated email from the ASF dual-hosted git repository.
assignuser pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 8be70c1372 GH-35193: [Python][Packaging] Enable GCS on Windows wheels (#35255)
8be70c1372 is described below
commit 8be70c137289adba92871555ce74055719172f56
Author: Raúl Cumplido <[email protected]>
AuthorDate: Tue May 16 04:52:34 2023 +0200
GH-35193: [Python][Packaging] Enable GCS on Windows wheels (#35255)
### Rationale for this change
GCS could be enabled on Windows wheels.
### What changes are included in this PR?
Enabling GCS on Windows wheels.
### Are these changes tested?
Crossbow jobs for wheels run tests for GCS now.
I have tested locally that I can install the built wheel and I can import
`GcsFileSystem`:
```
Python 3.9.12 (main, Apr 4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Warning:
This Python interpreter is in a conda environment, but the environment has
not been activated. Libraries may fail to load. To activate this environment
please see https://conda.io/activation
Type "help", "copyright", "credits" or "license" for more information.
>>> from pyarrow.fs import GcsFileSystem
>>> fs = GcsFileSystem(access_token='abc',target_service_account='service_account@apache',credential_token_expiration=datetime.now(),default_bucket_location='us-west2',scheme='https', endpoint_override='localhost:8999')
>>> fs.default_bucket_location
'us-west2'
>>> fs.create_dir('hello')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pyarrow\_fs.pyx", line 593, in pyarrow._fs.FileSystem.create_dir
File "pyarrow\error.pxi", line 113, in pyarrow.lib.check_status
PermissionError: [Errno 13] google::cloud::Status(UNAUTHENTICATED:
Permanent error GetBucketMetadata: Could not create a OAuth2 access token to
authenticate the request. The request was not sent, as such an access token is
required to complete the request successfully. Learn more about Google Cloud
authentication at https://cloud.google.com/docs/authentication. The underlying
error message was: Request had invalid authentication credentials. Expected
OAuth 2 access token, login cookie [...]
```
### Are there any user-facing changes?
No, but Windows wheels should now include GCS support (`ARROW_GCS`).
* Closes: #35193
Authored-by: Raúl Cumplido <[email protected]>
Signed-off-by: Jacob Wujciak-Jens <[email protected]>
---
ci/appveyor-cpp-build.bat | 8 ++++++++
ci/scripts/install_gcs_testbench.bat | 25 +++++++++++++++++++++++++
ci/scripts/python_wheel_windows_build.bat | 3 +++
ci/scripts/python_wheel_windows_test.bat | 5 +++++
4 files changed, 41 insertions(+)
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index 0da8a7dacc..caadaab90b 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -42,6 +42,9 @@ set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON
@rem Enable warnings-as-errors
set ARROW_CXXFLAGS=/WX /MP
+@rem Install GCS testbench
+call %CD%\ci\scripts\install_gcs_testbench.bat
+
@rem
@rem Build and test Arrow C++ libraries (including Parquet)
@rem
@@ -70,6 +73,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^
-DARROW_FLIGHT_SQL=%ARROW_BUILD_FLIGHT_SQL% ^
-DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^
+ -DARROW_GCS=%ARROW_GCS% ^
-DARROW_HDFS=ON ^
-DARROW_JSON=ON ^
-DARROW_MIMALLOC=ON ^
@@ -97,6 +101,9 @@ cmake --build . --target install --config Release || exit /B
@rem For ORC C++
set TZDIR=%CONDA_PREFIX%\share\zoneinfo
+@rem For finding Python executable for GCS tests
+set PYTHON=python
+
ctest --output-on-failure || exit /B
popd
@@ -114,6 +121,7 @@ set PYARROW_WITH_ACERO=ON
set PYARROW_WITH_DATASET=ON
set PYARROW_WITH_FLIGHT=%ARROW_BUILD_FLIGHT%
set PYARROW_WITH_GANDIVA=%ARROW_BUILD_GANDIVA%
+set PYARROW_WITH_GCS=%ARROW_GCS%
set PYARROW_WITH_PARQUET=ON
set PYARROW_WITH_PARQUET_ENCRYPTION=ON
set PYARROW_WITH_S3=%ARROW_S3%
diff --git a/ci/scripts/install_gcs_testbench.bat b/ci/scripts/install_gcs_testbench.bat
new file mode 100644
index 0000000000..b03d0c2ad6
--- /dev/null
+++ b/ci/scripts/install_gcs_testbench.bat
@@ -0,0 +1,25 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+set GCS_TESTBENCH_VERSION="v0.36.0"
+
+@REM Install GCS testbench %GCS_TESTBENCH_VERSION%
+python -m pip install ^
+    "https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^
+    || exit /B 1
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index 80270a84c4..ee879c7050 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -34,6 +34,7 @@ set ARROW_ACERO=ON
set ARROW_DATASET=ON
set ARROW_FLIGHT=ON
set ARROW_GANDIVA=OFF
+set ARROW_GCS=ON
set ARROW_HDFS=ON
set ARROW_ORC=OFF
set ARROW_PARQUET=ON
@@ -73,6 +74,7 @@ cmake ^
-DARROW_FILESYSTEM=ON ^
-DARROW_FLIGHT=%ARROW_FLIGHT% ^
-DARROW_GANDIVA=%ARROW_GANDIVA% ^
+ -DARROW_GCS=%ARROW_GCS% ^
-DARROW_HDFS=%ARROW_HDFS% ^
-DARROW_JSON=ON ^
-DARROW_MIMALLOC=%ARROW_MIMALLOC% ^
@@ -111,6 +113,7 @@ set PYARROW_WITH_ACERO=%ARROW_ACERO%
set PYARROW_WITH_DATASET=%ARROW_DATASET%
set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT%
set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA%
+set PYARROW_WITH_GCS=%ARROW_GCS%
set PYARROW_WITH_HDFS=%ARROW_HDFS%
set PYARROW_WITH_ORC=%ARROW_ORC%
set PYARROW_WITH_PARQUET=%ARROW_PARQUET%
diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat
index cae1b7ef32..c73b0cfd1b 100755
--- a/ci/scripts/python_wheel_windows_test.bat
+++ b/ci/scripts/python_wheel_windows_test.bat
@@ -22,6 +22,7 @@ set PYARROW_TEST_CYTHON=ON
set PYARROW_TEST_DATASET=ON
set PYARROW_TEST_FLIGHT=ON
set PYARROW_TEST_GANDIVA=OFF
+set PYARROW_TEST_GCS=ON
set PYARROW_TEST_HDFS=ON
set PYARROW_TEST_ORC=OFF
set PYARROW_TEST_PARQUET=ON
@@ -38,11 +39,15 @@ set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data
@REM Install testing dependencies
pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
+@REM Install GCS testbench
+call "C:\arrow\ci\scripts\install_gcs_testbench.bat"
+
@REM Install the built wheels
python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1
@REM Test that the modules are importable
python -c "import pyarrow" || exit /B 1
+python -c "import pyarrow._gcsfs" || exit /B 1
python -c "import pyarrow._hdfs" || exit /B 1
python -c "import pyarrow._s3fs" || exit /B 1
python -c "import pyarrow.csv" || exit /B 1