This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 51970e066e GH-39006: [Python] Extract libparquet requirements out of libarrow_python.so to new libarrow_python_parquet_encryption.so (#39316)
51970e066e is described below
commit 51970e066e69ab01f9bdcc81219781ae07b9799b
Author: Raúl Cumplido <[email protected]>
AuthorDate: Fri Dec 22 02:06:50 2023 +0100
GH-39006: [Python] Extract libparquet requirements out of libarrow_python.so to new libarrow_python_parquet_encryption.so (#39316)
### Rationale for this change
If I build pyarrow with everything enabled and then remove some of the Arrow C++
shared libraries (.so) to get a minimal build, I can't import pyarrow because it
requires libarrow and libparquet. This matters for providing a minimal build for
Conda. Please see the related issue for more information.
### What changes are included in this PR?
Move libarrow parquet encryption for pyarrow to its own shared object.
### Are these changes tested?
I will run extensive CI with extra python archery tests.
### Are there any user-facing changes?
No, and yes :) There will be a new .so in pyarrow, but it shouldn't be relevant
in my opinion.
* Closes: #39006
Lead-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ci/scripts/python_test.sh | 2 ++
ci/scripts/python_wheel_unix_test.sh | 1 +
ci/scripts/python_wheel_windows_test.bat | 1 +
python/CMakeLists.txt | 38 ++++++++++++----------
.../pyarrow/src/arrow/python/parquet_encryption.h | 33 ++++++++++++++++---
5 files changed, 53 insertions(+), 22 deletions(-)
diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index 8d818346fa..341c2dd057 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap
: ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}}
: ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}}
: ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}}
+: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}}
: ${PYARROW_TEST_S3:=${ARROW_S3:-ON}}
export PYARROW_TEST_ACERO
@@ -56,6 +57,7 @@ export PYARROW_TEST_GCS
export PYARROW_TEST_HDFS
export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
+export PYARROW_TEST_PARQUET_ENCRYPTION
export PYARROW_TEST_S3
# Testing PyArrow
diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh
index a6cc3bb7b2..01250ff7ef 100755
--- a/ci/scripts/python_wheel_unix_test.sh
+++ b/ci/scripts/python_wheel_unix_test.sh
@@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON
export PYARROW_TEST_ORC=ON
export PYARROW_TEST_PANDAS=ON
export PYARROW_TEST_PARQUET=ON
+export PYARROW_TEST_PARQUET_ENCRYPTION=ON
export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_TEST_S3=${ARROW_S3}
export PYARROW_TEST_TENSORFLOW=ON
diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat
index c73b0cfd1b..b14bfddfb3 100755
--- a/ci/scripts/python_wheel_windows_test.bat
+++ b/ci/scripts/python_wheel_windows_test.bat
@@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON
set PYARROW_TEST_HDFS=ON
set PYARROW_TEST_ORC=OFF
set PYARROW_TEST_PARQUET=ON
+set PYARROW_TEST_PARQUET_ENCRYPTION=ON
set PYARROW_TEST_SUBSTRAIT=ON
set PYARROW_TEST_S3=OFF
set PYARROW_TEST_TENSORFLOW=ON
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3f810d2727..2df1e67b9f 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -332,22 +332,6 @@ if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION)
find_package(Parquet REQUIRED)
endif()
-if(PYARROW_BUILD_PARQUET_ENCRYPTION)
- if(PARQUET_REQUIRE_ENCRYPTION)
- list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
- if(ARROW_BUILD_SHARED)
- list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared)
- else()
- list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static)
- endif()
- message(STATUS "Parquet Encryption Enabled")
- else()
- message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
- endif()
-else()
- message(STATUS "Parquet Encryption is NOT Enabled")
-endif()
-
if(PYARROW_BUILD_HDFS)
if(NOT ARROW_HDFS)
message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
@@ -391,6 +375,26 @@ install(TARGETS arrow_python
LIBRARY DESTINATION .
RUNTIME DESTINATION .)
+set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
+if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
+ message(STATUS "Parquet Encryption is NOT Enabled")
+else()
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS})
+ target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python
+ ${PARQUET_LINK_LIBS})
+ target_compile_definitions(arrow_python_parquet_encryption
+ PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
+ install(TARGETS arrow_python_parquet_encryption
+ ARCHIVE DESTINATION .
+ LIBRARY DESTINATION .
+ RUNTIME DESTINATION .)
+ message(STATUS "Parquet Encryption Enabled")
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
if(PYARROW_BUILD_FLIGHT)
if(NOT ARROW_FLIGHT)
@@ -814,6 +818,6 @@ endif()
if(PYARROW_BUILD_PARQUET)
target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
if(PYARROW_BUILD_PARQUET_ENCRYPTION)
- target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS})
+ target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
endif()
endif()
diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h
index 23ee478348..a1aaa30e26 100644
--- a/python/pyarrow/src/arrow/python/parquet_encryption.h
+++ b/python/pyarrow/src/arrow/python/parquet_encryption.h
@@ -26,6 +26,27 @@
#include "parquet/encryption/kms_client.h"
#include "parquet/encryption/kms_client_factory.h"
+#if defined(_WIN32) || defined(__CYGWIN__) // Windows
+#if defined(_MSC_VER)
+#pragma warning(disable : 4251)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef ARROW_PYTHON_STATIC
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
+#elif defined(ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllexport)
+#else
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllimport)
+#endif
+
+#else // Not Windows
+#ifndef ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __attribute__((visibility("default")))
+#endif
+#endif // Non-Windows
+
namespace arrow {
namespace py {
namespace parquet {
@@ -33,7 +54,7 @@ namespace encryption {
/// \brief A table of function pointers for calling from C++ into
/// Python.
-class ARROW_PYTHON_EXPORT PyKmsClientVtable {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable {
public:
std::function<void(PyObject*, const std::string& key_bytes,
const std::string& master_key_identifier, std::string* out)>
@@ -44,7 +65,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientVtable {
};
/// \brief A helper for KmsClient implementation in Python.
-class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient
+ : public ::parquet::encryption::KmsClient {
public:
PyKmsClient(PyObject* handler, PyKmsClientVtable vtable);
~PyKmsClient() override;
@@ -62,7 +84,7 @@ class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient
/// \brief A table of function pointers for calling from C++ into
/// Python.
-class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactoryVtable {
public:
std::function<void(
PyObject*, const ::parquet::encryption::KmsConnectionConfig& kms_connection_config,
@@ -71,7 +93,7 @@ class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable {
};
/// \brief A helper for KmsClientFactory implementation in Python.
-class ARROW_PYTHON_EXPORT PyKmsClientFactory
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactory
: public ::parquet::encryption::KmsClientFactory {
public:
PyKmsClientFactory(PyObject* handler, PyKmsClientFactoryVtable vtable);
@@ -86,7 +108,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientFactory
};
/// \brief A CryptoFactory that returns Results instead of throwing exceptions.
-class ARROW_PYTHON_EXPORT PyCryptoFactory : public ::parquet::encryption::CryptoFactory {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyCryptoFactory
+ : public ::parquet::encryption::CryptoFactory {
public:
arrow::Result<std::shared_ptr<::parquet::FileEncryptionProperties>>
SafeGetFileEncryptionProperties(