This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 51970e066e GH-39006: [Python] Extract libparquet requirements out of libarrow_python.so to new libarrow_python_parquet_encryption.so (#39316)
51970e066e is described below

commit 51970e066e69ab01f9bdcc81219781ae07b9799b
Author: Raúl Cumplido <[email protected]>
AuthorDate: Fri Dec 22 02:06:50 2023 +0100

    GH-39006: [Python] Extract libparquet requirements out of libarrow_python.so to new libarrow_python_parquet_encryption.so (#39316)
    
    ### Rationale for this change
    
    If I build pyarrow with everything enabled and then remove some of the Arrow C++ shared libraries (.so) in order to have a minimal build, I can't import pyarrow because it requires libarrow and libparquet. This is relevant in order to have a minimal build for Conda. Please see the related issue for more information.
    
    ### What changes are included in this PR?
    
    Move the Parquet encryption support for pyarrow out of libarrow_python into its own shared object, libarrow_python_parquet_encryption.
    
    ### Are these changes tested?
    
    I will run extensive CI with extra python archery tests.
    
    ### Are there any user-facing changes?
    
    No, and yes :) There will be a new .so in pyarrow, but it shouldn't be relevant in my opinion.
    * Closes: #39006
    
    Lead-authored-by: Raúl Cumplido <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ci/scripts/python_test.sh                          |  2 ++
 ci/scripts/python_wheel_unix_test.sh               |  1 +
 ci/scripts/python_wheel_windows_test.bat           |  1 +
 python/CMakeLists.txt                              | 38 ++++++++++++----------
 .../pyarrow/src/arrow/python/parquet_encryption.h  | 33 ++++++++++++++++---
 5 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index 8d818346fa..341c2dd057 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap
 : ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}}
 : ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}}
 : ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}}
+: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}}
 : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}}
 
 export PYARROW_TEST_ACERO
@@ -56,6 +57,7 @@ export PYARROW_TEST_GCS
 export PYARROW_TEST_HDFS
 export PYARROW_TEST_ORC
 export PYARROW_TEST_PARQUET
+export PYARROW_TEST_PARQUET_ENCRYPTION
 export PYARROW_TEST_S3
 
 # Testing PyArrow
diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh
index a6cc3bb7b2..01250ff7ef 100755
--- a/ci/scripts/python_wheel_unix_test.sh
+++ b/ci/scripts/python_wheel_unix_test.sh
@@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON
 export PYARROW_TEST_ORC=ON
 export PYARROW_TEST_PANDAS=ON
 export PYARROW_TEST_PARQUET=ON
+export PYARROW_TEST_PARQUET_ENCRYPTION=ON
 export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT}
 export PYARROW_TEST_S3=${ARROW_S3}
 export PYARROW_TEST_TENSORFLOW=ON
diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat
index c73b0cfd1b..b14bfddfb3 100755
--- a/ci/scripts/python_wheel_windows_test.bat
+++ b/ci/scripts/python_wheel_windows_test.bat
@@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON
 set PYARROW_TEST_HDFS=ON
 set PYARROW_TEST_ORC=OFF
 set PYARROW_TEST_PARQUET=ON
+set PYARROW_TEST_PARQUET_ENCRYPTION=ON
 set PYARROW_TEST_SUBSTRAIT=ON
 set PYARROW_TEST_S3=OFF
 set PYARROW_TEST_TENSORFLOW=ON
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3f810d2727..2df1e67b9f 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -332,22 +332,6 @@ if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION)
   find_package(Parquet REQUIRED)
 endif()
 
-if(PYARROW_BUILD_PARQUET_ENCRYPTION)
-  if(PARQUET_REQUIRE_ENCRYPTION)
-    list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
-    if(ARROW_BUILD_SHARED)
-      list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared)
-    else()
-      list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static)
-    endif()
-    message(STATUS "Parquet Encryption Enabled")
-  else()
-    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
-  endif()
-else()
-  message(STATUS "Parquet Encryption is NOT Enabled")
-endif()
-
 if(PYARROW_BUILD_HDFS)
   if(NOT ARROW_HDFS)
     message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
@@ -391,6 +375,26 @@ install(TARGETS arrow_python
         LIBRARY DESTINATION .
         RUNTIME DESTINATION .)
 
+set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
+if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
+  message(STATUS "Parquet Encryption is NOT Enabled")
+else()
+  if(PARQUET_REQUIRE_ENCRYPTION)
+    add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS})
+    target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python
+                                                                 ${PARQUET_LINK_LIBS})
+    target_compile_definitions(arrow_python_parquet_encryption
+                               PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
+    install(TARGETS arrow_python_parquet_encryption
+            ARCHIVE DESTINATION .
+            LIBRARY DESTINATION .
+            RUNTIME DESTINATION .)
+    message(STATUS "Parquet Encryption Enabled")
+  else()
+    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
+  endif()
+endif()
+
 set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
 if(PYARROW_BUILD_FLIGHT)
   if(NOT ARROW_FLIGHT)
@@ -814,6 +818,6 @@ endif()
 if(PYARROW_BUILD_PARQUET)
   target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
   if(PYARROW_BUILD_PARQUET_ENCRYPTION)
-    target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS})
+    target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
   endif()
 endif()
diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h
index 23ee478348..a1aaa30e26 100644
--- a/python/pyarrow/src/arrow/python/parquet_encryption.h
+++ b/python/pyarrow/src/arrow/python/parquet_encryption.h
@@ -26,6 +26,27 @@
 #include "parquet/encryption/kms_client.h"
 #include "parquet/encryption/kms_client_factory.h"
 
+#if defined(_WIN32) || defined(__CYGWIN__)  // Windows
+#if defined(_MSC_VER)
+#pragma warning(disable : 4251)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef ARROW_PYTHON_STATIC
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
+#elif defined(ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllexport)
+#else
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllimport)
+#endif
+
+#else  // Not Windows
+#ifndef ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
+#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __attribute__((visibility("default")))
+#endif
+#endif  // Non-Windows
+
 namespace arrow {
 namespace py {
 namespace parquet {
@@ -33,7 +54,7 @@ namespace encryption {
 
 /// \brief A table of function pointers for calling from C++ into
 /// Python.
-class ARROW_PYTHON_EXPORT PyKmsClientVtable {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable {
  public:
   std::function<void(PyObject*, const std::string& key_bytes,
                      const std::string& master_key_identifier, std::string* out)>
@@ -44,7 +65,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientVtable {
 };
 
 /// \brief A helper for KmsClient implementation in Python.
-class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient
+    : public ::parquet::encryption::KmsClient {
  public:
   PyKmsClient(PyObject* handler, PyKmsClientVtable vtable);
   ~PyKmsClient() override;
@@ -62,7 +84,7 @@ class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient
 
 /// \brief A table of function pointers for calling from C++ into
 /// Python.
-class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactoryVtable {
  public:
   std::function<void(
       PyObject*, const ::parquet::encryption::KmsConnectionConfig& kms_connection_config,
@@ -71,7 +93,7 @@ class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable {
 };
 
 /// \brief A helper for KmsClientFactory implementation in Python.
-class ARROW_PYTHON_EXPORT PyKmsClientFactory
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactory
     : public ::parquet::encryption::KmsClientFactory {
  public:
   PyKmsClientFactory(PyObject* handler, PyKmsClientFactoryVtable vtable);
@@ -86,7 +108,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientFactory
 };
 
 /// \brief A CryptoFactory that returns Results instead of throwing exceptions.
-class ARROW_PYTHON_EXPORT PyCryptoFactory : public ::parquet::encryption::CryptoFactory {
+class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyCryptoFactory
+    : public ::parquet::encryption::CryptoFactory {
  public:
   arrow::Result<std::shared_ptr<::parquet::FileEncryptionProperties>>
   SafeGetFileEncryptionProperties(

Reply via email to