[ 
https://issues.apache.org/jira/browse/ARROW-1778?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16259286#comment-16259286
 ] 

ASF GitHub Bot commented on ARROW-1778:
---------------------------------------

wesm closed pull request #1331: ARROW-1778: [Python] Link parquet-cpp 
statically, privately in manylinux1 wheels
URL: https://github.com/apache/arrow/pull/1331
 
 
   

This PR was merged from a forked repository. Because GitHub hides the
original diff of a fork-based pull request once it has been merged, the
diff is reproduced below for the sake of provenance:

diff --git a/cpp/cmake_modules/FindThrift.cmake 
b/cpp/cmake_modules/FindThrift.cmake
new file mode 100644
index 000000000..25f50825c
--- /dev/null
+++ b/cpp/cmake_modules/FindThrift.cmake
@@ -0,0 +1,102 @@
+# Copyright 2012 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# - Find Thrift (a cross platform RPC lib/tool)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Thrift_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the Thrift installation.
+#                The environment variable THRIFT_HOME overrides this variable.
+#
+# This module defines
+#  THRIFT_VERSION, version string of Thrift if found
+#  THRIFT_INCLUDE_DIR, where to find THRIFT headers
+#  THRIFT_CONTRIB_DIR, where contrib thrift files (e.g. fb303.thrift) are installed
+#  THRIFT_STATIC_LIB, THRIFT static library
+#  THRIFT_FOUND, If false, do not try to use Thrift
+
+# prefer the thrift version supplied in THRIFT_HOME
+if( NOT "${THRIFT_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${THRIFT_HOME}" _native_path )
+    list( APPEND _thrift_roots ${_native_path} )
+elseif ( Thrift_HOME )
+    list( APPEND _thrift_roots ${Thrift_HOME} )
+endif()
+
+message(STATUS "THRIFT_HOME: ${THRIFT_HOME}")
+find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h HINTS
+  ${_thrift_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "include"
+)
+
+find_path(THRIFT_CONTRIB_DIR share/fb303/if/fb303.thrift HINTS
+  ${_thrift_roots}
+  NO_DEFAULT_PATH
+)
+
+if (MSVC AND NOT THRIFT_MSVC_STATIC_LIB_SUFFIX)
+  set(THRIFT_MSVC_STATIC_LIB_SUFFIX md)
+endif()
+
+find_library(THRIFT_STATIC_LIB NAMES
+  
${CMAKE_STATIC_LIBRARY_PREFIX}thrift${THRIFT_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
+  HINTS ${_thrift_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib"
+)
+
+find_program(THRIFT_COMPILER thrift HINTS
+  ${_thrift_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "bin"
+)
+
+if (THRIFT_STATIC_LIB)
+  set(THRIFT_FOUND TRUE)
+  exec_program(${THRIFT_COMPILER}
+    ARGS -version OUTPUT_VARIABLE THRIFT_VERSION RETURN_VALUE THRIFT_RETURN)
+else ()
+  set(THRIFT_FOUND FALSE)
+endif ()
+
+if (THRIFT_FOUND)
+  if (NOT Thrift_FIND_QUIETLY)
+    message(STATUS "Thrift version: ${THRIFT_VERSION}")
+  endif ()
+else ()
+  if (NOT Thrift_FIND_QUIETLY)
+    set(THRIFT_ERR_MSG "Thrift compiler/libraries NOT found: ${THRIFT_RETURN}")
+    set(THRIFT_ERR_MSG "${THRIFT_ERR_MSG} (${THRIFT_INCLUDE_DIR}, 
${THRIFT_STATIC_LIB}).")
+    if ( _thrift_roots )
+      set(THRIFT_ERR_MSG "${THRIFT_ERR_MSG} Looked in ${_thrift_roots}.")
+    else ()
+      set(THRIFT_ERR_MSG "${THRIFT_ERR_MSG} Looked in system search paths.")
+    endif ()
+    if ( Thrift_FIND_REQUIRED )
+      message(FATAL_ERROR "${THRIFT_ERR_MSG}")
+    else ()
+      message(STATUS "${THRIFT_ERR_MSG}")
+    endif ()
+  endif ()
+endif ()
+
+
+mark_as_advanced(
+  THRIFT_STATIC_LIB
+  THRIFT_COMPILER
+  THRIFT_INCLUDE_DIR
+)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 8c7348298..00a4ee687 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -61,6 +61,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL 
"${CMAKE_CURRENT_SOURCE_DIR}")
   option(PYARROW_BUILD_PARQUET
     "Build the PyArrow Parquet integration"
     OFF)
+  option(PYARROW_PARQUET_USE_SHARED
+    "Rely on parquet shared libraries where relevant"
+    ON)
   option(PYARROW_BUILD_PLASMA
     "Build the PyArrow Plasma integration"
     OFF)
@@ -282,24 +285,41 @@ if (PYARROW_BUILD_PARQUET)
   endif()
   include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
 
-  if (PYARROW_BUNDLE_ARROW_CPP)
-    bundle_arrow_lib(PARQUET_SHARED_LIB
-      ABI_VERSION ${PARQUET_ABI_VERSION}
-      SO_VERSION ${PARQUET_SO_VERSION})
+  if (PYARROW_PARQUET_USE_SHARED)
+    if (PYARROW_BUNDLE_ARROW_CPP)
+      bundle_arrow_lib(PARQUET_SHARED_LIB
+        ABI_VERSION ${PARQUET_ABI_VERSION}
+        SO_VERSION ${PARQUET_SO_VERSION})
+      if (MSVC)
+        bundle_arrow_implib(PARQUET_SHARED_IMP_LIB)
+      endif()
+    endif()
     if (MSVC)
-      bundle_arrow_implib(PARQUET_SHARED_IMP_LIB)
+      ADD_THIRDPARTY_LIB(parquet
+        SHARED_LIB ${PARQUET_SHARED_IMP_LIB})
+    else()
+      ADD_THIRDPARTY_LIB(parquet
+        SHARED_LIB ${PARQUET_SHARED_LIB})
     endif()
-  endif()
-  if (MSVC)
-    ADD_THIRDPARTY_LIB(parquet
-      SHARED_LIB ${PARQUET_SHARED_IMP_LIB})
+    set(LINK_LIBS
+      ${LINK_LIBS}
+      parquet_shared)
   else()
+    find_package(Thrift)
+    set(Boost_USE_STATIC_LIBS ON)
+    find_package(Boost COMPONENTS regex REQUIRED)
+    ADD_THIRDPARTY_LIB(boost_regex
+      STATIC_LIB ${Boost_REGEX_LIBRARY_RELEASE})
     ADD_THIRDPARTY_LIB(parquet
-      SHARED_LIB ${PARQUET_SHARED_LIB})
+      STATIC_LIB ${PARQUET_STATIC_LIB})
+    ADD_THIRDPARTY_LIB(thrift
+      STATIC_LIB ${THRIFT_STATIC_LIB})
+    set(LINK_LIBS
+      ${LINK_LIBS}
+      parquet_static
+      thrift_static
+      boost_regex_static)
   endif()
-  set(LINK_LIBS
-    ${LINK_LIBS}
-    parquet_shared)
   set(CYTHON_EXTENSIONS
     ${CYTHON_EXTENSIONS}
     _parquet)
diff --git a/python/manylinux1/Dockerfile-x86_64 
b/python/manylinux1/Dockerfile-x86_64
index 69d8d3dbf..919a32be7 100644
--- a/python/manylinux1/Dockerfile-x86_64
+++ b/python/manylinux1/Dockerfile-x86_64
@@ -29,5 +29,5 @@ RUN /check_arrow_visibility.sh
 WORKDIR /
 RUN git clone https://github.com/apache/parquet-cpp.git
 WORKDIR /parquet-cpp
-RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release 
-DCMAKE_INSTALL_PREFIX=/arrow-dist -DPARQUET_BUILD_TESTS=OFF 
-DPARQUET_BOOST_USE_SHARED=OFF -GNinja .
+RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release 
-DCMAKE_INSTALL_PREFIX=/arrow-dist -DPARQUET_BUILD_TESTS=OFF 
-DPARQUET_BUILD_SHARED=OFF -DPARQUET_BUILD_STATIC=ON 
-DPARQUET_BOOST_USE_SHARED=OFF -GNinja .
 RUN ninja install
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index 074bd0056..ced4556e9 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -40,11 +40,11 @@ cd /arrow/python
 # PyArrow build configuration
 export PYARROW_BUILD_TYPE='release'
 export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_STATIC_PARQUET=1
 export PYARROW_WITH_PLASMA=1
 export PYARROW_BUNDLE_ARROW_CPP=1
-# Need as otherwise arrow_io is sometimes not linked
-export LDFLAGS="-Wl,--no-as-needed"
 export PKG_CONFIG_PATH=/arrow-dist/lib64/pkgconfig
+export PYARROW_CMAKE_OPTIONS='-DTHRIFT_HOME=/usr'
 # Ensure the target directory exists
 mkdir -p /io/dist
 
@@ -65,7 +65,7 @@ for PYTHON in ${PYTHON_VERSIONS}; do
     # Clear output directory
     rm -rf dist/
     echo "=== (${PYTHON}) Building wheel ==="
-    PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py 
build_ext --inplace --with-parquet --bundle-arrow-cpp
+    PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py 
build_ext --inplace --with-parquet --with-static-parquet --bundle-arrow-cpp
     PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py 
bdist_wheel
 
     echo "=== (${PYTHON}) Test the existence of optional modules ==="
diff --git a/python/setup.py b/python/setup.py
index ccab8fb65..2270cf79c 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -82,6 +82,7 @@ def run(self):
     user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
                      ('build-type=', None, 'build type (debug or release)'),
                      ('with-parquet', None, 'build the Parquet extension'),
+                     ('with-static-parquet', None, 'link parquet statically'),
                      ('with-plasma', None, 'build the Plasma extension'),
                      ('bundle-arrow-cpp', None,
                       'bundle the Arrow C++ libraries')] +
@@ -102,6 +103,8 @@ def initialize_options(self):
 
         self.with_parquet = strtobool(
             os.environ.get('PYARROW_WITH_PARQUET', '0'))
+        self.with_static_parquet = strtobool(
+            os.environ.get('PYARROW_WITH_STATIC_PARQUET', '0'))
         self.with_plasma = strtobool(
             os.environ.get('PYARROW_WITH_PLASMA', '0'))
         self.bundle_arrow_cpp = strtobool(
@@ -144,6 +147,8 @@ def _run_cmake(self):
 
         if self.with_parquet:
             cmake_options.append('-DPYARROW_BUILD_PARQUET=on')
+        if self.with_static_parquet:
+            cmake_options.append('-DPYARROW_PARQUET_USE_SHARED=off')
 
         if self.with_plasma:
             cmake_options.append('-DPYARROW_BUILD_PLASMA=on')
@@ -225,7 +230,7 @@ def _run_cmake(self):
             move_shared_libs(build_prefix, build_lib, "arrow_python")
             if self.with_plasma:
                 move_shared_libs(build_prefix, build_lib, "plasma")
-            if self.with_parquet:
+            if self.with_parquet and not self.with_static_parquet:
                 move_shared_libs(build_prefix, build_lib, "parquet")
 
         print('Bundling includes: ' + pjoin(build_prefix, 'include'))


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] Link parquet-cpp statically, privately in manylinux1 wheels
> --------------------------------------------------------------------
>
>                 Key: ARROW-1778
>                 URL: https://issues.apache.org/jira/browse/ARROW-1778
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>            Reporter: Uwe L. Korn
>            Assignee: Uwe L. Korn
>            Priority: Blocker
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> We currently link parquet-cpp dynamically in the {{manylinux1}} wheels. This 
> also makes us the authority on the distribution of {{parquet-cpp}} inside of 
> the wheel-based ecosystem. Instead of doing this, we should statically, 
> privately link {{parquet-cpp}} inside of the wheels.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to