ARROW-1034: [PYTHON] Resolve wheel build issues on Windows Author: Max Risuhin <risuhin....@gmail.com>
Closes #707 from MaxRis/ARROW-1034 and squashes the following commits: 796d1554 [Max Risuhin] ARROW-1034: [PYTHON] Resolve wheel build issues on Windows Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/33117d9a Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/33117d9a Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/33117d9a Branch: refs/heads/master Commit: 33117d9a7df9a38fb1100d428852c9f5c9d65e4d Parents: aa652cb Author: Max Risuhin <risuhin....@gmail.com> Authored: Tue May 23 14:09:36 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Wed May 31 13:45:48 2017 -0400 ---------------------------------------------------------------------- ci/msvc-build.bat | 2 +- python/CMakeLists.txt | 34 +++++++++++++++++------- python/cmake_modules/FindArrow.cmake | 41 ++++++++++++++++++++++------- python/cmake_modules/FindParquet.cmake | 41 ++++++++++++++++++++++------- python/setup.py | 23 +++++++++++----- 5 files changed, 105 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/33117d9a/ci/msvc-build.bat ---------------------------------------------------------------------- diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat index f756fc5..d13c11f 100644 --- a/ci/msvc-build.bat +++ b/ci/msvc-build.bat @@ -68,6 +68,6 @@ popd set PYTHONPATH= pushd python -python setup.py build_ext --inplace --with-parquet || exit /B +python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp bdist_wheel || exit /B py.test pyarrow -v -s || exit /B popd http://git-wip-us.apache.org/repos/asf/arrow/blob/33117d9a/python/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6f48f7f..b02f23b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -237,7 +237,7 @@ function(bundle_arrow_lib library_path) configure_file(${LIBRARY_DIR}/${LIBRARY_NAME}.${ARROW_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.${ARROW_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY) - else() + else(NOT MSVC) configure_file(${${library_path}}.${ARROW_ABI_VERSION} ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARROW_ABI_VERSION} COPYONLY) @@ -257,10 +257,17 @@ if (PYARROW_BUNDLE_ARROW_CPP) bundle_arrow_lib(ARROW_PYTHON_SHARED_LIB) endif() -ADD_THIRDPARTY_LIB(arrow - SHARED_LIB ${ARROW_SHARED_LIB}) -ADD_THIRDPARTY_LIB(arrow_python - SHARED_LIB ${ARROW_PYTHON_SHARED_LIB}) +if (MSVC) + ADD_THIRDPARTY_LIB(arrow + SHARED_LIB ${ARROW_SHARED_IMP_LIB}) + ADD_THIRDPARTY_LIB(arrow_python + SHARED_LIB ${ARROW_PYTHON_SHARED_IMP_LIB}) +else() + ADD_THIRDPARTY_LIB(arrow + SHARED_LIB ${ARROW_SHARED_LIB}) + ADD_THIRDPARTY_LIB(arrow_python + SHARED_LIB ${ARROW_PYTHON_SHARED_LIB}) +endif() ############################################################ # Subdirectories @@ -312,7 +319,7 @@ if (PYARROW_BUILD_PARQUET) configure_file(${PARQUET_LIBRARY_DIR}/libparquet_arrow.${PARQUET_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} ${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet_arrow.${PARQUET_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY) - else() + elseif (NOT MSVC) configure_file(${PARQUET_LIBRARY_DIR}/libparquet${CMAKE_SHARED_LIBRARY_SUFFIX}.${PARQUET_ABI_VERSION} ${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet${CMAKE_SHARED_LIBRARY_SUFFIX}.${PARQUET_ABI_VERSION} COPYONLY) @@ -332,10 +339,17 @@ if (PYARROW_BUILD_PARQUET) #SET(PARQUET_ARROW_SHARED_LIB # ${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet_arrow${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() - ADD_THIRDPARTY_LIB(parquet - SHARED_LIB ${PARQUET_SHARED_LIB}) - ADD_THIRDPARTY_LIB(parquet_arrow - SHARED_LIB ${PARQUET_ARROW_SHARED_LIB}) + if (MSVC) + ADD_THIRDPARTY_LIB(parquet + SHARED_LIB ${PARQUET_SHARED_IMP_LIB}) + ADD_THIRDPARTY_LIB(parquet_arrow + SHARED_LIB ${PARQUET_ARROW_SHARED_IMP_LIB}) + else() + ADD_THIRDPARTY_LIB(parquet + SHARED_LIB ${PARQUET_SHARED_LIB}) + ADD_THIRDPARTY_LIB(parquet_arrow + SHARED_LIB ${PARQUET_ARROW_SHARED_LIB}) + endif() set(LINK_LIBS ${LINK_LIBS} parquet_shared http://git-wip-us.apache.org/repos/asf/arrow/blob/33117d9a/python/cmake_modules/FindArrow.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake index c16a4bf..4c8ed3d 100644 --- a/python/cmake_modules/FindArrow.cmake +++ b/python/cmake_modules/FindArrow.cmake @@ -21,6 +21,7 @@ # ARROW_LIBS, directory containing arrow libraries # ARROW_STATIC_LIB, path to libarrow.a # ARROW_SHARED_LIB, path to libarrow's shared library +# ARROW_SHARED_IMP_LIB, path to libarrow's import library (MSVC only) # ARROW_FOUND, whether arrow has been found include(FindPkgConfig) @@ -69,23 +70,45 @@ find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python PATHS ${ARROW_SEARCH_LIB_PATH} NO_DEFAULT_PATH) +get_filename_component(ARROW_PYTHON_LIBS ${ARROW_PYTHON_LIB_PATH} DIRECTORY) + +if (MSVC) + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll") + + if (MSVC AND NOT ARROW_MSVC_STATIC_LIB_SUFFIX) + set(ARROW_MSVC_STATIC_LIB_SUFFIX "_static") + endif() + + find_library(ARROW_SHARED_LIBRARIES NAMES arrow + PATHS ${ARROW_HOME} NO_DEFAULT_PATH + PATH_SUFFIXES "bin" ) + + find_library(ARROW_PYTHON_SHARED_LIBRARIES NAMES arrow_python + PATHS ${ARROW_HOME} NO_DEFAULT_PATH + PATH_SUFFIXES "bin" ) + get_filename_component(ARROW_SHARED_LIBS ${ARROW_SHARED_LIBRARIES} PATH ) + get_filename_component(ARROW_PYTHON_SHARED_LIBS ${ARROW_PYTHON_SHARED_LIBRARIES} PATH ) +endif () if (ARROW_INCLUDE_DIR AND ARROW_LIBS) set(ARROW_FOUND TRUE) - + set(ARROW_LIB_NAME arrow) + set(ARROW_PYTHON_LIB_NAME arrow_python) if (MSVC) - set(ARROW_STATIC_LIB ${ARROW_LIB_PATH}) - set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}) - set(ARROW_SHARED_LIB ${ARROW_STATIC_LIB}) - set(ARROW_PYTHON_SHARED_LIB ${ARROW_PYTHON_STATIC_LIB}) + set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIBS}/${ARROW_PYTHON_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(ARROW_SHARED_LIB ${ARROW_SHARED_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_PYTHON_SHARED_LIB ${ARROW_PYTHON_SHARED_LIBS}/${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_SHARED_IMP_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.lib) + set(ARROW_PYTHON_SHARED_IMP_LIB ${ARROW_PYTHON_LIBS}/${ARROW_PYTHON_LIB_NAME}.lib) else() - set(ARROW_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow.a) - set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_python.a) + set(ARROW_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/lib${ARROW_LIB_NAME}.a) + set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/lib${ARROW_PYTHON_LIB_NAME}.a) set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_jemalloc.a) - set(ARROW_SHARED_LIB ${ARROW_LIBS}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_SHARED_LIB ${ARROW_LIBS}/lib${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_LIBS}/libarrow_jemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/libarrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/lib${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() endif() http://git-wip-us.apache.org/repos/asf/arrow/blob/33117d9a/python/cmake_modules/FindParquet.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/FindParquet.cmake b/python/cmake_modules/FindParquet.cmake index de53a29..ef3c645 100644 --- a/python/cmake_modules/FindParquet.cmake +++ b/python/cmake_modules/FindParquet.cmake @@ -21,6 +21,7 @@ # PARQUET_LIBS, directory containing parquet libraries # PARQUET_STATIC_LIB, path to libparquet.a # PARQUET_SHARED_LIB, path to libparquet's shared library +# PARQUET_SHARED_IMP_LIB, path to libparquet's import library (MSVC only) # PARQUET_FOUND, whether parquet has been found include(FindPkgConfig) @@ -29,6 +30,24 @@ if(NOT "$ENV{PARQUET_HOME}" STREQUAL "") set(PARQUET_HOME "$ENV{PARQUET_HOME}") endif() +if (MSVC) + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll") + + if (MSVC AND NOT PARQUET_MSVC_STATIC_LIB_SUFFIX) + set(PARQUET_MSVC_STATIC_LIB_SUFFIX "_static") + endif() + + find_library(PARQUET_SHARED_LIBRARIES NAMES parquet + PATHS ${PARQUET_HOME} NO_DEFAULT_PATH + PATH_SUFFIXES "bin" ) + + find_library(PARQUET_ARROW_SHARED_LIBRARIES NAMES parquet_arrow + PATHS ${PARQUET_HOME} NO_DEFAULT_PATH + PATH_SUFFIXES "bin" ) + get_filename_component(PARQUET_SHARED_LIBS ${PARQUET_SHARED_LIBRARIES} PATH ) + get_filename_component(PARQUET_ARROW_SHARED_LIBS ${PARQUET_ARROW_SHARED_LIBRARIES} PATH ) +endif () + if(PARQUET_HOME) set(PARQUET_SEARCH_HEADER_PATHS ${PARQUET_HOME}/include @@ -74,13 +93,14 @@ endif() if (PARQUET_INCLUDE_DIR AND PARQUET_LIBRARIES) set(PARQUET_FOUND TRUE) + set(PARQUET_LIB_NAME parquet) if (MSVC) - set(PARQUET_STATIC_LIB "${PARQUET_LIBRARIES}_static") - set(PARQUET_SHARED_LIB "${PARQUET_LIBRARIES}") + set(PARQUET_STATIC_LIB "${PARQUET_LIBS}/${PARQUET_LIB_NAME}${PARQUET_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(PARQUET_SHARED_LIB "${PARQUET_SHARED_LIBS}/${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(PARQUET_SHARED_IMP_LIB "${PARQUET_LIBS}/${PARQUET_LIB_NAME}.lib") else() - set(PARQUET_LIB_NAME libparquet) - set(PARQUET_STATIC_LIB ${PARQUET_LIBS}/${PARQUET_LIB_NAME}.a) - set(PARQUET_SHARED_LIB ${PARQUET_LIBS}/${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(PARQUET_STATIC_LIB ${PARQUET_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PARQUET_LIB_NAME}.a) + set(PARQUET_SHARED_LIB ${PARQUET_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() else () set(PARQUET_FOUND FALSE) @@ -89,15 +109,16 @@ endif () if (PARQUET_INCLUDE_DIR AND PARQUET_ARROW_LIBRARIES) set(PARQUET_ARROW_FOUND TRUE) get_filename_component(PARQUET_ARROW_LIBS ${PARQUET_ARROW_LIBRARIES} PATH) + set(PARQUET_ARROW_LIB_NAME parquet_arrow) if (MSVC) - set(PARQUET_ARROW_STATIC_LIB "${PARQUET_ARROW_LIBRARIES}_static") - set(PARQUET_ARROW_SHARED_LIB "${PARQUET_ARROW_LIBRARIES}") + set(PARQUET_ARROW_STATIC_LIB "${PARQUET_ARROW_LIBS}/${PARQUET_ARROW_LIB_NAME}${PARQUET_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(PARQUET_ARROW_SHARED_LIB "${PARQUET_ARROW_SHARED_LIBS}/${PARQUET_ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(PARQUET_ARROW_SHARED_IMP_LIB "${PARQUET_ARROW_LIBS}/${PARQUET_ARROW_LIB_NAME}.lib") else() - set(PARQUET_ARROW_LIB_NAME libparquet_arrow) set(PARQUET_ARROW_STATIC_LIB - ${PARQUET_ARROW_LIBS}/${PARQUET_ARROW_LIB_NAME}.a) + ${PARQUET_ARROW_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PARQUET_ARROW_LIB_NAME}.a) set(PARQUET_ARROW_SHARED_LIB - ${PARQUET_ARROW_LIBS}/${PARQUET_ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + ${PARQUET_ARROW_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${PARQUET_ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() else () set(PARQUET_ARROW_FOUND FALSE) http://git-wip-us.apache.org/repos/asf/arrow/blob/33117d9a/python/setup.py ---------------------------------------------------------------------- diff --git a/python/setup.py b/python/setup.py index 9374af3..7e612c9 100644 --- a/python/setup.py +++ b/python/setup.py @@ -29,7 +29,7 @@ import Cython import pkg_resources -from setuptools import setup, Extension +from setuptools import setup, Extension, Distribution from os.path import join as pjoin @@ -210,6 +210,11 @@ class build_ext(_build_ext): except OSError: pass + if sys.platform == 'win32': + build_prefix = '' + else: + build_prefix = self.build_type + def move_lib(lib_name): lib_filename = (shared_library_prefix + lib_name + shared_library_suffix) @@ -217,14 +222,14 @@ class build_ext(_build_ext): if sys.platform == 'darwin': lib_pattern = (shared_library_prefix + lib_name + ".*" + shared_library_suffix[1:]) - libs = glob.glob(pjoin(self.build_type, lib_pattern)) + libs = glob.glob(pjoin(build_prefix, lib_pattern)) else: - libs = glob.glob(pjoin(self.build_type, lib_filename) + '*') + libs = glob.glob(pjoin(build_prefix, lib_filename) + '*') # Longest suffix library should be copied, all others symlinked libs.sort(key=lambda s: -len(s)) print(libs, libs[0]) lib_filename = os.path.basename(libs[0]) - shutil.move(pjoin(self.build_type, lib_filename), + shutil.move(pjoin(build_prefix, lib_filename), pjoin(build_lib, 'pyarrow', lib_filename)) for lib in libs[1:]: filename = os.path.basename(lib) @@ -233,10 +238,10 @@ class build_ext(_build_ext): os.symlink(lib_filename, link_name) if self.bundle_arrow_cpp: - print(pjoin(self.build_type, 'include'), pjoin(build_lib, 'pyarrow')) + print(pjoin(build_prefix, 'include'), pjoin(build_lib, 'pyarrow')) if os.path.exists(pjoin(build_lib, 'pyarrow', 'include')): shutil.rmtree(pjoin(build_lib, 'pyarrow', 'include')) - shutil.move(pjoin(self.build_type, 'include'), pjoin(build_lib, 'pyarrow')) + shutil.move(pjoin(build_prefix, 'include'), pjoin(build_lib, 'pyarrow')) move_lib("arrow") move_lib("arrow_python") if self.with_jemalloc: @@ -337,11 +342,17 @@ representations of flat and hierarchical data along with multiple language-bindings for structure manipulation. It also provides IPC and common algorithm implementations.""" +class BinaryDistribution(Distribution): + def has_ext_modules(foo): + return True + setup( name="pyarrow", packages=['pyarrow', 'pyarrow.tests'], zip_safe=False, package_data={'pyarrow': ['*.pxd', '*.pyx']}, + include_package_data=True, + distclass=BinaryDistribution, # Dummy extension to trigger build_ext ext_modules=[Extension('__dummy__', sources=[])],