jorisvandenbossche commented on code in PR #13311:
URL: https://github.com/apache/arrow/pull/13311#discussion_r925264377


##########
python/CMakeLists.txt:
##########
@@ -604,29 +604,27 @@ foreach(module ${CYTHON_EXTENSIONS})
                                                     ${module_output_directory})
   endif()
 
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    # In the event that we are bundling the shared libraries (e.g. in a
-    # manylinux1 wheel), we need to set the RPATH of the extensions to the
-    # root of the pyarrow/ package so that libarrow/libarrow_python are able
-    # to be loaded properly
-    if(APPLE)
-      set(module_install_rpath "@loader_path/")
-    else()
-      set(module_install_rpath "\$ORIGIN")
-    endif()
+  # In the event that we are bundling the shared libraries (e.g. in a
+  # manylinux1 wheel), we need to set the RPATH of the extensions to the
+  # root of the pyarrow/ package so that libarrow/libarrow_python are able
+  # to be loaded properly

Review Comment:
   I think this comment is no longer fully up to date? (i.e. we are now always 
bundling the shared library?)



##########
python/setup.py:
##########
@@ -227,6 +228,118 @@ def initialize_options(self):
         '_hdfsio',
         'gandiva']
 
+    def _run_cmake_pyarrow_cpp(self):
+        # check if build_type is correctly passed / set
+        if self.build_type.lower() not in ('release', 'debug'):
+            raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+                             "be 'release' or 'debug'")
+
+        # The directory containing this setup.py
+        source = os.path.dirname(os.path.abspath(__file__))
+        # The directory containing this PyArrow cpp CMakeLists.txt
+        source_pyarrow_cpp = pjoin(source, "pyarrow/src")
+
+        # The directory for the module being built
+        build_cmd = self.get_finalized_command('build')
+        saved_cwd = os.getcwd()
+        build_dir = pjoin(saved_cwd, 'build', 'dist')
+        build_include = pjoin(saved_cwd, 'build', 'dist', 'include')
+        build_lib = pjoin(os.getcwd(), build_cmd.build_lib)
+
+        # The directory containing Arrow C++ build
+        arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build')
+        if self.inplace:
+            # a bit hacky
+            build_lib = saved_cwd
+        if not os.path.isdir(build_dir):
+            self.mkpath(build_dir)
+        if not os.path.isdir(build_lib):
+            self.mkpath(build_lib)
+        if not os.path.isdir(build_include):
+            self.mkpath(build_include)
+
+        # Change to the build directory
+        with changed_dir(build_dir):
+            # cmake args
+            cmake_options = [
+                '-DCMAKE_INSTALL_PREFIX=' +
+                str(pjoin(saved_cwd, 'build/dist')),
+                '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type.lower()),
+                '-DARROW_BUILD_DIR=' + str(arrow_build_dir),
+                '-DPYTHON_EXECUTABLE=%s' % sys.executable,
+                '-DPython3_EXECUTABLE=%s' % sys.executable,

Review Comment:
   Small style nitpick: we are using 3 different string formatting ways in 3 
lines ;) (format, +, %) 
   (I know this is partly from copied elsewhere)



##########
python/setup.py:
##########
@@ -354,6 +468,19 @@ def append_cmake_bool(value, varname):
                 shutil.move(pjoin(build_prefix, 'include'),
                             pjoin(build_lib, 'pyarrow'))
 
+                # We need to, again, add the PyArrow cpp include folder
+                build_pyarrow_cpp_include = pjoin(
+                    saved_cwd, 'build/dist/include')
+                if not os.path.isdir(
+                        pjoin(build_pyarrow_cpp_include, "arrow", "python")):
+                    self.mkpath(

Review Comment:
   We are making the source path we want to copy? (if we need to create 
something, I would expect the target directory)



##########
python/setup.py:
##########
@@ -354,6 +468,19 @@ def append_cmake_bool(value, varname):
                 shutil.move(pjoin(build_prefix, 'include'),
                             pjoin(build_lib, 'pyarrow'))
 
+                # We need to, again, add the PyArrow cpp include folder

Review Comment:
   Why is it needed to do this again? (can we clarify that comment?)



##########
python/pyarrow/src/CMakeLists.txt:
##########
@@ -0,0 +1,445 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+  cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules" 
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION 
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")

Review Comment:
   On the other hand, the final library is still called "arrow_python.so", so 
here this is maybe fine?



##########
python/setup.py:
##########
@@ -227,6 +228,118 @@ def initialize_options(self):
         '_hdfsio',
         'gandiva']
 
+    def _run_cmake_pyarrow_cpp(self):
+        # check if build_type is correctly passed / set
+        if self.build_type.lower() not in ('release', 'debug'):
+            raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+                             "be 'release' or 'debug'")
+
+        # The directory containing this setup.py
+        source = os.path.dirname(os.path.abspath(__file__))
+        # The directory containing this PyArrow cpp CMakeLists.txt
+        source_pyarrow_cpp = pjoin(source, "pyarrow/src")
+
+        # The directory for the module being built
+        build_cmd = self.get_finalized_command('build')
+        saved_cwd = os.getcwd()
+        build_dir = pjoin(saved_cwd, 'build', 'dist')
+        build_include = pjoin(saved_cwd, 'build', 'dist', 'include')
+        build_lib = pjoin(os.getcwd(), build_cmd.build_lib)
+
+        # The directory containing Arrow C++ build
+        arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build')
+        if self.inplace:
+            # a bit hacky
+            build_lib = saved_cwd
+        if not os.path.isdir(build_dir):
+            self.mkpath(build_dir)
+        if not os.path.isdir(build_lib):
+            self.mkpath(build_lib)
+        if not os.path.isdir(build_include):
+            self.mkpath(build_include)
+
+        # Change to the build directory
+        with changed_dir(build_dir):
+            # cmake args
+            cmake_options = [
+                '-DCMAKE_INSTALL_PREFIX=' +
+                str(pjoin(saved_cwd, 'build/dist')),
+                '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type.lower()),
+                '-DARROW_BUILD_DIR=' + str(arrow_build_dir),
+                '-DPYTHON_EXECUTABLE=%s' % sys.executable,
+                '-DPython3_EXECUTABLE=%s' % sys.executable,
+            ]
+
+            # Check for specific options
+            def append_cmake_bool(value, varname):
+                cmake_options.append('-D{0}={1}'.format(
+                    varname, 'on' if value else 'off'))
+
+            append_cmake_bool(self.with_dataset, 'PYARROW_WITH_DATASET')
+            append_cmake_bool(self.with_parquet_encryption,
+                              'PYARROW_WITH_PARQUET_ENCRYPTION')
+            append_cmake_bool(self.with_hdfs,
+                              'PYARROW_WITH_HDFS')
+
+            # Windows
+            if self.cmake_generator:
+                cmake_options += ['-G', self.cmake_generator]
+
+            # build args
+            build_tool_args = []
+            if os.environ.get('PYARROW_PARALLEL'):
+                build_tool_args.append('--')
+                build_tool_args.append(
+                    '-j{0}'.format(os.environ['PYARROW_PARALLEL']))
+
+            # run cmake
+            print("-- Running cmake for pyarrow cpp")
+            self.spawn(['cmake'] + cmake_options + [source_pyarrow_cpp])
+            print("-- Finished cmake for pyarrow cpp")
+            # run make & install
+            print("-- Running cmake build and install for pyarrow cpp")
+            self.spawn(['cmake', '--build', '.', '--target', 'install'] +
+                       build_tool_args)
+            print("-- Finished cmake build and install for pyarrow cpp")
+
+            # Move the libraries to the place expected by the Python build
+            try:
+                os.makedirs(pjoin(build_lib, 'pyarrow'))
+            except OSError:
+                pass
+
+            # helper function
+            def copy_libs(folder_name):
+                for libname in os.listdir(pjoin(build_dir, folder_name)):
+                    if "python" in libname:
+                        libname_path = pjoin(build_lib, "pyarrow", libname)
+                        if os.path.exists(libname_path):
+                            os.remove(libname_path)
+                        print(
+                            f"Copying {pjoin(build_dir, folder_name, libname)}"
+                            f" to {pjoin(build_lib, 'pyarrow', libname)}")
+                        shutil.copy(pjoin(build_dir, folder_name, libname),
+                                    pjoin(build_lib, "pyarrow"))
+
+            # Move libraries to python/pyarrow
+            # For windows builds, move dll from bin
+            for folder in ['lib', 'lib64', 'bin']:
+                try:
+                    copy_libs(folder)
+                except OSError:
+                    pass
+
+            # Copy headers tp python/pyarrow/include

Review Comment:
   ```suggestion
               # Copy headers to python/pyarrow/include
   ```



##########
python/setup.py:
##########
@@ -401,15 +528,10 @@ def append_cmake_bool(value, varname):
     def _bundle_arrow_cpp(self, build_prefix, build_lib):
         print(pjoin(build_lib, 'pyarrow'))
         move_shared_libs(build_prefix, build_lib, "arrow")
-        move_shared_libs(build_prefix, build_lib, "arrow_python")
         if self.with_cuda:
             move_shared_libs(build_prefix, build_lib, "arrow_cuda")
         if self.with_substrait:
             move_shared_libs(build_prefix, build_lib, "arrow_substrait")
-        if self.with_flight:
-            move_shared_libs(build_prefix, build_lib, "arrow_flight")

Review Comment:
   Why is arrow_flight not needed anymore? 



##########
python/setup.py:
##########
@@ -227,6 +228,118 @@ def initialize_options(self):
         '_hdfsio',
         'gandiva']
 
+    def _run_cmake_pyarrow_cpp(self):
+        # check if build_type is correctly passed / set
+        if self.build_type.lower() not in ('release', 'debug'):
+            raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+                             "be 'release' or 'debug'")
+
+        # The directory containing this setup.py
+        source = os.path.dirname(os.path.abspath(__file__))
+        # The directory containing this PyArrow cpp CMakeLists.txt
+        source_pyarrow_cpp = pjoin(source, "pyarrow/src")
+
+        # The directory for the module being built
+        build_cmd = self.get_finalized_command('build')
+        saved_cwd = os.getcwd()
+        build_dir = pjoin(saved_cwd, 'build', 'dist')
+        build_include = pjoin(saved_cwd, 'build', 'dist', 'include')
+        build_lib = pjoin(os.getcwd(), build_cmd.build_lib)
+
+        # The directory containing Arrow C++ build
+        arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build')
+        if self.inplace:
+            # a bit hacky
+            build_lib = saved_cwd
+        if not os.path.isdir(build_dir):
+            self.mkpath(build_dir)
+        if not os.path.isdir(build_lib):
+            self.mkpath(build_lib)
+        if not os.path.isdir(build_include):
+            self.mkpath(build_include)
+
+        # Change to the build directory
+        with changed_dir(build_dir):
+            # cmake args
+            cmake_options = [
+                '-DCMAKE_INSTALL_PREFIX=' +
+                str(pjoin(saved_cwd, 'build/dist')),

Review Comment:
   Is this the same as `build_dir` ? (meaning we can use that variable here)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to