pitrou commented on code in PR #13311:
URL: https://github.com/apache/arrow/pull/13311#discussion_r925346281
##########
python/pyarrow/src/common.h:
##########
@@ -21,8 +21,8 @@
#include <utility>
#include "arrow/buffer.h"
-#include "arrow/python/pyarrow.h"
-#include "arrow/python/visibility.h"
+#include "pyarrow.h"
+#include "visibility.h"
Review Comment:
Can we reorder these includes: first Arrow C++ includes, then PyArrow C++
includes?
##########
cpp/cmake_modules/FindArrowPython.cmake:
##########
@@ -46,8 +46,9 @@ endif()
find_package(Arrow ${find_package_arguments})
if(ARROW_FOUND)
+ message(STATUS "PYARROW_CPP_HOME: ${PYARROW_CPP_HOME}")
Review Comment:
Is it customary to add such status messages in `Find*.cmake` files? @kou
##########
python/pyarrow/src/CMakeLists.txt:
##########
@@ -0,0 +1,445 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs PyArrow cpp options
+#
+
+# Check all the options from Arrow and PyArrow cpp to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
Review Comment:
```suggestion
# Needed compiler flags
```
##########
python/pyarrow/src/CMakeLists.txt:
##########
@@ -0,0 +1,445 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs PyArrow cpp options
+#
+
+# Check all the options from Arrow and PyArrow cpp to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
+include(SetupCxxFlags)
+
+#
+# shred/static link libs
Review Comment:
```suggestion
# Shared/static link libs
```
##########
docs/source/developers/python.rst:
##########
@@ -131,6 +131,29 @@ for ``.py`` files or
for ``.pyx`` and ``.pxi`` files. In this case you will also need to
install the `pytest-cython <https://github.com/lgpage/pytest-cython>`_ plugin.
+Testing Arrow Python (C++ code)
Review Comment:
+1
##########
ci/scripts/python_test.sh:
##########
@@ -54,4 +55,14 @@ export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_S3
+# Testing Arrow Python
Review Comment:
+1
##########
python/pyarrow/src/api.h:
##########
@@ -17,14 +17,14 @@
#pragma once
-#include "arrow/python/arrow_to_pandas.h"
-#include "arrow/python/common.h"
-#include "arrow/python/datetime.h"
-#include "arrow/python/deserialize.h"
-#include "arrow/python/helpers.h"
-#include "arrow/python/inference.h"
-#include "arrow/python/io.h"
-#include "arrow/python/numpy_convert.h"
-#include "arrow/python/numpy_to_arrow.h"
-#include "arrow/python/python_to_arrow.h"
-#include "arrow/python/serialize.h"
+#include "arrow_to_pandas.h"
Review Comment:
Hmm... we can keep it like this for now, but I think we'll have to decide
the fate of this file.
If this is an API to be used by third-party projects, then I think there
should still be a base directory, for example:
```suggestion
#include "pyarrow/arrow_to_pandas.h"
```
If we decide that PyArrow does not provide any C++ API at all, then this
file can simply be deleted.
Perhaps open a separate JIRA for this?
##########
python/pyarrow/src/datetime.cc:
##########
@@ -14,17 +14,17 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-#include "arrow/python/datetime.h"
+#include "datetime.h"
#include <algorithm>
#include <chrono>
#include <iomanip>
#include "arrow/array.h"
-#include "arrow/python/arrow_to_python_internal.h"
-#include "arrow/python/common.h"
-#include "arrow/python/helpers.h"
-#include "arrow/python/platform.h"
+#include "arrow_to_python_internal.h"
+#include "common.h"
+#include "helpers.h"
+#include "platform.h"
Review Comment:
Reorder includes?
##########
python/pyarrow/tests/test_gdb.py:
##########
@@ -154,7 +154,7 @@ def select_frame(self, func_name):
# but it's not available on old GDB versions (such as 8.1.1),
# so instead parse the stack trace for a matching frame number.
out = self.run_command("info stack")
- pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + " "
+ pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name)
Review Comment:
Can we then make the regex a bit less lenient, to avoid matching longer
function names that merely start with `func_name`, e.g.:
```suggestion
pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + r"\b"
```
##########
python/pyarrow/src/CMakeLists.txt:
##########
@@ -0,0 +1,445 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs PyArrow cpp options
+#
+
+# Check all the options from Arrow and PyArrow cpp to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
+include(SetupCxxFlags)
+
+#
+# shred/static link libs
+#
+
+set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared)
+set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS)
+set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS})
+
+if(WIN32)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES}
${PYTHON_OTHER_LIBS})
+endif()
+
+if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION)
Review Comment:
Hmm... wouldn't `parquet_shared` be required even if Parquet encryption
isn't enabled?
##########
python/pyarrow/src/csv.h:
##########
@@ -23,7 +23,7 @@
#include <vector>
#include "arrow/csv/options.h"
-#include "arrow/python/common.h"
+#include "common.h"
Review Comment:
Reorder includes?
##########
python/setup.py:
##########
@@ -227,6 +228,118 @@ def initialize_options(self):
'_hdfsio',
'gandiva']
+ def _run_cmake_pyarrow_cpp(self):
+ # check if build_type is correctly passed / set
+ if self.build_type.lower() not in ('release', 'debug'):
+ raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+ "be 'release' or 'debug'")
+
+ # The directory containing this setup.py
+ source = os.path.dirname(os.path.abspath(__file__))
+ # The directory containing this PyArrow cpp CMakeLists.txt
+ source_pyarrow_cpp = pjoin(source, "pyarrow/src")
+
+ # The directory for the module being built
+ build_cmd = self.get_finalized_command('build')
+ saved_cwd = os.getcwd()
+ build_dir = pjoin(saved_cwd, 'build', 'dist')
+ build_include = pjoin(saved_cwd, 'build', 'dist', 'include')
+ build_lib = pjoin(os.getcwd(), build_cmd.build_lib)
+
+ # The directory containing Arrow C++ build
+ arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build')
+ if self.inplace:
+ # a bit hacky
+ build_lib = saved_cwd
+ if not os.path.isdir(build_dir):
+ self.mkpath(build_dir)
+ if not os.path.isdir(build_lib):
+ self.mkpath(build_lib)
+ if not os.path.isdir(build_include):
+ self.mkpath(build_include)
+
+ # Change to the build directory
+ with changed_dir(build_dir):
+ # cmake args
+ cmake_options = [
+ '-DCMAKE_INSTALL_PREFIX=' +
+ str(pjoin(saved_cwd, 'build/dist')),
+ '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type.lower()),
+ '-DARROW_BUILD_DIR=' + str(arrow_build_dir),
+ '-DPYTHON_EXECUTABLE=%s' % sys.executable,
+ '-DPython3_EXECUTABLE=%s' % sys.executable,
+ ]
+
+ # Check for specific options
+ def append_cmake_bool(value, varname):
+ cmake_options.append('-D{0}={1}'.format(
+ varname, 'on' if value else 'off'))
+
+ append_cmake_bool(self.with_dataset, 'PYARROW_WITH_DATASET')
+ append_cmake_bool(self.with_parquet_encryption,
+ 'PYARROW_WITH_PARQUET_ENCRYPTION')
+ append_cmake_bool(self.with_hdfs,
+ 'PYARROW_WITH_HDFS')
+
+ # Windows
+ if self.cmake_generator:
+ cmake_options += ['-G', self.cmake_generator]
+
+ # build args
+ build_tool_args = []
+ if os.environ.get('PYARROW_PARALLEL'):
+ build_tool_args.append('--')
+ build_tool_args.append(
+ '-j{0}'.format(os.environ['PYARROW_PARALLEL']))
+
+ # run cmake
+ print("-- Running cmake for pyarrow cpp")
+ self.spawn(['cmake'] + cmake_options + [source_pyarrow_cpp])
+ print("-- Finished cmake for pyarrow cpp")
+ # run make & install
+ print("-- Running cmake build and install for pyarrow cpp")
+ self.spawn(['cmake', '--build', '.', '--target', 'install'] +
+ build_tool_args)
+ print("-- Finished cmake build and install for pyarrow cpp")
+
+ # Move the libraries to the place expected by the Python build
+ try:
+ os.makedirs(pjoin(build_lib, 'pyarrow'))
+ except OSError:
+ pass
+
+ # helper function
+ def copy_libs(folder_name):
+ for libname in os.listdir(pjoin(build_dir, folder_name)):
+ if "python" in libname:
+ libname_path = pjoin(build_lib, "pyarrow", libname)
+ if os.path.exists(libname_path):
+ os.remove(libname_path)
+ print(
+ f"Copying {pjoin(build_dir, folder_name, libname)}"
+ f" to {pjoin(build_lib, 'pyarrow', libname)}")
+ shutil.copy(pjoin(build_dir, folder_name, libname),
+ pjoin(build_lib, "pyarrow"))
+
+ # Move libraries to python/pyarrow
+ # For windows builds, move dll from bin
+ for folder in ['lib', 'lib64', 'bin']:
+ try:
+ copy_libs(folder)
+ except OSError:
Review Comment:
Why do we simply ignore errors here? What kind of error would that be?
##########
python/pyarrow/src/CMakeLists.txt:
##########
@@ -0,0 +1,445 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs PyArrow cpp options
+#
+
+# Check all the options from Arrow and PyArrow cpp to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
+include(SetupCxxFlags)
+
+#
+# shred/static link libs
+#
+
+set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared)
+set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS)
+set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS})
+
+if(WIN32)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES}
${PYTHON_OTHER_LIBS})
+endif()
+
+if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared)
+endif()
+
+set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS})
+
+# Inlude macros needed to find and use add_arrow_lib function
+include(BuildUtils)
+include(CMakePackageConfigHelpers)
+
+# Set the output directory for cmake module
+# (CMAKE_INSTALL_PREFIX = python/build/dist! should be set in setup.py!)
+set(ARROW_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
+
+# Changing ARROW_SOURCE_DIR for sdist build
+# In this case cpp/cmake_modules doesn't exist
+if(NOT EXISTS "${ARROW_SOURCE_DIR}/cpp/cmake_modules/Find${MODULE}.cmake")
+ set(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR})
+endif()
+
+add_arrow_lib(arrow_python
+ CMAKE_PACKAGE_NAME
+ ArrowPython
+ PKG_CONFIG_NAME
+ arrow-python
+ SOURCES
+ ${ARROW_PYTHON_SRCS}
+ PRECOMPILED_HEADERS
+ "$<$<COMPILE_LANGUAGE:CXX>:pch.h>"
+ OUTPUTS
+ ARROW_PYTHON_LIBRARIES
+ DEPENDENCIES
+ ${ARROW_PYTHON_DEPENDENCIES}
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS}
+ SHARED_LINK_LIBS
+ ${ARROW_PYTHON_SHARED_LINK_LIBS}
+ SHARED_PRIVATE_LINK_LIBS
+ ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS}
+ STATIC_LINK_LIBS
+ ${ARROW_PYTHON_STATIC_LINK_LIBS}
+ EXTRA_INCLUDES
+ "${ARROW_PYTHON_INCLUDES}")
+
+add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES})
+
+foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES})
+ target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING)
+endforeach()
+
+if(ARROW_BUILD_STATIC AND MSVC)
+ target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC)
+endif()
+
+if(ARROW_FLIGHT AND ARROW_BUILD_SHARED)
+ # Must link to shared libarrow_flight: we don't want to link more than one
+ # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls
+ # fail with weird errors due to multiple copies of global static state (The
+ # other solution is to link gRPC shared everywhere instead of statically
only
+ # in Flight)
+ find_package(ArrowFlight REQUIRED)
+ include_directories("${ARROW_CPP_SOURCE_DIR}/src"
"${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/src")
+
+ set(FLIGHT_LINK_LIBS arrow_flight_shared)
+
+ add_arrow_lib(arrow_python_flight
+ CMAKE_PACKAGE_NAME
+ ArrowPythonFlight
+ PKG_CONFIG_NAME
+ arrow-python-flight
+ SOURCES
+ flight.cc
+ OUTPUTS
+ ARROW_PYFLIGHT_LIBRARIES
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS}
+ SHARED_LINK_LIBS
+ arrow_python_shared
+ arrow_flight_shared
+ STATIC_LINK_LIBS
+ ${PYTHON_OTHER_LIBS}
+ EXTRA_INCLUDES
+ "${ARROW_PYTHON_INCLUDES}"
+ PRIVATE_INCLUDES
+ "${Protobuf_INCLUDE_DIRS}")
+
+ add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES})
+
+ foreach(LIB_TARGET ${ARROW_PYFLIGHT_LIBRARIES})
+ target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING)
+ endforeach()
+
+ if(ARROW_BUILD_STATIC AND MSVC)
+ target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC)
+ endif()
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ # Clang, be quiet. Python C API has lots of macros
+ set_property(SOURCE ${ARROW_PYTHON_SRCS}
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS -Wno-parentheses-equality)
+endif()
+
+arrow_install_all_headers("arrow/python")
+
+# ----------------------------------------------------------------------
+
+#
+# Tests
+# The tests will be moved to Cython and are currently supported for bundled
GTest
+# Follow-up: https://issues.apache.org/jira/browse/ARROW-17016
+#
+
+if(ARROW_BUILD_TESTS)
+
+ enable_testing()
+ set(GTEST_ROOT
${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix)
+
+ # GTest must be built from source
+ if(EXISTS ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix)
+
+ # Set necessary paths for cmake to find GTest
+ set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/include")
+ set(GTEST_LIBRARY ${GTEST_ROOT}/lib)
+ set(GTEST_MAIN_LIBRARY ${GTEST_ROOT}/lib)
+
+ #
+ # Taken from Matlab CMakeLists.txt (enable_gtest and build_gtest)
+ #
+
+ set(ARROW_GTEST_PREFIX "${GTEST_ROOT}")
+ set(ARROW_GTEST_MAIN_PREFIX "${GTEST_ROOT}")
+
+ if(WIN32)
+ set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/bin")
+ set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/bin")
+
+ set(ARROW_GTEST_LINK_LIB_DIR "${ARROW_GTEST_PREFIX}/lib")
+ set(ARROW_GTEST_LINK_LIB
+
"${ARROW_GTEST_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtestd${CMAKE_IMPORT_LIBRARY_SUFFIX}"
+ )
+
+ set(ARROW_GTEST_MAIN_LINK_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib")
+ set(ARROW_GTEST_MAIN_LINK_LIB
+
"${ARROW_GTEST_MAIN_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtest_maind${CMAKE_IMPORT_LIBRARY_SUFFIX}"
+ )
+ else()
+ set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/lib")
+ set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib")
+ endif()
+
+ set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include")
+ set(ARROW_GTEST_SHARED_LIB
+
"${ARROW_GTEST_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtestd${CMAKE_SHARED_LIBRARY_SUFFIX}"
+ )
+
+ set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include")
+ set(ARROW_GTEST_MAIN_SHARED_LIB
+
"${ARROW_GTEST_MAIN_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_maind${CMAKE_SHARED_LIBRARY_SUFFIX}"
+ )
+
+ file(MAKE_DIRECTORY "${ARROW_GTEST_INCLUDE_DIR}")
+
+ # Create target GTest::gtest
+ add_library(GTest::gtest SHARED IMPORTED)
+ set_target_properties(GTest::gtest
+ PROPERTIES IMPORTED_LOCATION
${ARROW_GTEST_SHARED_LIB}
+ INTERFACE_INCLUDE_DIRECTORIES
+ ${ARROW_GTEST_INCLUDE_DIR})
+ if(WIN32)
+ set_target_properties(GTest::gtest PROPERTIES IMPORTED_IMPLIB
${ARROW_GTEST_LINK_LIB})
+ endif()
+
+ # ArrowTesting
+ # needed to be able to use arrow_testing_shared target
+ find_package(ArrowTesting REQUIRED)
+
+ add_custom_target(all-tests)
+
+ add_library(arrow_python_test_main STATIC util/test_main.cc)
+
+ target_link_libraries(arrow_python_test_main GTest::gtest)
+ target_include_directories(arrow_python_test_main SYSTEM
+ PUBLIC ${ARROW_PYTHON_INCLUDES})
+
+ # Link libraries to avoid include error on Linux
+ if(ARROW_TEST_LINKAGE STREQUAL shared)
+ target_link_libraries(arrow_python_test_main arrow_shared)
+ else()
+ target_link_libraries(arrow_python_test_main arrow_static)
+ endif()
+
+ if(APPLE)
+ target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS})
+ set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS
+ "-undefined
dynamic_lookup")
+ elseif(NOT MSVC)
+ target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS})
+ endif()
+
+ if(ARROW_TEST_LINKAGE STREQUAL shared)
+ set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main
arrow_python_shared
+ arrow_testing_shared arrow_shared)
+ else()
+ set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main
arrow_python_static
+ arrow_testing_static arrow_static)
+ endif()
+
+ #
+ # Add a test case
+ #
+
+ set(REL_TEST_NAME "python_test")
+ get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
+ set(TEST_NAME "arrow-${TEST_NAME}")
+ set(SOURCES "${REL_TEST_NAME}.cc")
+
+ # # Make sure the executable name contains only hyphens, not underscores
+ string(REPLACE "_" "-" TEST_NAME ${TEST_NAME})
+
+ set(TEST_PATH "${CMAKE_BINARY_DIR}/${TEST_NAME}")
+ add_executable(${TEST_NAME} ${SOURCES})
+
+ # We need to set the correct RPATH so that dependencies
+ set_target_properties(${TEST_NAME}
+ PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE
+ INSTALL_RPATH_USE_LINK_PATH TRUE
+ INSTALL_RPATH
+
"${PYTHON_SOURCE_DIR}/pyarrow;$ENV{CONDA_PREFIX}/lib")
+
+ # Customize link libraries
+ target_link_libraries(${TEST_NAME} PRIVATE
"${ARROW_PYTHON_TEST_LINK_LIBS}")
+ # Extra link libs
+ target_link_libraries(${TEST_NAME} PRIVATE ${PYTHON_LIBRARIES})
+ # Extra includes
+ target_include_directories(${TEST_NAME} SYSTEM PUBLIC
"${ARROW_PYTHON_INCLUDES}")
+
+ # Add the test
+ if(ARROW_TEST_MEMCHECK)
Review Comment:
I don't think we want to keep this specific part.
##########
ci/scripts/python_test.sh:
##########
@@ -20,6 +20,7 @@
set -ex
arrow_dir=${1}
+test_dir=${1}/python/build/dist/temp
Review Comment:
Where is this "temp" directory coming from? Can we name it something else
(e.g. "cpp_tests")?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]