kou commented on code in PR #37821:
URL: https://github.com/apache/arrow/pull/37821#discussion_r1363454803
##########
ci/scripts/cpp_build.sh:
##########
@@ -91,111 +91,139 @@ esac
mkdir -p ${build_dir}
pushd ${build_dir}
-cmake \
- -Dabsl_SOURCE=${absl_SOURCE:-} \
- -DARROW_ACERO=${ARROW_ACERO:-OFF} \
- -DARROW_AZURE=${ARROW_AZURE:-OFF} \
- -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
- -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \
- -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
- -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
- -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
- -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
- -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
- -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
- -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
- -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
- -DARROW_CSV=${ARROW_CSV:-ON} \
- -DARROW_CUDA=${ARROW_CUDA:-OFF} \
- -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
- -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \
- -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \
- -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \
- -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \
- -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \
- -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
- -DARROW_DATASET=${ARROW_DATASET:-OFF} \
- -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
- -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
- -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
- -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
- -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
- -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
- -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \
- -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
- -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \
- -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
- -DARROW_GCS=${ARROW_GCS:-OFF} \
- -DARROW_HDFS=${ARROW_HDFS:-ON} \
- -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
- -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
- -DARROW_JSON=${ARROW_JSON:-ON} \
- -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
- -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
- -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
- -DARROW_ORC=${ARROW_ORC:-OFF} \
- -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
- -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
- -DARROW_S3=${ARROW_S3:-OFF} \
- -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
- -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
- -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
- -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
- -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
- -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
- -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
- -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
- -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
- -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
- -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
- -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
- -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
- -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
- -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
- -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
- -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
- -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \
- -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \
- -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
- -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \
- -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
- -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
- -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
- -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
- -DAzure_SOURCE=${Azure_SOURCE:-} \
- -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
- -DBOOST_SOURCE=${BOOST_SOURCE:-} \
- -DBrotli_SOURCE=${Brotli_SOURCE:-} \
- -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
- -Dc-ares_SOURCE=${cares_SOURCE:-} \
- -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
- -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
- -DCMAKE_C_FLAGS="${CFLAGS:-}" \
- -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
- -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
- -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
- -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
- -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
- -Dgflags_SOURCE=${gflags_SOURCE:-} \
- -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
- -DgRPC_SOURCE=${gRPC_SOURCE:-} \
- -DGTest_SOURCE=${GTest_SOURCE:-} \
- -Dlz4_SOURCE=${lz4_SOURCE:-} \
- -DORC_SOURCE=${ORC_SOURCE:-} \
- -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
- -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
- -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
- -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
- -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
- -Dre2_SOURCE=${re2_SOURCE:-} \
- -DSnappy_SOURCE=${Snappy_SOURCE:-} \
- -DThrift_SOURCE=${Thrift_SOURCE:-} \
- -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
- -Dzstd_SOURCE=${zstd_SOURCE:-} \
- -Dxsimd_SOURCE=${xsimd_SOURCE:-} \
- -G "${CMAKE_GENERATOR:-Ninja}" \
- ${ARROW_CMAKE_ARGS} \
- ${source_dir}
+if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
+ if [ "${ARROW_BUILD_TYPE:-debug}" = "debug" ]; then
+ echo "Forcing non-parallel build for emscripten debug"
+ export CMAKE_BUILD_PARALLEL_LEVEL=1
+ # emscripten debug linking takes *tons* of memory
+ # https://github.com/WebAssembly/binaryen/issues/4261
+ # so stop parallel builds for debug build
+ # or else crossbow CI runs out of memory
+ fi
+
Review Comment:
Do we still need this?
Does `-g2` solve the problem?
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -707,3 +709,44 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We're using zlib from Emscripten ports
+ # 2) We are building library code
+ # 3) We force *everything* to build as position independent
+ # 4) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
Review Comment:
Could you remove `-Wno-error=shorten-64-to-32` and use the following instead
of it?
```diff
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 43f905239..338bdfade 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -319,7 +319,12 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation")
- set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32")
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors
+ set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32")
+ else()
+ set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32")
+ endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}
-Wno-constant-logical-operand")
```
##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2159,13 +2162,17 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) {
}
TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) {
+#ifdef EMSCRIPTEN
+ GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default";
+#else
auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent");
const char* seconds = R"(["1970-01-01T00:00:59", null])";
auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds);
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
testing::HasSubstr("Cannot find locale
'non-existent'"),
Strftime(arr, options));
+#endif
Review Comment:
```suggestion
```
##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -1906,6 +1919,7 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) {
StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO,
/*error_is_null=*/true);
this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"),
output,
&options2);
+#endif
Review Comment:
```suggestion
```
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -700,3 +702,30 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We're using zlib from Emscripten ports
+ # 2) We are building library code
+ # 3) We force *everything* to build as position independent
+ # 4) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
Review Comment:
Could you move `-sUSE_ZLIB=1` to `build_zlib` because it's zlib related
flags?
```diff
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 43f905239..d0af614f0 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -712,26 +712,24 @@ endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
# flags are:
- # 1) We're using zlib from Emscripten ports
- # 2) We are building library code
- # 3) We force *everything* to build as position independent
- # 4) And with support for C++ exceptions
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions")
+ # 1) We force *everything* to build as position independent
+ # 2) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
# size_t is 32 bit in emscripten wasm32 - ignore conversion errors
# deprecated-literal-operator error is thrown in datetime (vendored lib
in arrow)
set(CMAKE_CXX_FLAGS
- "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
+ "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=shorten-64-to-32
-Wno-error=deprecated-literal-operator"
)
# flags for creating shared libraries (only used in pyarrow, because
# Emscripten builds libarrow as static)
# flags are:
- # 1) Tell it to use zlib from Emscripten ports
- # 2) Tell it to use javascript / webassembly 64 bit number support.
- # 3) Tell it to build with support for C++ exceptions
- # 4) Skip linker flags error which happens with -soname parameter
+ # 1) Tell it to use javascript / webassembly 64 bit number support.
+ # 2) Tell it to build with support for C++ exceptions
+ # 3) Skip linker flags error which happens with -soname parameter
set(ARROW_EMSCRIPTEN_LINKER_FLAGS
- "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
+ "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
+ # -sSIDE_MODULE=1: We are building library code
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index f71f09cbe..a404f3837 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2511,16 +2511,18 @@ macro(build_zlib)
if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a)
execute_process(COMMAND embuilder --pic --force build zlib)
endif()
- set(ZLIB_STATIC_LIB
${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a)
- set(ZLIB_LIBRARIES ${ZLIB_LIBRARY})
- # set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include")
-
- add_library(ZLIB::ZLIB STATIC IMPORTED)
- set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB})
- set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include")
- set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION
${ZLIB_LIBRARIES})
- # target_include_directories(ZLIB::ZLIB BEFORE INTERFACE
"${ZLIB_INCLUDE_DIRS}")
-
+ add_library(ZLIB::ZLIB INTERFACE IMPORTED)
+ # We need -sMAIN_MODULE=1 (for executable) or -sSIDE_MODULE=1 (for
+ # library) in target_compile_options() too. If we don't have
+ # -sMAIN_MODULE=1 nor -sSIDE_MODULE=1 here, Emscripten tries
+ # finding no-PIC libz.a. We can use -sRELOCATABLE=1 for this but
+ # it seems that RELOCATABLE isn't public setting. Emscripten
+ # document doesn't mention RELOCATABLE. (MAIN_MODULE/SIDE_MODULE
+ # are mentioned.)
+ target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1
+
"$<IF:$<STREQUAL:$<TARGET_PROPERTY:TYPE>,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>")
+ target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1
+
"$<IF:$<STREQUAL:$<TARGET_PROPERTY:TYPE>,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>")
else()
set(ZLIB_PREFIX
"${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
if(MSVC)
@@ -2552,9 +2554,9 @@ macro(build_zlib)
add_dependencies(toolchain zlib_ep)
add_dependencies(ZLIB::ZLIB zlib_ep)
+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
endif()
- list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
set(ZLIB_VENDORED TRUE)
endmacro()
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -2141,8 +2237,15 @@ function(build_gtest)
if(APPLE)
string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" "
-Wno-ignored-attributes")
endif()
- set(BUILD_SHARED_LIBS ON)
- set(BUILD_STATIC_LIBS OFF)
+ # If we're building static libs for Emscripten, we need to build
*everything* as
+ # static libs.
+ if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") OR ARROW_BUILD_SHARED)
+ set(BUILD_SHARED_LIBS ON)
+ set(BUILD_STATIC_LIBS OFF)
+ else()
+ set(BUILD_SHARED_LIBS OFF)
+ set(BUILD_STATIC_LIBS ON)
+ endif()
Review Comment:
We don't need `ARROW_BUILD_SHARED` here:
```suggestion
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(BUILD_SHARED_LIBS OFF)
set(BUILD_STATIC_LIBS ON)
else()
set(BUILD_SHARED_LIBS ON)
set(BUILD_STATIC_LIBS OFF)
endif()
```
##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -1878,17 +1879,29 @@ TYPED_TEST(TestStringKernels, Strptime) {
this->CheckUnary("strptime", input4, unit, output4, &options);
options.format = "%m/%d/%Y %%z";
- this->CheckUnary("strptime", input5, unit, output1, &options);
+ #ifndef EMSCRIPTEN
+ // emscripten bug
https://github.com/emscripten-core/emscripten/issues/20466
+ this->CheckUnary("strptime", input5, unit, output1, &options);
- options.error_is_null = false;
- this->CheckUnary("strptime", input5, unit, output1, &options);
+ options.error_is_null = false;
+ this->CheckUnary("strptime", input5, unit, output1, &options);
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, testing::HasSubstr("Invalid: Failed to parse string:
'5/1/2020'"),
Strptime(ArrayFromJSON(this->type(), input1), options));
+
+ #else
+ GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug
https://github.com/emscripten-core/emscripten/issues/20466";
+ #endif
Review Comment:
Could you put the following at the beginning instead of `#infdef #else
#endif`?
```cpp
#ifdef EMSCRIPTEN
GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug
https://github.com/emscripten-core/emscripten/issues/20466";
#endif
```
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -707,3 +709,44 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We're using zlib from Emscripten ports
+ # 2) We are building library code
+ # 3) We force *everything* to build as position independent
+ # 4) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
+ )
+
+ # flags for creating shared libraries (only used in pyarrow, because
+ # Emscripten builds libarrow as static)
+ # flags are:
+ # 1) Tell it to use zlib from Emscripten ports
+ # 2) Tell it to use javascript / webassembly 64 bit number support.
+ # 3) Tell it to build with support for C++ exceptions
+ # 4) Skip linker flags error which happens with -soname parameter
+ set(ARROW_EMSCRIPTEN_LINKER_FLAGS
+ "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
+ set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ if(ARROW_TESTING)
+ # flags for building test executables for use in node
+ set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js
${BUILD_SUPPORT_DIR}/emscripten-test-init.js")
+ else()
+ set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sALLOW_MEMORY_GROWTH")
+ endif()
+
+ # limit debug info because building with DWARF debug info requires
+ # absolutely tons of memory
+ # https://github.com/WebAssembly/binaryen/issues/4261
+ if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
+ string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g2")
+ endif()
+
Review Comment:
Could you remove this and use the following instead?
```diff
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 43f905239..7ebec131c 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -674,7 +677,12 @@ if(NOT MSVC)
if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
string(APPEND CXX_DEBUG_FLAGS " -O0")
endif()
- if(ARROW_GGDB_DEBUG)
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ string(APPEND C_DEBUG_FLAGS " -g2")
+ string(APPEND CXX_DEBUG_FLAGS " -g2")
+ string(APPEND C_RELWITHDEBINFO_FLAGS " -g2")
+ string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2")
+ elseif(ARROW_GGDB_DEBUG)
string(APPEND C_DEBUG_FLAGS " -ggdb")
string(APPEND CXX_DEBUG_FLAGS " -ggdb")
string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb")
```
##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -1878,17 +1879,29 @@ TYPED_TEST(TestStringKernels, Strptime) {
this->CheckUnary("strptime", input4, unit, output4, &options);
options.format = "%m/%d/%Y %%z";
- this->CheckUnary("strptime", input5, unit, output1, &options);
+ #ifndef EMSCRIPTEN
+ // emscripten bug
https://github.com/emscripten-core/emscripten/issues/20466
+ this->CheckUnary("strptime", input5, unit, output1, &options);
- options.error_is_null = false;
- this->CheckUnary("strptime", input5, unit, output1, &options);
+ options.error_is_null = false;
+ this->CheckUnary("strptime", input5, unit, output1, &options);
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, testing::HasSubstr("Invalid: Failed to parse string:
'5/1/2020'"),
Strptime(ArrayFromJSON(this->type(), input1), options));
+
+ #else
+ GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug
https://github.com/emscripten-core/emscripten/issues/20466";
+ #endif
+
+
}
TYPED_TEST(TestStringKernels, StrptimeZoneOffset) {
+#ifdef EMSCRIPTEN
+ GTEST_SKIP() << "Emscripten bug
https://github.com/emscripten-core/emscripten/issues/20467 ";
+#else
Review Comment:
We don't need `#else`.
```suggestion
#endif
```
##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2159,13 +2162,17 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) {
}
TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) {
+#ifdef EMSCRIPTEN
+ GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default";
+#else
Review Comment:
We don't need `#else`.
```suggestion
#endif
```
##########
cpp/src/arrow/io/file_test.cc:
##########
@@ -991,6 +1001,8 @@ TEST_F(TestMemoryMappedFile,
LARGE_MEMORY_TEST(ReadWriteOver4GbFile)) {
}
TEST_F(TestMemoryMappedFile, RetainMemoryMapReference) {
+
+
Review Comment:
Could you revert needless changes?
```suggestion
```
##########
ci/docker/ubuntu-22.04-cpp.dockerfile:
##########
@@ -114,10 +115,20 @@ RUN apt-get update -y -q && \
rapidjson-dev \
rsync \
tzdata \
- wget && \
+ wget \
+ xz-utils && \
Review Comment:
```suggestion
xz-utils && \
```
##########
ci/docker/ubuntu-22.04-cpp.dockerfile:
##########
@@ -65,10 +65,11 @@ RUN latest_system_llvm=14 && \
RUN apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
autoconf \
+ bzip2 \
ca-certificates \
ccache \
cmake \
- curl \
+ curl \
Review Comment:
```suggestion
curl \
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1395,16 +1420,49 @@ macro(build_brotli)
)
set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}")
- externalproject_add(brotli_ep
- ${EP_COMMON_OPTIONS}
- URL ${BROTLI_SOURCE_URL}
- URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
- BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
- "${BROTLI_STATIC_LIBRARY_DEC}"
- "${BROTLI_STATIC_LIBRARY_COMMON}"
- ${BROTLI_BUILD_BYPRODUCTS}
- CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
- STEP_TARGETS headers_copy)
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # cmake install is disabled for brotli on emscripten, so we have
+ # to manually copy the libraries to our install directory
+ set(BROTLI_BUILD_DIR
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build)
+ set(BROTLI_BUILD_LIBS
+
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+
+ set(BROTLI_BUILD_INCLUDE_DIR
+
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli)
+
+ externalproject_add(brotli_ep
+ ${EP_COMMON_OPTIONS}
+ URL ${BROTLI_SOURCE_URL}
+ URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
+ BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
+ "${BROTLI_STATIC_LIBRARY_DEC}"
+ "${BROTLI_STATIC_LIBRARY_COMMON}"
+ ${BROTLI_BUILD_BYPRODUCTS}
+ CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+ STEP_TARGETS headers_copy
+ INSTALL_COMMAND "")
+ add_custom_command(TARGET brotli_ep
+ POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+
${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX}
+ ${BROTLI_LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy_directory
+ ${BROTLI_BUILD_INCLUDE_DIR}
${BROTLI_INCLUDE_DIR}/brotli)
+ else() # not emscripten - just behave as normal
+ externalproject_add(brotli_ep
+ ${EP_COMMON_OPTIONS}
+ URL ${BROTLI_SOURCE_URL}
+ URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
+ BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
+ "${BROTLI_STATIC_LIBRARY_DEC}"
+ "${BROTLI_STATIC_LIBRARY_COMMON}"
+ ${BROTLI_BUILD_BYPRODUCTS}
+ CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+ STEP_TARGETS headers_copy)
+ endif()
Review Comment:
Could you reuse `externalproject_add()` call?
```diff
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index f71f09cbe..f51ec56fc 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1420,9 +1420,26 @@ macro(build_brotli)
)
set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}")
+ set(BROTLI_EP_OPTIONS)
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
- # cmake install is disabled for brotli on emscripten, so we have
- # to manually copy the libraries to our install directory
+ # "cmake install" is disabled for Brotli on Emscripten, so the
+ # default INSTALL_COMMAND is failed. We need to disable the default
+ # INSTALL_COMMAND.
+ list(APPEND BROTLI_EP_OPTIONS INSTALL_COMMAND ${CMAKE_COMMAND} -E true)
+ endif()
+ externalproject_add(brotli_ep
+ ${EP_COMMON_OPTIONS}
+ URL ${BROTLI_SOURCE_URL}
+ URL_HASH
"SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
+ BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
+ "${BROTLI_STATIC_LIBRARY_DEC}"
+ "${BROTLI_STATIC_LIBRARY_COMMON}"
+ ${BROTLI_BUILD_BYPRODUCTS}
+ CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+ STEP_TARGETS headers_copy
+ ${BROTLI_EP_OPTIONS})
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # Copy the libraries to our install directory manually.
set(BROTLI_BUILD_DIR
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build)
set(BROTLI_BUILD_LIBS
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
@@ -1432,18 +1449,6 @@ macro(build_brotli)
set(BROTLI_BUILD_INCLUDE_DIR
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli)
-
- externalproject_add(brotli_ep
- ${EP_COMMON_OPTIONS}
- URL ${BROTLI_SOURCE_URL}
- URL_HASH
"SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
- BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
- "${BROTLI_STATIC_LIBRARY_DEC}"
- "${BROTLI_STATIC_LIBRARY_COMMON}"
- ${BROTLI_BUILD_BYPRODUCTS}
- CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
- STEP_TARGETS headers_copy
- INSTALL_COMMAND "")
add_custom_command(TARGET brotli_ep
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different
@@ -1451,17 +1456,6 @@ macro(build_brotli)
${BROTLI_LIB_DIR}
COMMAND ${CMAKE_COMMAND} -E copy_directory
${BROTLI_BUILD_INCLUDE_DIR}
${BROTLI_INCLUDE_DIR}/brotli)
- else() # not emscripten - just behave as normal
- externalproject_add(brotli_ep
- ${EP_COMMON_OPTIONS}
- URL ${BROTLI_SOURCE_URL}
- URL_HASH
"SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
- BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
- "${BROTLI_STATIC_LIBRARY_DEC}"
- "${BROTLI_STATIC_LIBRARY_COMMON}"
- ${BROTLI_BUILD_BYPRODUCTS}
- CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
- STEP_TARGETS headers_copy)
endif()
add_dependencies(toolchain brotli_ep)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]