kou commented on code in PR #37821:
URL: https://github.com/apache/arrow/pull/37821#discussion_r1449695994
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1349,6 +1366,13 @@ macro(build_snappy)
set(SNAPPY_PATCH_COMMAND)
endif()
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # ignore linker flag errors, as snappy sets
Review Comment:
```suggestion
# ignore linker flag errors, as Snappy sets
```
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -702,3 +730,36 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We force *everything* to build as position independent
+ # 2) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
Review Comment:
We don't do this here now:
```suggestion
```
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -702,3 +730,36 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We force *everything* to build as position independent
+ # 2) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -fPIC -fexceptions
-Wno-error=deprecated-literal-operator")
+
+ # flags for creating shared libraries (only used in pyarrow, because
+ # Emscripten builds libarrow as static)
+ # flags are:
+ # 1) Tell it to use JavaScript / WebAssembly 64 bit number support.
+ # 2) Tell it to build with support for C++ exceptions
+ # 3) Skip linker flags error which happens with -soname parameter
+ set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions
-Wno-error=linkflags")
+ set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ if(ARROW_TESTING)
+ # flags for building test executables for use in node
+ set(CMAKE_EXE_LINKER_FLAGS
+ "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js
-lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
Review Comment:
```suggestion
"${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js
-lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
```
##########
cpp/src/arrow/util/rle_encoding_test.cc:
##########
@@ -227,10 +234,9 @@ void ValidateRle(const std::vector<int>& values, int
bit_width,
if (expected_len != -1) {
EXPECT_EQ(encoded_len, expected_len);
}
- if (expected_encoding != NULL) {
+ if (expected_encoding != NULL && encoded_len == expected_len) {
Review Comment:
OK. Let's change the above `EXPECT_EQ()` to `ASSERT_EQ()` and remove the
added condition.
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -702,3 +730,36 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We force *everything* to build as position independent
+ # 2) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -fPIC -fexceptions
-Wno-error=deprecated-literal-operator")
+
+ # flags for creating shared libraries (only used in pyarrow, because
+ # Emscripten builds libarrow as static)
+ # flags are:
+ # 1) Tell it to use JavaScript / WebAssembly 64 bit number support.
+ # 2) Tell it to build with support for C++ exceptions
+ # 3) Skip linker flags error which happens with -soname parameter
+ set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions
-Wno-error=linkflags")
+ set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ if(ARROW_TESTING)
+ # flags for building test executables for use in node
+ set(CMAKE_EXE_LINKER_FLAGS
+ "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js
-lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
+ )
+ else()
+ set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sALLOW_MEMORY_GROWTH")
Review Comment:
```suggestion
set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sALLOW_MEMORY_GROWTH")
```
##########
cpp/src/arrow/util/mutex.cc:
##########
@@ -35,9 +36,12 @@ struct Mutex::Impl {
Mutex::Guard::Guard(Mutex* locked)
: locked_(locked, [](Mutex* locked) {
+#if !defined(__EMSCRIPTEN__) || defined(ARROW_ENABLE_THREADING)
Review Comment:
Is this `__EMSCRIPTEN__` check redundant?
```suggestion
#ifdef ARROW_ENABLE_THREADING
```
##########
cpp/src/arrow/util/rle_encoding_test.cc:
##########
@@ -214,7 +214,14 @@ TEST(BitUtil, RoundTripIntValues) {
void ValidateRle(const std::vector<int>& values, int bit_width,
uint8_t* expected_encoding, int expected_len) {
const int len = 64 * 1024;
+#ifdef __EMSCRIPTEN__
+ // don't make this on the stack as it is
+ // too big for emscripten
+ std::vector<uint8_t> buffer_vec((size_t)len);
Review Comment:
```suggestion
std::vector<uint8_t> buffer_vec(static_cast<size_t>(len));
```
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -702,3 +730,36 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We force *everything* to build as position independent
+ # 2) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -fPIC -fexceptions
-Wno-error=deprecated-literal-operator")
+
+ # flags for creating shared libraries (only used in pyarrow, because
+ # Emscripten builds libarrow as static)
+ # flags are:
+ # 1) Tell it to use JavaScript / WebAssembly 64 bit number support.
+ # 2) Tell it to build with support for C++ exceptions
+ # 3) Skip linker flags error which happens with -soname parameter
+ set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions
-Wno-error=linkflags")
+ set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ if(ARROW_TESTING)
+ # flags for building test executables for use in node
+ set(CMAKE_EXE_LINKER_FLAGS
+ "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js
-lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
+ )
+ else()
+ set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sALLOW_MEMORY_GROWTH")
+ endif()
+
Review Comment:
```suggestion
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1349,6 +1366,13 @@ macro(build_snappy)
set(SNAPPY_PATCH_COMMAND)
endif()
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # ignore linker flag errors, as snappy sets
+ # -Werror -Wall, and emscripten doesn't support -soname
Review Comment:
```suggestion
# -Werror -Wall, and Emscripten doesn't support -soname
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1349,6 +1366,13 @@ macro(build_snappy)
set(SNAPPY_PATCH_COMMAND)
endif()
+ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # ignore linker flag errors, as snappy sets
+ # -Werror -Wall, and emscripten doesn't support -soname
+ list(APPEND SNAPPY_CMAKE_ARGS
+ "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}
-Wno-error=linkflags")
Review Comment:
```suggestion
"-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}"
"-Wno-error=linkflags")
```
##########
cpp/src/arrow/util/rle_encoding_test.cc:
##########
@@ -256,7 +263,14 @@ void ValidateRle(const std::vector<int>& values, int
bit_width,
// the returned values are not all the same
bool CheckRoundTrip(const std::vector<int>& values, int bit_width) {
const int len = 64 * 1024;
+#ifdef __EMSCRIPTEN__
+ // don't make this on the stack as it is
+ // too big for emscripten
+ std::vector<uint8_t> buffer_vec((size_t)len);
Review Comment:
```suggestion
std::vector<uint8_t> buffer_vec(static_cast<size_t>(len));
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1798,6 +1859,38 @@ macro(build_protobuf)
add_dependencies(arrow::protobuf::protoc protobuf_ep)
list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf)
+
+ if(CMAKE_CROSSCOMPILING)
+ # If we are cross compiling, we need to build protoc for the host
+ # system also, as it is used when building Arrow
+ # We do this by calling CMake as a child process
+ # with CXXFLAGS / CFLAGS and CMake flags cleared.
+ set(PROTOBUF_HOST_PREFIX
"${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install")
+ set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc")
+
+ set(PROTOBUF_HOST_CMAKE_ARGS
+ "-DCMAKE_CXX_FLAGS="
+ "-DCMAKE_C_FLAGS="
+ "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}"
+ -Dprotobuf_BUILD_TESTS=OFF
+ -Dprotobuf_DEBUG_POSTFIX=)
+
+ externalproject_add(protobuf_ep_host
+ ${EP_COMMON_OPTIONS}
+ CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS}
+ BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}"
+ BUILD_IN_SOURCE 1
+ URL ${PROTOBUF_SOURCE_URL}
+ URL_HASH
"SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}")
+
+ add_executable(arrow::protobuf::host_protoc IMPORTED)
+ set_target_properties(arrow::protobuf::host_protoc
+ PROPERTIES IMPORTED_LOCATION
"${PROTOBUF_HOST_COMPILER}")
+
+ add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host)
+
+ endif()
+
Review Comment:
```suggestion
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1798,6 +1859,38 @@ macro(build_protobuf)
add_dependencies(arrow::protobuf::protoc protobuf_ep)
list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf)
+
+ if(CMAKE_CROSSCOMPILING)
+ # If we are cross compiling, we need to build protoc for the host
+ # system also, as it is used when building Arrow
+ # We do this by calling CMake as a child process
+ # with CXXFLAGS / CFLAGS and CMake flags cleared.
+ set(PROTOBUF_HOST_PREFIX
"${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install")
+ set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc")
+
+ set(PROTOBUF_HOST_CMAKE_ARGS
+ "-DCMAKE_CXX_FLAGS="
+ "-DCMAKE_C_FLAGS="
+ "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}"
+ -Dprotobuf_BUILD_TESTS=OFF
+ -Dprotobuf_DEBUG_POSTFIX=)
+
+ externalproject_add(protobuf_ep_host
+ ${EP_COMMON_OPTIONS}
+ CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS}
+ BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}"
+ BUILD_IN_SOURCE 1
+ URL ${PROTOBUF_SOURCE_URL}
+ URL_HASH
"SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}")
+
+ add_executable(arrow::protobuf::host_protoc IMPORTED)
+ set_target_properties(arrow::protobuf::host_protoc
+ PROPERTIES IMPORTED_LOCATION
"${PROTOBUF_HOST_COMPILER}")
+
+ add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host)
+
Review Comment:
```suggestion
```
##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -702,3 +714,47 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}
${MSVC_LINKER_FLAGS}")
endif()
endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ # flags are:
+ # 1) We're using zlib from Emscripten ports
+ # 2) We are building library code
+ # 3) We force *everything* to build as position independent
+ # 4) And with support for C++ exceptions
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions")
+ # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+ # deprecated-literal-operator error is thrown in datetime (vendored lib in
arrow)
+ set(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
+ )
+
+ # flags for creating shared libraries (only used in pyarrow, because
+ # Emscripten builds libarrow as static)
+ # flags are:
+ # 1) Tell it to use zlib from Emscripten ports
+ # 2) Tell it to use javascript / webassembly 64 bit number support.
+ # 3) Tell it to build with support for C++ exceptions
+ # 4) Skip linker flags error which happens with -soname parameter
+ set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions
-Wno-error=linkflags")
+ set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS
+ "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+ if(ARROW_TESTING)
+ # flags for building test executables for use in node
+ set(CMAKE_EXE_LINKER_FLAGS
+ "${ARROW_EMSCRIPTEN_LINKER_FLAGS}
-sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js
-lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
Review Comment:
Ah, thanks. I understand.
I thought that we can set `ARROW_TEST_DATA`/`PARQUET_TEST_DATA` by passing
something (environment variables or command line arguments) to test
executables. But we can't do this without changing `*-test.js` by passing
`--pre-js ...`.
##########
cpp/cmake_modules/BuildUtils.cmake:
##########
@@ -742,6 +742,14 @@ function(ADD_TEST_CASE REL_TEST_NAME)
--error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}")
elseif(WIN32)
add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS})
+ elseif(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ add_test(${TEST_NAME}
+ ${BUILD_SUPPORT_DIR}/run-test.sh
+ ${CMAKE_BINARY_DIR}
+ test
+ ${CMAKE_CROSSCOMPILING_EMULATOR}
+ ${TEST_PATH}
+ ${ARG_TEST_ARGUMENTS})
Review Comment:
`run-test.sh` is for:
* formatting sanitizer logs
* printing backtrace on crash (but I think that this doesn't work well...)
I think that both of them are needless for Emscripten build.
##########
docs/source/developers/cpp/emscripten.rst:
##########
@@ -0,0 +1,100 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+
+.. highlight:: console
+
+.. _developers-cpp-emscripten:
+#################################################
+Cross compiling for WebAssembly with Emscripten
+#################################################
Review Comment:
It seems that we use different marker character (e.g. `#` for top level) for
other documents such as
https://raw.githubusercontent.com/apache/arrow/main/docs/source/developers/cpp/building.rst
(e.g. `=` for top level) in the same directory.
Could you use the same style?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]