kou commented on code in PR #37821:
URL: https://github.com/apache/arrow/pull/37821#discussion_r1363454803


##########
ci/scripts/cpp_build.sh:
##########
@@ -91,111 +91,139 @@ esac
 mkdir -p ${build_dir}
 pushd ${build_dir}
 
-cmake \
-  -Dabsl_SOURCE=${absl_SOURCE:-} \
-  -DARROW_ACERO=${ARROW_ACERO:-OFF} \
-  -DARROW_AZURE=${ARROW_AZURE:-OFF} \
-  -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
-  -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \
-  -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
-  -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
-  -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
-  -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
-  -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
-  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
-  -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
-  -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
-  -DARROW_CSV=${ARROW_CSV:-ON} \
-  -DARROW_CUDA=${ARROW_CUDA:-OFF} \
-  -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
-  -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \
-  -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \
-  -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \
-  -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \
-  -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \
-  -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
-  -DARROW_DATASET=${ARROW_DATASET:-OFF} \
-  -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-  -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
-  -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
-  -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
-  -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
-  -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
-  -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \
-  -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
-  -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \
-  -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
-  -DARROW_GCS=${ARROW_GCS:-OFF} \
-  -DARROW_HDFS=${ARROW_HDFS:-ON} \
-  -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
-  -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
-  -DARROW_JSON=${ARROW_JSON:-ON} \
-  -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
-  -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
-  -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
-  -DARROW_ORC=${ARROW_ORC:-OFF} \
-  -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
-  -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
-  -DARROW_S3=${ARROW_S3:-OFF} \
-  -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
-  -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
-  -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \
-  -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
-  -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
-  -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
-  -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
-  -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
-  -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
-  -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
-  -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
-  -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
-  -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
-  -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
-  -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
-  -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
-  -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
-  -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \
-  -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \
-  -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
-  -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \
-  -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
-  -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
-  -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
-  -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
-  -DAzure_SOURCE=${Azure_SOURCE:-} \
-  -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
-  -DBOOST_SOURCE=${BOOST_SOURCE:-} \
-  -DBrotli_SOURCE=${Brotli_SOURCE:-} \
-  -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
-  -Dc-ares_SOURCE=${cares_SOURCE:-} \
-  -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
-  -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
-  -DCMAKE_C_FLAGS="${CFLAGS:-}" \
-  -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
-  -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
-  -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
-  -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
-  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
-  -Dgflags_SOURCE=${gflags_SOURCE:-} \
-  -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
-  -DgRPC_SOURCE=${gRPC_SOURCE:-} \
-  -DGTest_SOURCE=${GTest_SOURCE:-} \
-  -Dlz4_SOURCE=${lz4_SOURCE:-} \
-  -DORC_SOURCE=${ORC_SOURCE:-} \
-  -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
-  -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
-  -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
-  -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
-  -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
-  -Dre2_SOURCE=${re2_SOURCE:-} \
-  -DSnappy_SOURCE=${Snappy_SOURCE:-} \
-  -DThrift_SOURCE=${Thrift_SOURCE:-} \
-  -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
-  -Dzstd_SOURCE=${zstd_SOURCE:-} \
-  -Dxsimd_SOURCE=${xsimd_SOURCE:-} \
-  -G "${CMAKE_GENERATOR:-Ninja}" \
-  ${ARROW_CMAKE_ARGS} \
-  ${source_dir}
+if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
+  if [ "${ARROW_BUILD_TYPE:-debug}" = "debug" ]; then
+    echo "Forcing non-parallel build for emscripten debug"
+    export CMAKE_BUILD_PARALLEL_LEVEL=1 
+    # emscripten debug linking takes *tons* of memory
+    # https://github.com/WebAssembly/binaryen/issues/4261
+    # so stop parallel builds for debug build
+    # or else crossbow CI runs out of memory
+  fi
+  

Review Comment:
   Do we still need this?
   Does `-g2` solve the problem?



##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -707,3 +709,44 @@ if(MSVC)
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} 
${MSVC_LINKER_FLAGS}")
   endif()
 endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  # flags are:
+  # 1) We're using zlib from Emscripten ports
+  # 2) We are building library code
+  # 3) We force *everything* to build as position independent
+  # 4) And with support for C++ exceptions
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1  -fPIC -fexceptions")
+  # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+  # deprecated-literal-operator error is thrown in datetime (vendored lib in 
arrow)
+  set(CMAKE_CXX_FLAGS
+      "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions 
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"

Review Comment:
   Could you remove `-Wno-error=shorten-64-to-32` and use the following instead 
of it?
   
   ```diff
   diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake 
b/cpp/cmake_modules/SetupCxxFlags.cmake
   index 43f905239..338bdfade 100644
   --- a/cpp/cmake_modules/SetupCxxFlags.cmake
   +++ b/cpp/cmake_modules/SetupCxxFlags.cmake
   @@ -319,7 +319,12 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
        set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
        set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
        set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation")
   -    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32")
   +    if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   +      # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors
   +      set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32")
   +    else()
   +      set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32")
   +    endif()
        set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces")
        set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
        set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} 
-Wno-constant-logical-operand")
   ```



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2159,13 +2162,17 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) {
 }
 
 TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) {
+#ifdef EMSCRIPTEN
+  GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default";
+#else
   auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent");
   const char* seconds = R"(["1970-01-01T00:00:59", null])";
   auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds);
 
   EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                   testing::HasSubstr("Cannot find locale 
'non-existent'"),
                                   Strftime(arr, options));
+#endif

Review Comment:
   ```suggestion
   ```



##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -1906,6 +1919,7 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) {
   StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO, 
/*error_is_null=*/true);
   this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"), 
output,
                    &options2);
+#endif                   

Review Comment:
   ```suggestion
   ```



##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -700,3 +702,30 @@ if(MSVC)
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} 
${MSVC_LINKER_FLAGS}")
   endif()
 endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  # flags are:
+  # 1) We're using zlib from Emscripten ports
+  # 2) We are building library code
+  # 3) We force *everything* to build as position independent
+  # 4) And with support for C++ exceptions
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1  -fPIC -fexceptions")
+  # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+  # deprecated-literal-operator error is thrown in datetime (vendored lib in 
arrow)
+  set(CMAKE_CXX_FLAGS
+      "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions 
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"

Review Comment:
   Could you move `-sUSE_ZLIB=1` to `build_zlib` because it's zlib related 
flags?
   
   ```diff
   diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake 
b/cpp/cmake_modules/SetupCxxFlags.cmake
   index 43f905239..d0af614f0 100644
   --- a/cpp/cmake_modules/SetupCxxFlags.cmake
   +++ b/cpp/cmake_modules/SetupCxxFlags.cmake
   @@ -712,26 +712,24 @@ endif()
    
    if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
      # flags are:
   -  # 1) We're using zlib from Emscripten ports
   -  # 2) We are building library code
   -  # 3) We force *everything* to build as position independent
   -  # 4) And with support for C++ exceptions
   -  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1  -fPIC -fexceptions")
   +  # 1) We force *everything* to build as position independent
   +  # 2) And with support for C++ exceptions
   +  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions")
      # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
      # deprecated-literal-operator error is thrown in datetime (vendored lib 
in arrow)
      set(CMAKE_CXX_FLAGS
   -      "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions 
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
   +      "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=shorten-64-to-32 
-Wno-error=deprecated-literal-operator"
      )
    
      # flags for creating shared libraries (only used in pyarrow, because
      # Emscripten builds libarrow as static)
      # flags are:
   -  # 1) Tell it to use zlib from Emscripten ports
   -  # 2) Tell it to use javascript / webassembly 64 bit number support.
   -  # 3) Tell it to build with support for C++ exceptions
   -  # 4) Skip linker flags error which happens with -soname parameter
   +  # 1) Tell it to use javascript / webassembly 64 bit number support.
   +  # 2) Tell it to build with support for C++ exceptions
   +  # 3) Skip linker flags error which happens with -soname parameter
      set(ARROW_EMSCRIPTEN_LINKER_FLAGS
   -      "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
   +      "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
   +  # -sSIDE_MODULE=1: We are building library code
      set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
      set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sSIDE_MODULE=1 
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
      set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
   diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
   index f71f09cbe..a404f3837 100644
   --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
   +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
   @@ -2511,16 +2511,18 @@ macro(build_zlib)
        if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a)
          execute_process(COMMAND embuilder --pic --force build zlib)
        endif()
   -    set(ZLIB_STATIC_LIB 
${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a)
   -    set(ZLIB_LIBRARIES ${ZLIB_LIBRARY})
   -    #    set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include")
   -
   -    add_library(ZLIB::ZLIB STATIC IMPORTED)
   -    set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB})
   -    set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include")
   -    set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION 
${ZLIB_LIBRARIES})
   -    #    target_include_directories(ZLIB::ZLIB BEFORE INTERFACE 
"${ZLIB_INCLUDE_DIRS}")
   -
   +    add_library(ZLIB::ZLIB INTERFACE IMPORTED)
   +    # We need -sMAIN_MODULE=1 (for executable) or -sSIDE_MODULE=1 (for
   +    # library) in target_compile_options() too. If we don't have
   +    # -sMAIN_MODULE=1 nor -sSIDE_MODULE=1 here, Emscripten tries
   +    # finding no-PIC libz.a.  We can use -sRELOCATABLE=1 for this but
   +    # it seems that RELOCATABLE isn't public setting. Emscripten
   +    # document doesn't mention RELOCATABLE. (MAIN_MODULE/SIDE_MODULE
   +    # are mentioned.)
   +    target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1
   +      
"$<IF:$<STREQUAL:$<TARGET_PROPERTY:TYPE>,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>")
   +    target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1
   +      
"$<IF:$<STREQUAL:$<TARGET_PROPERTY:TYPE>,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>")
      else()
        set(ZLIB_PREFIX 
"${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
        if(MSVC)
   @@ -2552,9 +2554,9 @@ macro(build_zlib)
    
        add_dependencies(toolchain zlib_ep)
        add_dependencies(ZLIB::ZLIB zlib_ep)
   +    list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
      endif()
    
   -  list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
      set(ZLIB_VENDORED TRUE)
    endmacro()
    
   ```



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -2141,8 +2237,15 @@ function(build_gtest)
   if(APPLE)
     string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" " 
-Wno-ignored-attributes")
   endif()
-  set(BUILD_SHARED_LIBS ON)
-  set(BUILD_STATIC_LIBS OFF)
+  # If we're building static libs for Emscripten, we need to build 
*everything* as
+  # static libs.
+  if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") OR ARROW_BUILD_SHARED)
+    set(BUILD_SHARED_LIBS ON)
+    set(BUILD_STATIC_LIBS OFF)
+  else()
+    set(BUILD_SHARED_LIBS OFF)
+    set(BUILD_STATIC_LIBS ON)
+  endif()

Review Comment:
   We don't need `ARROW_BUILD_SHARED` here:
   
   ```suggestion
     if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       set(BUILD_SHARED_LIBS OFF)
       set(BUILD_STATIC_LIBS ON)
     else()
       set(BUILD_SHARED_LIBS ON)
       set(BUILD_STATIC_LIBS OFF)
     endif()
   ```



##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -1878,17 +1879,29 @@ TYPED_TEST(TestStringKernels, Strptime) {
   this->CheckUnary("strptime", input4, unit, output4, &options);
 
   options.format = "%m/%d/%Y %%z";
-  this->CheckUnary("strptime", input5, unit, output1, &options);
+  #ifndef EMSCRIPTEN
+    // emscripten bug 
https://github.com/emscripten-core/emscripten/issues/20466
+    this->CheckUnary("strptime", input5, unit, output1, &options);
 
-  options.error_is_null = false;
-  this->CheckUnary("strptime", input5, unit, output1, &options);
+    options.error_is_null = false;
+    this->CheckUnary("strptime", input5, unit, output1, &options);
 
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       Invalid, testing::HasSubstr("Invalid: Failed to parse string: 
'5/1/2020'"),
       Strptime(ArrayFromJSON(this->type(), input1), options));
+
+  #else
+    GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug 
https://github.com/emscripten-core/emscripten/issues/20466";;
+  #endif

Review Comment:
   Could you put the following at the beginning instead of `#infdef #else 
#endif`?
   
   ```cpp
     #ifdef EMSCRIPTEN
       GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug 
https://github.com/emscripten-core/emscripten/issues/20466";;
     #endif
   ```



##########
cpp/cmake_modules/SetupCxxFlags.cmake:
##########
@@ -707,3 +709,44 @@ if(MSVC)
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} 
${MSVC_LINKER_FLAGS}")
   endif()
 endif()
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  # flags are:
+  # 1) We're using zlib from Emscripten ports
+  # 2) We are building library code
+  # 3) We force *everything* to build as position independent
+  # 4) And with support for C++ exceptions
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1  -fPIC -fexceptions")
+  # size_t is 32 bit in emscripten wasm32 - ignore conversion errors
+  # deprecated-literal-operator error is thrown in datetime (vendored lib in 
arrow)
+  set(CMAKE_CXX_FLAGS
+      "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions 
-Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator"
+  )
+
+  # flags for creating shared libraries (only used in pyarrow, because
+  # Emscripten builds libarrow as static)
+  # flags are:
+  # 1) Tell it to use zlib from Emscripten ports
+  # 2) Tell it to use javascript / webassembly 64 bit number support.
+  # 3) Tell it to build with support for C++ exceptions
+  # 4) Skip linker flags error which happens with -soname parameter
+  set(ARROW_EMSCRIPTEN_LINKER_FLAGS
+      "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
+  set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+  set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sSIDE_MODULE=1 
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+  set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 
${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
+  if(ARROW_TESTING)
+    # flags for building test executables for use in node
+    set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}  
-sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js 
${BUILD_SUPPORT_DIR}/emscripten-test-init.js")
+  else()
+  set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}  
-sALLOW_MEMORY_GROWTH")
+  endif()
+
+  # limit debug info because building with DWARF debug info requires
+  # absolutely tons of memory
+  # https://github.com/WebAssembly/binaryen/issues/4261
+  if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
+    string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g2")
+  endif()
+

Review Comment:
   Could you remove this and use the following instead?
   
   ```diff
   diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake 
b/cpp/cmake_modules/SetupCxxFlags.cmake
   index 43f905239..7ebec131c 100644
   --- a/cpp/cmake_modules/SetupCxxFlags.cmake
   +++ b/cpp/cmake_modules/SetupCxxFlags.cmake
   @@ -674,7 +677,12 @@ if(NOT MSVC)
        if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
          string(APPEND CXX_DEBUG_FLAGS " -O0")
        endif()
   -    if(ARROW_GGDB_DEBUG)
   +    if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   +      string(APPEND C_DEBUG_FLAGS " -g2")
   +      string(APPEND CXX_DEBUG_FLAGS " -g2")
   +      string(APPEND C_RELWITHDEBINFO_FLAGS " -g2")
   +      string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2")
   +    elseif(ARROW_GGDB_DEBUG)
          string(APPEND C_DEBUG_FLAGS " -ggdb")
          string(APPEND CXX_DEBUG_FLAGS " -ggdb")
          string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb")
   ```



##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -1878,17 +1879,29 @@ TYPED_TEST(TestStringKernels, Strptime) {
   this->CheckUnary("strptime", input4, unit, output4, &options);
 
   options.format = "%m/%d/%Y %%z";
-  this->CheckUnary("strptime", input5, unit, output1, &options);
+  #ifndef EMSCRIPTEN
+    // emscripten bug 
https://github.com/emscripten-core/emscripten/issues/20466
+    this->CheckUnary("strptime", input5, unit, output1, &options);
 
-  options.error_is_null = false;
-  this->CheckUnary("strptime", input5, unit, output1, &options);
+    options.error_is_null = false;
+    this->CheckUnary("strptime", input5, unit, output1, &options);
 
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       Invalid, testing::HasSubstr("Invalid: Failed to parse string: 
'5/1/2020'"),
       Strptime(ArrayFromJSON(this->type(), input1), options));
+
+  #else
+    GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug 
https://github.com/emscripten-core/emscripten/issues/20466";;
+  #endif
+
+
 }
 
 TYPED_TEST(TestStringKernels, StrptimeZoneOffset) {
+#ifdef EMSCRIPTEN
+  GTEST_SKIP() << "Emscripten bug 
https://github.com/emscripten-core/emscripten/issues/20467 ";
+#else

Review Comment:
   We don't need `#else`.
   
   ```suggestion
   #endif
   ```



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2159,13 +2162,17 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) {
 }
 
 TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) {
+#ifdef EMSCRIPTEN
+  GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default";
+#else

Review Comment:
   We don't need `#else`.
   
   ```suggestion
   #endif
   ```



##########
cpp/src/arrow/io/file_test.cc:
##########
@@ -991,6 +1001,8 @@ TEST_F(TestMemoryMappedFile, 
LARGE_MEMORY_TEST(ReadWriteOver4GbFile)) {
 }
 
 TEST_F(TestMemoryMappedFile, RetainMemoryMapReference) {
+
+

Review Comment:
   Could you revert needless changes?
   
   ```suggestion
   ```



##########
ci/docker/ubuntu-22.04-cpp.dockerfile:
##########
@@ -114,10 +115,20 @@ RUN apt-get update -y -q && \
         rapidjson-dev \
         rsync \
         tzdata \
-        wget && \
+        wget \
+        xz-utils  && \

Review Comment:
   ```suggestion
           xz-utils && \
   ```



##########
ci/docker/ubuntu-22.04-cpp.dockerfile:
##########
@@ -65,10 +65,11 @@ RUN latest_system_llvm=14 && \
 RUN apt-get update -y -q && \
     apt-get install -y -q --no-install-recommends \
         autoconf \
+        bzip2 \
         ca-certificates \
         ccache \
         cmake \
-        curl \
+        curl \        

Review Comment:
   ```suggestion
           curl \
   ```



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -1395,16 +1420,49 @@ macro(build_brotli)
   )
   set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} 
"-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}")
 
-  externalproject_add(brotli_ep
-                      ${EP_COMMON_OPTIONS}
-                      URL ${BROTLI_SOURCE_URL}
-                      URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
-                      BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
-                                       "${BROTLI_STATIC_LIBRARY_DEC}"
-                                       "${BROTLI_STATIC_LIBRARY_COMMON}"
-                                       ${BROTLI_BUILD_BYPRODUCTS}
-                      CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
-                      STEP_TARGETS headers_copy)
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    # cmake install is disabled for brotli on emscripten, so we have
+    # to manually copy the libraries to our install directory
+    set(BROTLI_BUILD_DIR 
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build)
+    set(BROTLI_BUILD_LIBS
+        
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+
+    set(BROTLI_BUILD_INCLUDE_DIR
+        
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli)
+
+    externalproject_add(brotli_ep
+                        ${EP_COMMON_OPTIONS}
+                        URL ${BROTLI_SOURCE_URL}
+                        URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
+                        BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
+                                         "${BROTLI_STATIC_LIBRARY_DEC}"
+                                         "${BROTLI_STATIC_LIBRARY_COMMON}"
+                                         ${BROTLI_BUILD_BYPRODUCTS}
+                        CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+                        STEP_TARGETS headers_copy
+                        INSTALL_COMMAND "")
+    add_custom_command(TARGET brotli_ep
+                       POST_BUILD
+                       COMMAND ${CMAKE_COMMAND} -E copy_if_different
+                               
${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX}
+                               ${BROTLI_LIB_DIR}
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory
+                               ${BROTLI_BUILD_INCLUDE_DIR} 
${BROTLI_INCLUDE_DIR}/brotli)
+  else() # not emscripten - just behave as normal
+    externalproject_add(brotli_ep
+                        ${EP_COMMON_OPTIONS}
+                        URL ${BROTLI_SOURCE_URL}
+                        URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
+                        BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
+                                         "${BROTLI_STATIC_LIBRARY_DEC}"
+                                         "${BROTLI_STATIC_LIBRARY_COMMON}"
+                                         ${BROTLI_BUILD_BYPRODUCTS}
+                        CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+                        STEP_TARGETS headers_copy)
+  endif()

Review Comment:
   Could you reuse `externalproject_add()` call?
   
   ```diff
   diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
   index f71f09cbe..f51ec56fc 100644
   --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
   +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
   @@ -1420,9 +1420,26 @@ macro(build_brotli)
      )
      set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} 
"-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}")
    
   +  set(BROTLI_EP_OPTIONS)
      if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   -    # cmake install is disabled for brotli on emscripten, so we have
   -    # to manually copy the libraries to our install directory
   +    # "cmake install" is disabled for Brotli on Emscripten, so the
   +    # default INSTALL_COMMAND is failed. We need to disable the default
   +    # INSTALL_COMMAND.
   +    list(APPEND BROTLI_EP_OPTIONS INSTALL_COMMAND ${CMAKE_COMMAND} -E true)
   +  endif()
   +  externalproject_add(brotli_ep
   +                      ${EP_COMMON_OPTIONS}
   +                      URL ${BROTLI_SOURCE_URL}
   +                      URL_HASH 
"SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
   +                      BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
   +                                       "${BROTLI_STATIC_LIBRARY_DEC}"
   +                                       "${BROTLI_STATIC_LIBRARY_COMMON}"
   +                                       ${BROTLI_BUILD_BYPRODUCTS}
   +                      CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
   +                      STEP_TARGETS headers_copy
   +                      ${BROTLI_EP_OPTIONS})
   +  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   +    # Copy the libraries to our install directory manually.
        set(BROTLI_BUILD_DIR 
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build)
        set(BROTLI_BUILD_LIBS
            
"${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
   @@ -1432,18 +1449,6 @@ macro(build_brotli)
    
        set(BROTLI_BUILD_INCLUDE_DIR
            
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli)
   -
   -    externalproject_add(brotli_ep
   -                        ${EP_COMMON_OPTIONS}
   -                        URL ${BROTLI_SOURCE_URL}
   -                        URL_HASH 
"SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
   -                        BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
   -                                         "${BROTLI_STATIC_LIBRARY_DEC}"
   -                                         "${BROTLI_STATIC_LIBRARY_COMMON}"
   -                                         ${BROTLI_BUILD_BYPRODUCTS}
   -                        CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
   -                        STEP_TARGETS headers_copy
   -                        INSTALL_COMMAND "")
        add_custom_command(TARGET brotli_ep
                           POST_BUILD
                           COMMAND ${CMAKE_COMMAND} -E copy_if_different
   @@ -1451,17 +1456,6 @@ macro(build_brotli)
                                   ${BROTLI_LIB_DIR}
                           COMMAND ${CMAKE_COMMAND} -E copy_directory
                                   ${BROTLI_BUILD_INCLUDE_DIR} 
${BROTLI_INCLUDE_DIR}/brotli)
   -  else() # not emscripten - just behave as normal
   -    externalproject_add(brotli_ep
   -                        ${EP_COMMON_OPTIONS}
   -                        URL ${BROTLI_SOURCE_URL}
   -                        URL_HASH 
"SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}"
   -                        BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}"
   -                                         "${BROTLI_STATIC_LIBRARY_DEC}"
   -                                         "${BROTLI_STATIC_LIBRARY_COMMON}"
   -                                         ${BROTLI_BUILD_BYPRODUCTS}
   -                        CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
   -                        STEP_TARGETS headers_copy)
      endif()
    
      add_dependencies(toolchain brotli_ep)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to