This is an automated email from the ASF dual-hosted git repository. kszucs pushed a commit to branch maint-1.0.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 127ada2c9ed777d542f0a89e452abbe4b40ee0dd Author: Neal Richardson <[email protected]> AuthorDate: Wed Jul 29 16:30:21 2020 -0700 ARROW-9575: [R] gcc-UBSAN failure on CRAN The cause of the failure itself turns out to be in cpp/src/parquet, not the R bindings. This patch reworks the existing r-sanitizer nightly job to (1) build the bundled C++ build like what happens on CRAN, and (2) actually fail the build if there is a UBSAN error. Previously no UBSAN error was reported because the Arrow C++ library was not built with sanitizers, and even if it were, the build was not set up to detect them and fail. Closes #7858 from nealrichardson/r-ubsan Authored-by: Neal Richardson <[email protected]> Signed-off-by: Neal Richardson <[email protected]> --- ci/docker/linux-r.dockerfile | 8 +-- ci/docker/ubuntu-18.04-r-sanitizer.dockerfile | 100 -------------------------- ci/scripts/r_docker_configure.sh | 13 +++- ci/scripts/r_sanitize.sh | 6 +- cpp/src/arrow/dataset/file_parquet.h | 2 +- cpp/src/parquet/arrow/path_internal.cc | 3 +- docker-compose.yml | 7 +- 7 files changed, 25 insertions(+), 114 deletions(-) diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index 2ec42fb..1d963a2 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -21,15 +21,15 @@ ARG base FROM ${base} +ARG r_bin=R +ENV R_BIN=${r_bin} + # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile) ENV PATH "${RPREFIX}/bin:${PATH}" -# Ensure parallel R package installation, set CRAN repo mirror, -# and use pre-built binaries where possible -COPY ci/etc/rprofile /arrow/ci/etc/ -RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site # Patch up some of the docker images COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ +COPY ci/etc/rprofile /arrow/ci/etc/ RUN /arrow/ci/scripts/r_docker_configure.sh COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ diff --git a/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile b/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile deleted file mode 100644 index 4bd0236..0000000 --- a/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -FROM wch1/r-debug:latest - -# Installs C++ toolchain and dependencies -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - autoconf \ - ca-certificates \ - ccache \ - cmake \ - g++ \ - gcc \ - git \ - libbenchmark-dev \ - libboost-filesystem-dev \ - libboost-regex-dev \ - libboost-system-dev \ - libbrotli-dev \ - libbz2-dev \ - libgflags-dev \ - libgoogle-glog-dev \ - liblz4-dev \ - liblzma-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - libzstd-dev \ - locales \ - ninja-build \ - pkg-config \ - rapidjson-dev \ - tzdata && \ - locale-gen en_US.UTF-8 && \ - apt-get clean && rm -rf /var/lib/apt/lists* - -# Ensure parallel R package installation, set CRAN repo mirror, -# and use pre-built binaries where possible -RUN printf "\ - options(Ncpus = parallel::detectCores(), \ - repos = 'https://packagemanager.rstudio.com/cran/__linux__/bionic/latest', \ - HTTPUserAgent = sprintf(\ - 'R/%%s R (%%s)', getRversion(), \ - paste(getRversion(), R.version\$platform, R.version\$arch, R.version\$os)))\n" \ - >> /usr/local/RDsan/lib/R/etc/Rprofile.site - -# Also ensure parallel compilation of C/C++ code -RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> /usr/local/RDsan/lib/R/etc/Makeconf - -# Prioritize system packages and local installation -# The following dependencies will be downloaded due to missing/invalid packages -# provided by the distribution: -# - libc-ares-dev does not install CMake config files -# - flatbuffer is not packaged -# - libgtest-dev only provide sources -# - libprotobuf-dev only provide sources -# - thrift is too old -ENV ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=OFF \ - ARROW_HOME=/usr/local \ - ARROW_HDFS=OFF \ - ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ - ARROW_ORC=OFF \ - ARROW_PARQUET=ON \ - ARROW_PLASMA=OFF \ - ARROW_USE_ASAN=OFF \ - ARROW_USE_CCACHE=ON \ - ARROW_USE_UBSAN=OFF \ - ARROW_WITH_BZ2=OFF \ - ARROW_WITH_UTF8PROC=OFF \ - ARROW_WITH_ZSTD=OFF \ - cares_SOURCE=BUNDLED \ - gRPC_SOURCE=BUNDLED \ - GTest_SOURCE=BUNDLED \ - LC_ALL=en_US.UTF-8 \ - PATH=/usr/lib/ccache/:$PATH \ - Protobuf_SOURCE=BUNDLED \ - R_BIN=RDsan \ - Thrift_SOURCE=BUNDLED - -COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ -COPY r/DESCRIPTION /arrow/r/ -RUN /arrow/ci/scripts/r_deps.sh /arrow diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index 9b131f2..1d7e8de 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -20,16 +20,23 @@ set -ex : ${R_BIN:=R} +# The Dockerfile should have put this file here +if [ -f "/arrow/ci/etc/rprofile" ]; then + # Ensure parallel R package installation, set CRAN repo mirror, + # and use pre-built binaries where possible + cat /arrow/ci/etc/rprofile >> $(${R_BIN} RHOME)/etc/Rprofile.site +fi + # Ensure parallel compilation of C/C++ code -echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf +echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> $(${R_BIN} RHOME)/etc/Makeconf # Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN # which uses a bespoke clang compiled to use libc++ # https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then dnf install -y libcxx-devel - sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(R RHOME)/etc/Makeconf - rm -rf $(R RHOME)/etc/Makeconf.bak + sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf + rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak fi # Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786 diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index de75e93..eacee5f 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -27,6 +27,10 @@ pushd ${source_dir}/tests export TEST_R_WITH_ARROW=TRUE export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" -${R_BIN} < testthat.R +${R_BIN} < testthat.R > testthat.out 2>&1 +cat testthat.out +if grep -q "runtime error" testthat.out; then + exit 1 +fi popd diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h index c156bfb..13f1090 100644 --- a/cpp/src/arrow/dataset/file_parquet.h +++ b/cpp/src/arrow/dataset/file_parquet.h @@ -39,7 +39,7 @@ class ReaderProperties; class ArrowReaderProperties; namespace arrow { class FileReader; -}; // namespace arrow +} // namespace arrow } // namespace parquet namespace arrow { diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc index 8d2790a..e2079b7 100644 --- a/cpp/src/parquet/arrow/path_internal.cc +++ b/cpp/src/parquet/arrow/path_internal.cc @@ -106,7 +106,6 @@ #include "arrow/util/make_unique.h" #include "arrow/util/variant.h" #include "arrow/visitor_inline.h" - #include "parquet/properties.h" namespace parquet { @@ -525,7 +524,7 @@ struct PathInfo { std::shared_ptr<Array> primitive_array; int16_t max_def_level = 0; int16_t max_rep_level = 0; - bool has_dictionary; + bool has_dictionary = false; }; /// Contains logic for writing a single leaf node to parquet. diff --git a/docker-compose.yml b/docker-compose.yml index fafbeae..1828adf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -911,16 +911,17 @@ services: - SYS_PTRACE build: context: . - dockerfile: ci/docker/ubuntu-18.04-r-sanitizer.dockerfile + dockerfile: ci/docker/linux-r.dockerfile cache_from: - ${REPO}:amd64-ubuntu-18.04-r-sanitizer + args: + base: wch1/r-debug:latest + r_bin: RDsan environment: <<: *ccache - LIBARROW_BUILD: 'false' volumes: *ubuntu-volumes command: > /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/r_sanitize.sh /arrow" ################################ Rust #######################################
