[arrow] branch master updated: ARROW-5557: [C++] Add VisitBits benchmark
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new bffe31b ARROW-5557: [C++] Add VisitBits benchmark bffe31b is described below commit bffe31ba5a18ee09195a9583ef43f58baba73af1 Author: Antoine Pitrou AuthorDate: Mon Jun 17 21:54:28 2019 -0500 ARROW-5557: [C++] Add VisitBits benchmark Author: Antoine Pitrou Closes #4550 from pitrou/ARROW-5557-visit-bits-benchmark and squashes the following commits: 9d502b28c ARROW-5557: Add VisitBits benchmark --- cpp/CMakeLists.txt | 3 ++ cpp/src/arrow/builder-benchmark.cc | 10 +++-- cpp/src/arrow/csv/parser-benchmark.cc | 6 +-- cpp/src/arrow/util/bit-util-benchmark.cc| 67 + cpp/src/arrow/util/hashing.h| 4 +- cpp/src/arrow/util/thread-pool-benchmark.cc | 2 +- 6 files changed, 75 insertions(+), 17 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 142ae75..1910b66 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -697,6 +697,9 @@ if(ARROW_BUILD_BENCHMARKS) set(ARROW_BENCHMARK_LINK_LIBS benchmark::benchmark_main benchmark::benchmark ${ARROW_TEST_LINK_LIBS}) + if(WIN32) +set(ARROW_BENCHMARK_LINK_LIBS Shlwapi.dll ${ARROW_BENCHMARK_LINK_LIBS}) + endif() endif() set(ARROW_SYSTEM_LINK_LIBS) diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index bc36970..e814c55 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -160,7 +160,7 @@ static void BuildChunkedBinaryArray( static void BuildFixedSizeBinaryArray( benchmark::State& state) { // NOLINT non-const reference - auto type = fixed_size_binary(kBinaryView.size()); + auto type = fixed_size_binary(static_cast(kBinaryView.size())); for (auto _ : state) { FixedSizeBinaryBuilder builder(type); @@ -227,7 +227,8 @@ static std::vector MakeRandomIntDictFodder() { [&]() { return static_cast(values_dist(gen)); 
}); } { -std::uniform_int_distribution indices_dist(0, kDistinctElements - 1); +std::uniform_int_distribution indices_dist( +0, static_cast(kDistinctElements - 1)); std::generate(values.begin(), values.end(), [&]() { return values_dict[indices_dist(gen)]; }); } @@ -262,7 +263,8 @@ static std::vector MakeStringDictFodder() { }); } { -std::uniform_int_distribution indices_dist(0, kDistinctElements - 1); +std::uniform_int_distribution indices_dist( +0, static_cast(kDistinctElements - 1)); std::generate(values.begin(), values.end(), [&] { return values_dict[indices_dist(gen)]; }); } @@ -311,7 +313,7 @@ static void BuildStringDictionaryArray( benchmark::State& state) { // NOLINT non-const reference const auto fodder = MakeStringDictFodder(); auto fodder_size = - std::accumulate(fodder.begin(), fodder.end(), 0UL, + std::accumulate(fodder.begin(), fodder.end(), 0ULL, [&](size_t acc, const std::string& s) { return acc + s.size(); }); for (auto _ : state) { diff --git a/cpp/src/arrow/csv/parser-benchmark.cc b/cpp/src/arrow/csv/parser-benchmark.cc index c474af5..bb84f8e 100644 --- a/cpp/src/arrow/csv/parser-benchmark.cc +++ b/cpp/src/arrow/csv/parser-benchmark.cc @@ -33,11 +33,11 @@ namespace csv { const char* one_row = "abc,\"d,f\",12.34,\n"; const char* one_row_escaped = "abc,d\\,f,12.34,\n"; -size_t num_rows = (1024 * 64) / strlen(one_row); +const auto num_rows = static_cast((1024 * 64) / strlen(one_row)); -static std::string BuildCSVData(const std::string& row, size_t repeat) { +static std::string BuildCSVData(const std::string& row, int32_t repeat) { std::stringstream ss; - for (size_t i = 0; i < repeat; ++i) { + for (int32_t i = 0; i < repeat; ++i) { ss << row; } return ss.str(); diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc index 5131ceb..8725ad6 100644 --- a/cpp/src/arrow/util/bit-util-benchmark.cc +++ b/cpp/src/arrow/util/bit-util-benchmark.cc @@ -124,6 +124,30 @@ static void BenchmarkBitmapReader(benchmark::State& 
state, int64_t nbytes) { state.SetBytesProcessed(2LL * state.iterations() * nbytes); } +template +static void BenchmarkVisitBits(benchmark::State& state, int64_t nbytes) { + std::shared_ptr buffer = CreateRandomBuffer(nbytes); + + const int64_t num_bits = nbytes * 8; + const uint8_t* bitmap = buffer->data(); + + for (auto _ : state) { +{ + int64_t total = 0; + const auto visit = [](bool value) -> void { total += value; }; + VisitBitsFunctorType()(bitmap, 0, num_bits, visit); + benchmark::DoNotOptimize(total); +} +{ + int64_t total
[arrow] branch master updated: ARROW-4912: [C++] add method for easy renaming of a Table's columns
This is an automated email from the ASF dual-hosted git repository. fsaintjacques pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 5c562e8 ARROW-4912: [C++] add method for easy renaming of a Table's columns 5c562e8 is described below commit 5c562e832f16ce881d0cae9237307e4290ac3dd2 Author: Benjamin Kietzman AuthorDate: Mon Jun 17 22:52:59 2019 -0400 ARROW-4912: [C++] add method for easy renaming of a Table's columns This will allow users to produce a table with custom column names from a csv file without specifying a schema (and requiring type inference) Author: Benjamin Kietzman Closes #4557 from bkietz/4912-Allow-specifying-column-names-to-CSV-rea and squashes the following commits: 213c19383 set correct schema in Table::RenameColumns, tests in C++ and py 1341a80e1 fix careless copypasta a6eb3f0ae use gmock's improved assert syntax 9ab8de811 add autopep8 note to developers/python.rst 5fdbf1a80 autopep8, test f/incorrect number of args 81e5adc8e add cython bindings e46752673 Table::RenameCols can fail if an incorrect number of columns are provided be18cfc20 add method for easy renaming of a Table's columns --- cpp/src/arrow/table-test.cc | 14 ++ cpp/src/arrow/table.cc | 24 cpp/src/arrow/table.h| 7 +++ cpp/src/arrow/type-test.cc | 20 cpp/src/arrow/type.cc| 4 cpp/src/arrow/type.h | 3 +++ docs/source/developers/python.rst| 8 python/pyarrow/includes/libarrow.pxd | 3 +++ python/pyarrow/table.pxi | 24 python/pyarrow/tests/test_table.py | 17 + 10 files changed, 124 insertions(+) diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index b545e3b..3b2ed57 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/array.h" @@ -498,6 +499,19 @@ TEST_F(TestTable, SetColumn) { ASSERT_TRUE(result->Equals(*expected)); } +TEST_F(TestTable, RenameColumns) { + 
MakeExample1(10); + auto table = Table::Make(schema_, columns_); + EXPECT_THAT(table->ColumnNames(), testing::ElementsAre("f0", "f1", "f2")); + + std::shared_ptr renamed; + ASSERT_OK(table->RenameColumns({"zero", "one", "two"}, )); + EXPECT_THAT(renamed->ColumnNames(), testing::ElementsAre("zero", "one", "two")); + ASSERT_OK(renamed->Validate()); + + ASSERT_RAISES(Invalid, table->RenameColumns({"hello", "world"}, )); +} + TEST_F(TestTable, RemoveColumnEmpty) { // ARROW-1865 const int64_t length = 10; diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index b018b8b..07b0189 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -478,6 +478,30 @@ Status Table::FromChunkedStructArray(const std::shared_ptr& array, return Status::OK(); } +std::vector Table::ColumnNames() const { + std::vector names(num_columns()); + for (int i = 0; i < num_columns(); ++i) { +names[i] = column(i)->name(); + } + return names; +} + +Status Table::RenameColumns(const std::vector& names, +std::shared_ptr* out) const { + if (names.size() != static_cast(num_columns())) { +return Status::Invalid("tried to rename a table of ", num_columns(), + " columns but only ", names.size(), " names were provided"); + } + std::vector> columns(num_columns()); + std::vector> fields(num_columns()); + for (int i = 0; i < num_columns(); ++i) { +fields[i] = column(i)->field()->WithName(names[i]); +columns[i] = std::make_shared(fields[i], column(i)->data()); + } + *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns), num_rows()); + return Status::OK(); +} + Status ConcatenateTables(const std::vector>& tables, std::shared_ptr* table) { if (tables.size() == 0) { diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index 8016371..94e9f22 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -297,6 +297,13 @@ class ARROW_EXPORT Table { virtual Status SetColumn(int i, const std::shared_ptr& column, std::shared_ptr* out) const = 0; + /// \brief Return names of 
all columns + std::vector ColumnNames() const; + + /// \brief Rename columns with provided names + Status RenameColumns(const std::vector& names, + std::shared_ptr* out) const; + /// \brief Replace schema key-value metadata with new metadata (EXPERIMENTAL) /// \since 0.5.0 /// diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc index 91562ee..7ad1d8a 100644 --- a/cpp/src/arrow/type-test.cc +++ b/cpp/src/arrow/type-test.cc @@
[arrow] branch master updated: ARROW-5567: [C++] Fix build error of memory-benchmark
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 9f52759 ARROW-5567: [C++] Fix build error of memory-benchmark 9f52759 is described below commit 9f527598a139f9f758f638eabcf6c13ea4a6c9bd Author: Yuqi Gu AuthorDate: Mon Jun 17 20:41:21 2019 -0500 ARROW-5567: [C++] Fix build error of memory-benchmark 'memory-benchmark.cc' is implemented with x86 AVX and SSE instructions. The build would fail on Arm64 when -DARROW_BUILD_BENCHMARKS is enabled. Author: Yuqi Gu Author: Wes McKinney Closes #4528 from guyuqi/ARROW-5567 and squashes the following commits: b058450aa Rely on arrow/util/sse-util.h for SSE4.2-related flags and includes 52bf6ed7e Use Macro __SSE4_2__ 8cc81e89e Fix build error of memory-benchmark --- cpp/src/arrow/io/memory-benchmark.cc | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/io/memory-benchmark.cc b/cpp/src/arrow/io/memory-benchmark.cc index a3676e4..a75aa2e 100644 --- a/cpp/src/arrow/io/memory-benchmark.cc +++ b/cpp/src/arrow/io/memory-benchmark.cc @@ -15,12 +15,6 @@ // specific language governing permissions and limitations // under the License. -#ifdef _MSC_VER -#include -#else -#include -#endif - #include #include "arrow/api.h" @@ -28,9 +22,11 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" #include "arrow/util/cpu-info.h" +#include "arrow/util/sse-util.h" #include "benchmark/benchmark.h" +#ifdef ARROW_HAVE_SSE4_2 namespace arrow { using internal::CpuInfo; @@ -229,3 +225,4 @@ BENCHMARK(ParallelMemoryCopy) ->UseRealTime(); } // namespace arrow +#endif // ARROW_HAVE_SSE4_2
[arrow] branch master updated: ARROW-5629: [C++] Fix Coverity issues
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 67f920e ARROW-5629: [C++] Fix Coverity issues 67f920e is described below commit 67f920e8dc72c9857daba792014c86b994a4213b Author: Antoine Pitrou AuthorDate: Mon Jun 17 20:32:59 2019 -0500 ARROW-5629: [C++] Fix Coverity issues Nothing serious, just small cleanups. Note I didn't enable all components when running the analysis, since it's quite long already. I left CUDA, Gandiva, Orc and Plasma disabled. Author: Antoine Pitrou Closes #4595 from pitrou/ARROW-5629-fix-coverity-issues and squashes the following commits: fa235f534 ARROW-5629: Fix Coverity issues --- cpp/src/arrow/array.h | 7 --- cpp/src/arrow/compare.cc| 3 --- cpp/src/arrow/io/buffered.cc| 3 ++- cpp/src/arrow/io/compressed.cc | 7 ++- cpp/src/arrow/json/parser.cc| 7 +-- cpp/src/arrow/python/flight.cc | 6 +++--- cpp/src/arrow/python/flight.h | 8 +--- cpp/src/arrow/python/helpers.cc | 2 +- cpp/src/arrow/python/python_to_arrow.cc | 2 +- cpp/src/arrow/python/serialize.cc | 1 - cpp/src/arrow/util/basic_decimal.cc | 1 + cpp/src/arrow/util/io-util.cc | 5 +++-- cpp/src/arrow/util/io-util.h| 2 +- cpp/src/arrow/util/macros.h | 8 +--- 14 files changed, 37 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index b3c2539..a655422 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -529,13 +529,14 @@ class ARROW_EXPORT ListArray : public Array { } protected: - // this constructor defers SetData to a derived array class + // This constructor defers SetData to a derived array class ListArray() = default; void SetData(const std::shared_ptr& data); - const int32_t* raw_value_offsets_; + + const int32_t* raw_value_offsets_ = NULLPTR; private: - const ListType* list_type_; + const ListType* list_type_ = NULLPTR; std::shared_ptr values_; }; 
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index ca4dfee..12991b9 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -646,9 +646,6 @@ class ApproxEqualsVisitor : public ArrayEqualsVisitor { left, checked_cast(right_), opts_); return Status::OK(); } - - protected: - double epsilon_; }; static bool BaseDataEquals(const Array& left, const Array& right) { diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc index 134eb96..ed3cd4e 100644 --- a/cpp/src/arrow/io/buffered.cc +++ b/cpp/src/arrow/io/buffered.cc @@ -151,7 +151,8 @@ class BufferedOutputStream::Impl : public BufferedBase { } Status Detach(std::shared_ptr* raw) { -RETURN_NOT_OK(Flush()); +std::lock_guard guard(lock_); +RETURN_NOT_OK(FlushUnlocked()); *raw = std::move(raw_); is_open_ = false; return Status::OK(); diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index bc04d49..301ebc3 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -229,7 +229,12 @@ std::shared_ptr CompressedOutputStream::raw() const { return impl_ class CompressedInputStream::Impl { public: Impl(MemoryPool* pool, Codec* codec, const std::shared_ptr& raw) - : pool_(pool), raw_(raw), codec_(codec), is_open_(true) {} + : pool_(pool), +raw_(raw), +codec_(codec), +is_open_(true), +compressed_pos_(0), +decompressed_pos_(0) {} Status Init() { RETURN_NOT_OK(codec_->MakeDecompressor(_)); diff --git a/cpp/src/arrow/json/parser.cc b/cpp/src/arrow/json/parser.cc index 5e51f84..6e85628 100644 --- a/cpp/src/arrow/json/parser.cc +++ b/cpp/src/arrow/json/parser.cc @@ -214,7 +214,7 @@ class RawArrayBuilder { class ScalarBuilder { public: explicit ScalarBuilder(MemoryPool* pool) - : data_builder_(pool), null_bitmap_builder_(pool) {} + : values_length_(0), data_builder_(pool), null_bitmap_builder_(pool) {} Status Append(int32_t index, int32_t value_length) { RETURN_NOT_OK(data_builder_.Append(index)); @@ -567,7 +567,10 @@ class HandlerBase 
: public BlockParser, public rj::BaseReaderHandler, HandlerBase> { public: explicit HandlerBase(MemoryPool* pool) - : BlockParser(pool), builder_set_(pool), scalar_values_builder_(pool) {} + : BlockParser(pool), +builder_set_(pool), +field_index_(-1), +scalar_values_builder_(pool) {} /// Retrieve a pointer to a builder from a BuilderPtr template diff --git a/cpp/src/arrow/python/flight.cc b/cpp/src/arrow/python/flight.cc index 409ba60..ee19fb9 100644 --- a/cpp/src/arrow/python/flight.cc +++
[arrow] branch master updated: ARROW-4343: [C++] Add docker-compose test for gcc 4.8 / Ubuntu 14.04 (Trusty), expand Xenial/16.04 Dockerfile to test Flight
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 2ca62eb ARROW-4343: [C++] Add docker-compose test for gcc 4.8 / Ubuntu 14.04 (Trusty), expand Xenial/16.04 Dockerfile to test Flight 2ca62eb is described below commit 2ca62eb7ad8f2b35c22eae9cdac6fbb773e3ac4b Author: Wes McKinney AuthorDate: Mon Jun 17 14:52:20 2019 -0500 ARROW-4343: [C++] Add docker-compose test for gcc 4.8 / Ubuntu 14.04 (Trusty), expand Xenial/16.04 Dockerfile to test Flight This also fixes a build failure ARROW-5617 resulting from thrift_ep needing to be pointed to the output of boost_ep when ARROW_BOOST_VENDORED=ON Author: Wes McKinney Closes #4582 from wesm/ARROW-4343 and squashes the following commits: bb358aec8 Do not add OpenSSL libraries redundantly when linking arrow_flight 1bdca89a4 Require OpenSSL in Docker builds, use AUTO method for uriparser 3d269a57f Link to OpenSSL::SSL also. 
Fix incorrect comment about OpenSSL imported targets that were released in CMake 3.4.0 75f44d717 Ubuntu 14.04 build working also a505105e7 Fix Xenial linking to OpenSSL, set test data paths efcda4a62 Fix boost_ep e981cf3fb Ubuntu Xenial fixes 761b8e20d Add dependency on boost_ep in thrift_ep if ARROW_BOOST_VENDORED=ON f1a615053 fix lz4 package name d71bfe1e8 Draft initial Ubuntu Trusty 14.04 Dockerfile --- ci/docker_build_and_test_cpp.sh| 4 + ci/docker_build_cpp.sh | 3 + cpp/CMakeLists.txt | 16 +- ...file.ubuntu-xenial => Dockerfile.ubuntu-trusty} | 53 ++-- cpp/Dockerfile.ubuntu-xenial | 23 +- cpp/cmake_modules/ThirdpartyToolchain.cmake| 302 +++-- docker-compose.yml | 19 ++ 7 files changed, 234 insertions(+), 186 deletions(-) diff --git a/ci/docker_build_and_test_cpp.sh b/ci/docker_build_and_test_cpp.sh index c059a5e..99b9460 100755 --- a/ci/docker_build_and_test_cpp.sh +++ b/ci/docker_build_and_test_cpp.sh @@ -20,5 +20,9 @@ set -e /arrow/ci/docker_build_cpp.sh pushd /build/cpp + +export ARROW_TEST_DATA=/arrow/testing/data +export PARQUET_TEST_DATA=/arrow/cpp/submodules/parquet-testing/data + ninja unittest popd diff --git a/ci/docker_build_cpp.sh b/ci/docker_build_cpp.sh index 6e780b6..98c2c1a 100755 --- a/ci/docker_build_cpp.sh +++ b/ci/docker_build_cpp.sh @@ -37,6 +37,7 @@ cmake -GNinja \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-ON} \ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-ON} \ -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-ON} \ + -DARROW_BOOST_VENDORED=${ARROW_BOOST_VENDORED:-OFF} \ -DARROW_FLIGHT=${ARROW_FLIGHT:-ON} \ -DARROW_ORC=${ARROW_ORC:-ON} \ -DARROW_PLASMA=${ARROW_PLASMA:-ON} \ @@ -52,7 +53,9 @@ cmake -GNinja \ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ + -DPARQUET_REQUIRE_ENCRYPTION=${ARROW_WITH_OPENSSL:-ON} \ -DCMAKE_CXX_FLAGS=$CXXFLAGS \ + -Duriparser_SOURCE=AUTO \ ${CMAKE_ARGS} \ ${source_dir} ninja diff --git a/cpp/CMakeLists.txt 
b/cpp/CMakeLists.txt index ea6aa74..142ae75 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -561,18 +561,18 @@ if(ARROW_WITH_URIPARSER) endif() if(ARROW_USE_OPENSSL) - list(APPEND ARROW_LINK_LIBS OpenSSL::Crypto) - list(APPEND ARROW_STATIC_LINK_LIBS OpenSSL::Crypto) - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS OpenSSL::Crypto) + set(ARROW_OPENSSL_LIBS OpenSSL::Crypto OpenSSL::SSL) + list(APPEND ARROW_LINK_LIBS ${ARROW_OPENSSL_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_OPENSSL_LIBS}) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_OPENSSL_LIBS}) endif() if(ARROW_WITH_BROTLI) # Order is important for static linking - list(APPEND ARROW_LINK_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) - list(APPEND ARROW_STATIC_LINK_LIBS Brotli::brotlienc Brotli::brotlidec - Brotli::brotlicommon) - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS Brotli::brotlienc Brotli::brotlidec - Brotli::brotlicommon) + set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) + list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_BROTLI_LIBS}) endif() if(ARROW_WITH_BZ2) diff --git a/cpp/Dockerfile.ubuntu-xenial b/cpp/Dockerfile.ubuntu-trusty similarity index 63% copy from cpp/Dockerfile.ubuntu-xenial copy to cpp/Dockerfile.ubuntu-trusty index 4302334..181dc8a 100644 --- a/cpp/Dockerfile.ubuntu-xenial +++
[arrow] branch upr/4492 deleted (was 656b38f)
This is an automated email from the ASF dual-hosted git repository. fsaintjacques pushed a change to branch upr/4492 in repository https://gitbox.apache.org/repos/asf/arrow.git. was 656b38f Remove set -e in configure This change permanently discards the following revisions: discard 656b38f Remove set -e in configure
[arrow] branch upr/4492 created (now 656b38f)
This is an automated email from the ASF dual-hosted git repository. fsaintjacques pushed a change to branch upr/4492 in repository https://gitbox.apache.org/repos/asf/arrow.git. at 656b38f Remove set -e in configure This branch includes the following new commits: new 656b38f Remove set -e in configure The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[arrow] 01/01: Remove set -e in configure
This is an automated email from the ASF dual-hosted git repository. fsaintjacques pushed a commit to branch upr/4492 in repository https://gitbox.apache.org/repos/asf/arrow.git commit 656b38f23c1e4055712ed85f5ac307864d4734de Author: François Saint-Jacques AuthorDate: Mon Jun 17 13:37:13 2019 -0400 Remove set -e in configure --- r/configure | 3 --- 1 file changed, 3 deletions(-) diff --git a/r/configure b/r/configure index bf0c824..4b3484f 100755 --- a/r/configure +++ b/r/configure @@ -25,9 +25,6 @@ # INCLUDE_DIR and LIB_DIR manually via e.g: # R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib' -# Fail script on failing commands -set -e - # Library settings PKG_CONFIG_NAME="arrow parquet" PKG_DEB_NAME="(unsuppored)"
[arrow] branch master updated: ARROW-5520: [Packaging][deb] Add support for building on arm64
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 6f5bebc ARROW-5520: [Packaging][deb] Add support for building on arm64 6f5bebc is described below commit 6f5bebc3411984dcedeee6b401fb16e9120f70b1 Author: Sutou Kouhei AuthorDate: Mon Jun 17 12:07:49 2019 -0500 ARROW-5520: [Packaging][deb] Add support for building on arm64 We don't have NVIDIA CUDA toolkit and LLVM 7 on arm64. This includes Crossbow build configuration for arm64 but this is not used yet. Because building arm64 packages can't finish in 50min on Travis CI. Author: Sutou Kouhei Closes #4588 from kou/packaging-deb-arm64 and squashes the following commits: f97736d63 Add support for building on arm64 --- dev/tasks/linux-packages/apt/build.sh | 9 ++- .../apt/debian-stretch-arm64/Dockerfile| 77 ++ .../apt/debian-stretch-arm64/qemu-dummy-static | 33 ++ dev/tasks/linux-packages/debian/control| 40 +-- dev/tasks/linux-packages/debian/rules | 67 --- dev/tasks/linux-packages/package-task.rb | 2 + .../{travis.linux.yml => travis.linux.arm64.yml} | 23 ++- dev/tasks/linux-packages/travis.linux.yml | 7 +- dev/tasks/tasks.yml| 44 - 9 files changed, 246 insertions(+), 56 deletions(-) diff --git a/dev/tasks/linux-packages/apt/build.sh b/dev/tasks/linux-packages/apt/build.sh index b4a3fd6..a6a1cc4 100755 --- a/dev/tasks/linux-packages/apt/build.sh +++ b/dev/tasks/linux-packages/apt/build.sh @@ -41,7 +41,7 @@ case "${distribution}" in component=universe ;; esac -specific_debian_dir="debian.${distribution}-${code_name}" +architecture=$(dpkg-architecture -q DEB_BUILD_ARCH) run mkdir -p build run cp /host/tmp/${PACKAGE}-${VERSION}.tar.gz \ @@ -59,8 +59,11 @@ case "${VERSION}" in ;; esac run cd ${PACKAGE}-${VERSION}/ -if [ -d "/host/tmp/${specific_debian_dir}" ]; then - run cp -rp "/host/tmp/${specific_debian_dir}" debian 
+platform="${distribution}-${code_name}" +if [ -d "/host/tmp/debian.${platform}-${architecture}" ]; then + run cp -rp "/host/tmp/debian.${platform}-${architecture}" debian +elif [ -d "/host/tmp/debian.${platform}" ]; then + run cp -rp "/host/tmp/debian.${platform}" debian else run cp -rp "/host/tmp/debian" debian fi diff --git a/dev/tasks/linux-packages/apt/debian-stretch-arm64/Dockerfile b/dev/tasks/linux-packages/apt/debian-stretch-arm64/Dockerfile new file mode 100644 index 000..76365a0 --- /dev/null +++ b/dev/tasks/linux-packages/apt/debian-stretch-arm64/Dockerfile @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM arm64v8/debian:stretch + +COPY qemu-* /usr/bin/ + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ +debconf-set-selections + +ARG DEBUG + +RUN \ + echo "deb http://deb.debian.org/debian stretch-backports main" > \ +/etc/apt/sources.list.d/backports.list + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ +bison \ +build-essential \ +cmake \ +devscripts \ +flex \ +git \ +gtk-doc-tools \ +libboost-filesystem-dev \ +libboost-regex-dev \ +libboost-system-dev \ +libbrotli-dev \ +libc-ares-dev \ +libdouble-conversion-dev \ +libgirepository1.0-dev \ +libglib2.0-doc \ +libgoogle-glog-dev \ +liblz4-dev \ +libre2-dev \ +libsnappy-dev \ +libssl-dev \ +libzstd-dev \ +lsb-release \ +ninja-build \ +pkg-config \ +python3-dev \ +python3-numpy \ +python3-pip \ +tzdata && \ + apt install -y -V -t stretch-backports ${quiet} \ +debhelper \ +libgmock-dev \ +libgrpc++-dev \ +libgtest-dev \ +libprotobuf-dev \ +libprotoc-dev \ +protobuf-compiler \ +protobuf-compiler-grpc \ +rapidjson-dev && \ + pip3 install --upgrade meson && \ + ln -s /usr/local/bin/meson /usr/bin/ && \ +
[arrow] branch master updated: ARROW-5580: [C++][Gandiva] Support timestamp functions
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 73ceae4 ARROW-5580: [C++][Gandiva] Support timestamp functions 73ceae4 is described below commit 73ceae41174f70ed40f5dc5b5365e112727f1567 Author: Prudhvi Porandla AuthorDate: Mon Jun 17 11:52:37 2019 -0500 ARROW-5580: [C++][Gandiva] Support timestamp functions Match signatures of Gandiva timestamp functions with arrow Author: Prudhvi Porandla Author: prudhvi Closes #4539 from pprudhvi/timestampGandivaFixes and squashes the following commits: 6d7927946 remove unnecessary whitespaces in code 5062f7065 add spaces 4d43ba2dbTimestamp arithmetic - Correct function definitions in Gandiva c2795e7eb timestamp_arith: change method signatures in types.h, fix unit tests 59f5d00c3 make gandiva timestamp arithmetic compatible with arrow/java --- .../function_registry_timestamp_arithmetic.cc | 16 cpp/src/gandiva/precompiled/time_test.cc | 34 .../gandiva/precompiled/timestamp_arithmetic.cc| 12 +++--- cpp/src/gandiva/precompiled/types.h| 46 +++--- 4 files changed, 54 insertions(+), 54 deletions(-) diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc index 7af7690..7587212 100644 --- a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -21,10 +21,10 @@ namespace gandiva { #define TIMESTAMP_ADD_FNS(name)\ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64) + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, 
date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64) #define TIMESTAMP_DIFF_FN(name) \ BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, timestamp, int32) @@ -40,10 +40,10 @@ namespace gandiva { BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, timestamp) #define DATE_DIFF_FNS(name) \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64),\ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64),\ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, date64) + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, date64, date64),\ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64),\ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, date64) std::vector GetDateTimeArithmeticFunctionRegistry() { static std::vector datetime_fn_registry_ = { diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index aaadf0f..66c48e3 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -219,50 +219,50 @@ TEST(TestTime, TimeStampTrunc) { TEST(TestTime, TimeStampAdd) { EXPECT_EQ( - timestampaddSecond_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 30), + timestampaddSecond_int32_timestamp(30, StringToTimestamp("2000-05-01 10:20:34")), StringToTimestamp("2000-05-01 10:21:04")); EXPECT_EQ( - timestampaddMinute_timestamp_int64(StringToTimestamp("2000-05-01 10:20:34"), -30), + timestampaddMinute_int64_timestamp(-30, StringToTimestamp("2000-05-01 10:20:34")), StringToTimestamp("2000-05-01 09:50:34")); EXPECT_EQ( - timestampaddHour_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 20), + timestampaddHour_int32_timestamp(20, StringToTimestamp("2000-05-01 10:20:34")), StringToTimestamp("2000-05-02 
06:20:34")); EXPECT_EQ( - timestampaddDay_timestamp_int64(StringToTimestamp("2000-05-01 10:20:34"), -35), + timestampaddDay_int64_timestamp(-35, StringToTimestamp("2000-05-01 10:20:34")), StringToTimestamp("2000-03-27 10:20:34")); - EXPECT_EQ(timestampaddWeek_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 4), + EXPECT_EQ(timestampaddWeek_int32_timestamp(4, StringToTimestamp("2000-05-01 10:20:34")), StringToTimestamp("2000-05-29 10:20:34")); EXPECT_EQ( -
[arrow] branch master updated: ARROW-5606: [Python] deal with deprecated RangeIndex._start/_stop/_step
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new a6f043a ARROW-5606: [Python] deal with deprecated RangeIndex._start/_stop/_step a6f043a is described below commit a6f043a19cc75cea65bfe7f312be968926b042f6 Author: Joris Van den Bossche AuthorDate: Mon Jun 17 11:45:15 2019 -0500 ARROW-5606: [Python] deal with deprecated RangeIndex._start/_stop/_step https://issues.apache.org/jira/browse/ARROW-5606 Author: Joris Van den Bossche Closes #4572 from jorisvandenbossche/ARROW-5606-rangeindex-attributes and squashes the following commits: 1ad113235 add test 2a3ab6cbe ARROW-5606: deal with deprecated RangeIndex._start/_stop/_step --- python/pyarrow/pandas-shim.pxi | 7 +++ python/pyarrow/pandas_compat.py | 9 - python/pyarrow/tests/test_pandas.py | 17 ++--- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi index e87c829..16e7056 100644 --- a/python/pyarrow/pandas-shim.pxi +++ b/python/pyarrow/pandas-shim.pxi @@ -174,6 +174,13 @@ cdef class _PandasAPIShim(object): self._check_import() return self._pd.util.testing.assert_frame_equal +def get_rangeindex_attribute(self, level, name): +# public start/stop/step attributes added in pandas 0.25.0 +self._check_import() +if hasattr(level, name): +return getattr(level, name) +return getattr(level, '_' + name) + cdef _PandasAPIShim pandas_api = _PandasAPIShim() _pandas_api = pandas_api diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 4ec3a56..ea38d41 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -386,14 +386,13 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): def _get_range_index_descriptor(level): -# TODO(wesm): Why are these non-public and is there a more public way to -# get them? 
+# public start/stop/step attributes added in pandas 0.25.0 return { 'kind': 'range', 'name': level.name, -'start': level._start, -'stop': level._stop, -'step': level._step +'start': _pandas_api.get_rangeindex_attribute(level, 'start'), +'stop': _pandas_api.get_rangeindex_attribute(level, 'stop'), +'step': _pandas_api.get_rangeindex_attribute(level, 'step') } diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 46e4f65..4af3708 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -33,7 +33,8 @@ import numpy.testing as npt import pytest import pytz -from pyarrow.pandas_compat import get_logical_type +from pyarrow.pandas_compat import get_logical_type, _pandas_api + import pyarrow as pa try: @@ -183,15 +184,25 @@ class TestConvertMetadata(object): result = table.to_pandas() tm.assert_frame_equal(result, df) assert isinstance(result.index, pd.RangeIndex) -assert result.index._step == 2 +assert _pandas_api.get_rangeindex_attribute(result.index, 'step') == 2 assert result.index.name == index_name result2 = table_no_index_name.to_pandas() tm.assert_frame_equal(result2, df2) assert isinstance(result2.index, pd.RangeIndex) -assert result2.index._step == 1 +assert _pandas_api.get_rangeindex_attribute(result2.index, 'step') == 1 assert result2.index.name is None +def test_rangeindex_doesnt_warn(self): +# ARROW-5606: pandas 0.25 deprecated private _start/stop/step +# attributes -> can be removed if support < pd 0.25 is dropped +df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) + +with pytest.warns(None) as record: +_check_pandas_roundtrip(df, preserve_index=True) + +assert len(record) == 0 + def test_multiindex_columns(self): columns = pd.MultiIndex.from_arrays([ ['one', 'two'], ['X', 'Y']
[arrow] branch master updated: ARROW-5447: [Ruby] Ensure flushing test gz file
This is an automated email from the ASF dual-hosted git repository. shiro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 9425831 ARROW-5447: [Ruby] Ensure flushing test gz file 9425831 is described below commit 9425831dfaf854b5f6e26af1b23afe60f883af74 Author: Sutou Kouhei AuthorDate: Mon Jun 17 19:43:40 2019 +0900 ARROW-5447: [Ruby] Ensure flushing test gz file Author: Sutou Kouhei Closes #4584 from kou/ruby-test-robust and squashes the following commits: c1bdcaefe Run MinGW build when Ruby codes are changed dc498bd49 Ensure flushing test gz file --- appveyor.yml | 3 ++- ci/appveyor-filter-changes.bat| 6 +++--- ruby/red-arrow/test/test-table.rb | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 3e0e645..cefa28e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -19,7 +19,7 @@ os: Visual Studio 2015 only_commits: - # Skip commits not related to Python, C++, C#, Go or Rust + # Skip commits not related to Python, C++, C#, Go, Ruby or Rust files: - appveyor.yml - c_glib/ @@ -29,6 +29,7 @@ only_commits: - format/ - go/ - python/ +- ruby/ - rust/ cache: diff --git a/ci/appveyor-filter-changes.bat b/ci/appveyor-filter-changes.bat index 1e82ecd..04da517 100644 --- a/ci/appveyor-filter-changes.bat +++ b/ci/appveyor-filter-changes.bat @@ -22,10 +22,10 @@ if "%JOB%" == "Rust" ( echo === appveyor exit ) -) else if "%JOB%" == "MinGW" ( -if "%ARROW_CI_GLIB_AFFECTED%" == "0" ( +) else if "%JOB:~,5%" == "MinGW" ( +if "%ARROW_CI_RUBY_AFFECTED%" == "0" ( echo === -echo === No C++, or GLib changes, exiting job +echo === No C++, GLib or Ruby changes, exiting job echo === appveyor exit ) diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb index 6af2b57..2b7a46c 100644 --- a/ruby/red-arrow/test/test-table.rb +++ b/ruby/red-arrow/test/test-table.rb @@ -492,7 +492,8 @@ class TableTest < 
Test::Unit::TestCase test("csv.gz") do file = Tempfile.new(["red-arrow", ".csv.gz"]) -Zlib::GzipWriter.wrap(file) do |gz| +file.close +Zlib::GzipWriter.open(file.path) do |gz| gz.write(<<-CSV) name,score alice,10 @@ -505,7 +506,7 @@ chris,-1 0 alice 10 1 bob29 2 chris -1 - TABLE +TABLE end end end