[arrow] branch master updated: ARROW-4593: [Ruby] Arrow::Array#[out_of_range] returns nil
This is an automated email from the ASF dual-hosted git repository. shiro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 0c340b4 ARROW-4593: [Ruby] Arrow::Array#[out_of_range] returns nil 0c340b4 is described below commit 0c340b4a4a1bcd71c3c1a97415d6c06ceaf43c2d Author: Kouhei Sutou AuthorDate: Sun Feb 17 10:03:52 2019 +0900 ARROW-4593: [Ruby] Arrow::Array#[out_of_range] returns nil Author: Kouhei Sutou Closes #3666 from kou/ruby-array-ref-out-of-range and squashes the following commits: f64df5b5 Array# returns nil --- ruby/red-arrow/lib/arrow/array.rb | 10 ++ ruby/red-arrow/test/test-array.rb | 33 - 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb index 359e70e..f60025a 100644 --- a/ruby/red-arrow/lib/arrow/array.rb +++ b/ruby/red-arrow/lib/arrow/array.rb @@ -35,8 +35,18 @@ module Arrow end end +# @param i [Integer] +# The index of the value to be gotten. +# +# You can specify negative index like for `::Array#[]`. +# +# @return [Object, nil] +# The `i`-th value. +# +# `nil` for NULL value or out of range `i`. 
def [](i) i += length if i < 0 + return nil if i < 0 or i >= length if null?(i) nil else diff --git a/ruby/red-arrow/test/test-array.rb b/ruby/red-arrow/test/test-array.rb index 31e6eaf..3dd7635 100644 --- a/ruby/red-arrow/test/test-array.rb +++ b/ruby/red-arrow/test/test-array.rb @@ -24,15 +24,30 @@ class ArrayTest < Test::Unit::TestCase end end - test("#each") do -array = Arrow::BooleanArray.new([true, false, nil, true]) -assert_equal([true, false, nil, true], - array.to_a) - end + sub_test_case("instance methods") do +def setup + @values = [true, false, nil, true] + @array = Arrow::BooleanArray.new(@values) +end + +test("#each") do + assert_equal(@values, @array.to_a) +end - test("#[]") do -array = Arrow::BooleanArray.new([true, false, nil, true]) -assert_equal([true, false, nil, true], - [array[0], array[1], array[2], array[3]]) +sub_test_case("#[]") do + test("valid range") do +assert_equal(@values, + @array.length.times.collect {|i| @array[i]}) + end + + test("out of range") do +assert_nil(@array[@array.length]) + end + + test("negative index") do +assert_equal(@values.last, + @array[-1]) + end +end end end
[arrow] branch master updated: ARROW-4594: [Ruby] Arrow::StructArray#[] returns Arrow::Struct instead of Arrow::Array
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new aa765aa ARROW-4594: [Ruby] Arrow::StructArray#[] returns Arrow::Struct instead of Arrow::Array aa765aa is described below commit aa765aa2165b99ccb0f25820b3d67b9cf439890e Author: Kouhei Sutou AuthorDate: Sat Feb 16 17:49:56 2019 -0600 ARROW-4594: [Ruby] Arrow::StructArray#[] returns Arrow::Struct instead of Arrow::Array This is a compatibility breaking change but we warn about this change in 0.12.0. Author: Kouhei Sutou Closes #3667 from kou/ruby-struct-array-ref and squashes the following commits: 5b304496f Arrow::StructArray# returns Arrow::Struct instead of Arrow::Array --- ruby/red-arrow/lib/arrow/struct-array.rb | 22 +++--- ruby/red-arrow/lib/arrow/struct.rb | 11 +++ ruby/red-arrow/test/test-struct-array.rb | 19 --- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/ruby/red-arrow/lib/arrow/struct-array.rb b/ruby/red-arrow/lib/arrow/struct-array.rb index e55a507..92b02bb 100644 --- a/ruby/red-arrow/lib/arrow/struct-array.rb +++ b/ruby/red-arrow/lib/arrow/struct-array.rb @@ -19,17 +19,25 @@ require "arrow/struct" module Arrow class StructArray -def [](i) - warn("Use #{self.class}\#find_field instead. " + - "This will returns Arrow::Struct instead of Arrow::Array " + - "since 0.13.0.") - get_field(i) -end - +# @param i [Integer] +# The index of the value to be gotten. You must specify the value index. +# +# You can use {Arrow::Array#[]} for convenient value access. +# +# @return [Arrow::Struct] The `i`-th value. def get_value(i) Struct.new(self, i) end +# @overload find_field(index) +# @param index [Integer] The index of the field to be found. +# @return [Arrow::Array, nil] +# The `index`-th field or `nil` for out of range. 
+# +# @overload find_field(name) +# @param index [String, Symbol] The name of the field to be found. +# @return [Arrow::Array, nil] +# The field that has `name` or `nil` for nonexistent name. def find_field(index_or_name) case index_or_name when String, Symbol diff --git a/ruby/red-arrow/lib/arrow/struct.rb b/ruby/red-arrow/lib/arrow/struct.rb index 4ae12b8..6028a7b 100644 --- a/ruby/red-arrow/lib/arrow/struct.rb +++ b/ruby/red-arrow/lib/arrow/struct.rb @@ -64,5 +64,16 @@ module Arrow end super end + +def ==(other) + other.is_a?(self.class) and +@array == other.array and +@index == other.index +end + +protected +def array + @array +end end end diff --git a/ruby/red-arrow/test/test-struct-array.rb b/ruby/red-arrow/test/test-struct-array.rb index 5a00434..b82d048 100644 --- a/ruby/red-arrow/test/test-struct-array.rb +++ b/ruby/red-arrow/test/test-struct-array.rb @@ -49,9 +49,22 @@ class StructArrayTest < Test::Unit::TestCase end test("#[]") do - notify("TODO: Returns Arrow::Struct instead.") - assert_equal([[true, false], [1, 2]], - [@array[0].to_a, @array[1].to_a]) + assert_equal([ + Arrow::Struct.new(@array, 0), + Arrow::Struct.new(@array, 1), + ], + @array.to_a) +end + +test("#get_value") do + assert_equal([ + Arrow::Struct.new(@array, 0), + Arrow::Struct.new(@array, 1), + ], + [ + @array.get_value(0), + @array.get_value(1), + ]) end sub_test_case("#find_field") do
[arrow] branch master updated: ARROW-4592: [GLib] Stop configure immediately when GLib isn't available
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new b74cc65 ARROW-4592: [GLib] Stop configure immediately when GLib isn't available b74cc65 is described below commit b74cc6547ad07e91351f29e7b0fa84c7d6b81e09 Author: Kouhei Sutou AuthorDate: Sat Feb 16 17:48:08 2019 -0600 ARROW-4592: [GLib] Stop configure immediately when GLib isn't available Author: Kouhei Sutou Closes #3665 from kou/glib-exit-on-error and squashes the following commits: d8bc0731b Stop configure immediately when GLib isn't available --- c_glib/configure.ac | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/c_glib/configure.ac b/c_glib/configure.ac index 75654d2..3e3e328 100644 --- a/c_glib/configure.ac +++ b/c_glib/configure.ac @@ -99,7 +99,10 @@ fi AC_SUBST(GARROW_CFLAGS) AC_SUBST(GARROW_CXXFLAGS) -AM_PATH_GLIB_2_0([2.32.4], [], [], [gobject gio]) +AM_PATH_GLIB_2_0([2.32.4], + [], + [AC_MSG_ERROR(GLib isn't available)], + [gobject gio]) GOBJECT_INTROSPECTION_REQUIRE([1.32.1]) GTK_DOC_CHECK([1.18-2])
[arrow] branch master updated: ARROW-4341: [C++] Refactor Primitive builders and BooleanBuilder to use TypedBufferBuilder
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new bbca717 ARROW-4341: [C++] Refactor Primitive builders and BooleanBuilder to use TypedBufferBuilder bbca717 is described below commit bbca7178adf1214425f95164a887989b018707f8 Author: Benjamin Kietzman AuthorDate: Sat Feb 16 17:44:46 2019 -0600 ARROW-4341: [C++] Refactor Primitive builders and BooleanBuilder to use TypedBufferBuilder This reduces code duplication. Author: Benjamin Kietzman Author: Wes McKinney Closes #3575 from bkietz/ARROW-4341-primitive-builders-use-bufferbuilder and squashes the following commits: 3ef29721b Fix BooleanBuilder::AppendNulls, remove valid_bytes argument from AppendNulls methods 40c4d8d5c TypedBufferBuilder's output was not correctly sized b389c1308 Revert changes to arrow/util/logging.h daf524423 Revert change to UnsafeAppend that broke Python unit test 3cc5a0c46 Restore memory zeroing. Add missing override 21ce28558 Fix RETURN_NOT_OK usages d4ab3b539 Move NumericBuilder implementation to headers to avoid symbol visibility concerns 6c1e99d4e Add TypedBufferBuilder UnsafeAppend compile-time option to not track falses. 
Restore faster code from before this patch for appending C arrays and vector 09d2bfe8f reduce unnecessary zeroing in BufferBuilder bd736c3db add ArrowLogIgnore and use for release mode DCHECK* 7ba692cc9 moving to iterator append in NumericBuilder 188b7b9cc fix format 893457306 add explicit cast 88e57fe58 remove PrimitiveBuilder 9c050b4dc Use TypedBufferBuilder for PrimitiveBuilder 078497a38 fix BooleanBuilder::AppendNull 88eb71c97 Use TypedBufferBuilder in BooleanBuilder --- c_glib/arrow-glib/array-builder.cpp | 4 +- cpp/src/arrow/array-test.cc | 43 -- cpp/src/arrow/array/builder_adaptive.h | 7 +- cpp/src/arrow/array/builder_base.cc | 6 + cpp/src/arrow/array/builder_base.h | 2 + cpp/src/arrow/array/builder_primitive.cc | 169 ++-- cpp/src/arrow/array/builder_primitive.h | 217 +++ cpp/src/arrow/buffer-builder.h | 40 +- cpp/src/arrow/buffer-test.cc | 4 + 9 files changed, 201 insertions(+), 291 deletions(-) diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index 095c68d..afdae8c 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -136,9 +136,7 @@ garrow_array_builder_append_nulls(GArrowArrayBuilder *builder, auto arrow_builder = static_cast(garrow_array_builder_get_raw(builder)); - uint8_t valid_bytes[n]; - memset(valid_bytes, 0, sizeof(uint8_t) * n); - auto status = arrow_builder->AppendNulls(valid_bytes, n); + auto status = arrow_builder->AppendNulls(n); return garrow_error_check(error, status, context); } diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 3b348de..dead8de 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -478,7 +478,11 @@ void TestPrimitiveBuilder::Check(const std::unique_ptr ASSERT_EQ(draws_[i] != 0, actual) << i; } } - ASSERT_TRUE(result->Equals(*expected)); + AssertArraysEqual(*result, *expected); + + // buffers are correctly sized + ASSERT_EQ(result->data()->buffers[0]->size(), BitUtil::BytesForBits(size)); + 
ASSERT_EQ(result->data()->buffers[1]->size(), BitUtil::BytesForBits(size)); // Builder is now reset ASSERT_EQ(0, builder->length()); @@ -518,15 +522,13 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) { TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) { const int64_t size = 10; - const uint8_t valid_bytes[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - - ASSERT_OK(this->builder_->AppendNulls(valid_bytes, size)); + ASSERT_OK(this->builder_->AppendNulls(size)); std::shared_ptr result; FinishAndCheckPadding(this->builder_.get(), ); for (int64_t i = 0; i < size; ++i) { -ASSERT_EQ(result->IsValid(i), static_cast(valid_bytes[i])); +ASSERT_FALSE(result->IsValid(i)); } } @@ -922,6 +924,27 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) { ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity()); } +TEST(TestBooleanBuilder, AppendNullsAdvanceBuilder) { + BooleanBuilder builder; + + std::vector values = {1, 0, 0, 1}; + std::vector is_valid = {1, 1, 0, 1}; + + std::shared_ptr arr; + ASSERT_OK(builder.AppendValues(values.data(), 2)); + ASSERT_OK(builder.AppendNulls(1)); + ASSERT_OK(builder.AppendValues(values.data() + 3, 1)); + ASSERT_OK(builder.Finish()); + + ASSERT_EQ(1, arr->null_count()); + + const auto& barr = static_cast(*arr); + ASSERT_TRUE(barr.Value(0)); +
[arrow] branch master updated: ARROW-4490: [Rust] Add explicit SIMD vectorization for boolean ops in "array_ops"
This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new a374c3c ARROW-4490: [Rust] Add explicit SIMD vectorization for boolean ops in "array_ops" a374c3c is described below commit a374c3c3af0a2524c51a7a1f53d6c838f7806de7 Author: Paddy Horan AuthorDate: Sat Feb 16 09:45:56 2019 -0700 ARROW-4490: [Rust] Add explicit SIMD vectorization for boolean ops in "array_ops" This PR adds explicit SIMD for boolean ops, `and`, `or` and `not`. I moved `array_ops` into the new `compute` module. From the outside this module serves the same purpose as the previous `array_ops` module (all kernels will be accessible from this namespace) and the remaining `array_ops` implementations are exposed via the `compute` module currently. As I add explicit SIMD for more kernels they will migrate from `array_ops` into their own modules under `compute`. I am keeping sub-modules under `compute` (as opposed to compute.rs) as SIMD can get r [...] I have included benchmarks where I re-create the old default implementations for others to take a look at the speed improvement. It's not clear whether we need the non-SIMD versions in the benchmarks long term but I left them in for now to make the non-SIMD/SIMD comparison. There are likely more optimizations possible (processing the values and null bit buffers in a single loop for instance) but I wanted to get the cleanest impl first and add further optimizations later if needed. 
Author: Paddy Horan Closes #3641 from paddyhoran/boolean-kernels and squashes the following commits: 89588e6 Removed `compute` from `mod.rs` f9ae58a Updated benchmarks e321cec Updated `not` to use trait impls da16486 Implemented `Not` for `Buffer` f21253d Updated datafusion and comments a3c01c8 Added SIMD binary boolean kernels --- rust/arrow/Cargo.toml | 2 +- rust/arrow/benches/bitwise_ops.rs | 75 - rust/arrow/benches/boolean_kernels.rs | 129 ++ rust/arrow/src/bitmap.rs| 20 ++-- rust/arrow/src/buffer.rs| 89 rust/arrow/src/{ => compute}/array_ops.rs | 106 --- rust/arrow/src/compute/boolean_kernels.rs | 159 rust/arrow/src/{lib.rs => compute/mod.rs} | 31 ++ rust/arrow/src/compute/util.rs | 83 +++ rust/arrow/src/lib.rs | 2 +- rust/datafusion/src/execution/aggregate.rs | 62 +-- rust/datafusion/src/execution/expression.rs | 6 +- 12 files changed, 492 insertions(+), 272 deletions(-) diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 6e2d483..04e8ac0 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -60,5 +60,5 @@ name = "builder" harness = false [[bench]] -name = "bitwise_ops" +name = "boolean_kernels" harness = false diff --git a/rust/arrow/benches/bitwise_ops.rs b/rust/arrow/benches/bitwise_ops.rs deleted file mode 100644 index 434ff4d..000 --- a/rust/arrow/benches/bitwise_ops.rs +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[macro_use] -extern crate criterion; -use criterion::Criterion; - -extern crate arrow; - -use arrow::buffer::Buffer; -use arrow::builder::{BufferBuilderTrait, UInt8BufferBuilder}; - -fn create_buffer(size: usize) -> Buffer { -let mut builder = UInt8BufferBuilder::new(size); -for _i in 0..size { -builder.append(1_u8).unwrap(); -} -builder.finish() -} - -fn bitwise_default(size: usize, op: F) -where -F: Fn(, ) -> u8, -{ -let buffer_a = create_buffer(size); -let buffer_b = create_buffer(size); - -criterion::black_box({ -let mut builder = UInt8BufferBuilder::new(buffer_a.len()); -for i in 0..buffer_a.len() { -unsafe { -builder -.append(op( -buffer_a.data().get_unchecked(i), -
[arrow] branch master updated: ARROW-4586: [Rust] Remove arrow/mod.rs as it is not needed
This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new e1bc0d4 ARROW-4586: [Rust] Remove arrow/mod.rs as it is not needed e1bc0d4 is described below commit e1bc0d4bd8377c1782f779a5d37ba3c7f8d3105d Author: Paddy Horan AuthorDate: Sat Feb 16 09:36:03 2019 -0700 ARROW-4586: [Rust] Remove arrow/mod.rs as it is not needed `mod.rs` is not needed as `lib.rs` imports sub-modules directly. In fact, it's not compiled at all from what I can see... Author: Paddy Horan Closes #3659 from paddyhoran/remove-mod and squashes the following commits: 513eaa4 Removed `arrow/mod.rs` --- rust/arrow/src/mod.rs | 28 1 file changed, 28 deletions(-) diff --git a/rust/arrow/src/mod.rs b/rust/arrow/src/mod.rs deleted file mode 100644 index b9fa43a..000 --- a/rust/arrow/src/mod.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod array; -pub mod array_data; -pub mod bitmap; -pub mod buffer; -pub mod builder; -pub mod csv; -pub mod datatypes; -pub mod error; -pub mod memory; -pub mod record_batch; -pub mod tensor;