[arrow] branch new_divide created (now 12a24e9)
This is an automated email from the ASF dual-hosted git repository. paddyhoran pushed a change to branch new_divide in repository https://gitbox.apache.org/repos/asf/arrow.git. at 12a24e9 A clean build This branch includes the following new commits: new 12a24e9 A clean build The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[arrow] 01/01: A clean build
This is an automated email from the ASF dual-hosted git repository. paddyhoran pushed a commit to branch new_divide in repository https://gitbox.apache.org/repos/asf/arrow.git commit 12a24e917bfdac1760200f2e5f4afed54d608411 Author: Paddy Horan AuthorDate: Thu Mar 14 20:58:57 2019 -0400 A clean build --- rust/arrow/src/compute/arithmetic_kernels.rs | 73 ++-- rust/arrow/src/compute/util.rs | 35 + rust/arrow/src/datatypes.rs | 41 3 files changed, 146 insertions(+), 3 deletions(-) diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs b/rust/arrow/src/compute/arithmetic_kernels.rs index 2566002..d7f0b32 100644 --- a/rust/arrow/src/compute/arithmetic_kernels.rs +++ b/rust/arrow/src/compute/arithmetic_kernels.rs @@ -27,13 +27,13 @@ use std::ops::{Add, Div, Mul, Sub}; use std::slice::from_raw_parts_mut; use std::sync::Arc; -use num::Zero; +use num::{One, Zero}; use crate::array::*; use crate::array_data::ArrayData; use crate::buffer::MutableBuffer; use crate::builder::PrimitiveBuilder; -use crate::compute::util::apply_bin_op_to_option_bitmap; +use crate::compute::util::{apply_bin_op_to_option_bitmap, is_valid}; use crate::datatypes; use crate::error::{ArrowError, Result}; @@ -123,6 +123,68 @@ where Ok(PrimitiveArrayfrom(Arc::new(data))) } +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn simd_divide( +left: , +right: , +) -> Result> +where +T: datatypes::ArrowNumericType, +T::Native: One + Zero, +T::Simd: Add ++ Sub ++ Mul ++ Div, +{ +if left.len() != right.len() { +return Err(ArrowError::ComputeError( +"Cannot perform math operation on arrays of different length".to_string(), +)); +} + +let null_bit_buffer = apply_bin_op_to_option_bitmap( +left.data().null_bitmap(), +right.data().null_bitmap(), +|a, b| a & b, +)?; + +let lanes = T::lanes(); +let buffer_size = left.len() * mem::size_of::(); +let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + +for i in (0..left.len()).step_by(lanes) { +let simd_right_raw_check = 
T::load(right.value_slice(i, lanes)); +let simd_right_check = unsafe{T::mask_select(is_valid::(, i, lanes, right.len()), simd_right_raw_check, T::init(T::Native::one()))}; +let is_zero = T::eq(T::init(T::Native::zero()), simd_right_check); +if T::mask_any(is_zero) { +return Err(ArrowError::DivideByZero); +} +let simd_right_raw = T::load(right.value_slice(i, lanes)); +let simd_right = unsafe{T::mask_select(is_valid::(, i, lanes, right.len()), simd_right_raw, T::init(T::Native::one()))}; +let simd_left = T::load(left.value_slice(i, lanes)); +let simd_result = T::bin_op(simd_left, simd_right, |a, b| a / b); + +let result_slice: [T::Native] = unsafe { +from_raw_parts_mut( +(result.data_mut().as_mut_ptr() as *mut T::Native).offset(i as isize), +lanes, +) +}; +T::write(simd_result, result_slice); +} + +let data = ArrayData::new( +T::get_data_type(), +left.len(), +None, +null_bit_buffer, +left.offset(), +vec![result.freeze()], +vec![], +); +Ok(PrimitiveArrayfrom(Arc::new(data))) +} + /// Perform `left + right` operation on two arrays. If either left or right value is null /// then the result is also null. pub fn add( @@ -199,8 +261,13 @@ where + Sub + Mul + Div -+ Zero, ++ Zero ++ One, { +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +return simd_divide(, ); + +#[allow(unreachable_code)] math_op(left, right, |a, b| { if b.is_zero() { Err(ArrowError::DivideByZero) diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 55726b8..64f060a 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -20,6 +20,7 @@ use crate::bitmap::Bitmap; use crate::buffer::Buffer; use crate::error::Result; +use crate::datatypes::*; /// Applies a given binary operation, `op`, to two references to `Option`'s. 
/// @@ -44,6 +45,40 @@ where } } + +pub unsafe fn is_valid(bitmap: , i: usize, lanes: usize, len: usize) -> T::SimdMask +where +T: ArrowNumericType, +{ + +// Validity based on the length of the Array +let upper_bound = i + lanes; +let mut length_based_validity = T::new_mask(true); +for j in upper_bound..len { +length_based_validity = T::mask_set(length_based_validity, j - i, false); +} + +match { +Some(_) => length_based_validity, +None => length_based_validity, +} + +//let length_based_validity = if upper_bound < len { +//
[arrow] branch master updated: ARROW-4673: [C++] Implement Scalar::Equals and Datum::Equals
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 548e194 ARROW-4673: [C++] Implement Scalar::Equals and Datum::Equals 548e194 is described below commit 548e1949d527717d7821a4ab2f09ff7c39882152 Author: François Saint-Jacques AuthorDate: Thu Mar 14 20:03:44 2019 -0500 ARROW-4673: [C++] Implement Scalar::Equals and Datum::Equals Handy for validating kernels. Author: François Saint-Jacques Author: Wes McKinney Closes #3875 from fsaintjacques/ARROW-4673-datum-equal and squashes the following commits: 3fff08785 Add common base class for some primitive scalar, a little DRY 093e1bd55 Fix struct Scalar warning 66cae36d8 Fix warnings. 7a7c0d6a1 ARROW-4673: Implement Scalar::Equals and Datum::Equals --- cpp/src/arrow/compare.cc| 91 + cpp/src/arrow/compare.h | 6 ++ cpp/src/arrow/compute/kernel.h | 46 + cpp/src/arrow/compute/kernels/aggregate-test.cc | 4 +- cpp/src/arrow/scalar-test.cc| 15 cpp/src/arrow/scalar.cc | 9 ++- cpp/src/arrow/scalar.h | 32 +++-- cpp/src/arrow/testing/gtest_util.cc | 6 ++ cpp/src/arrow/testing/gtest_util.h | 8 +++ cpp/src/arrow/type_fwd.h| 2 + cpp/src/arrow/util/memory.h | 12 cpp/src/arrow/visitor.cc| 39 +++ cpp/src/arrow/visitor.h | 32 + cpp/src/arrow/visitor_inline.h | 18 + 14 files changed, 308 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index fcb16b5..aca6094 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -30,6 +30,7 @@ #include "arrow/array.h" #include "arrow/buffer.h" +#include "arrow/scalar.h" #include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/tensor.h" @@ -38,6 +39,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/memory.h" #include "arrow/visitor_inline.h" namespace arrow { @@ 
-717,6 +719,78 @@ class TypeEqualsVisitor { bool result_; }; +class ScalarEqualsVisitor { + public: + explicit ScalarEqualsVisitor(const Scalar& right) : right_(right), result_(false) {} + + Status Visit(const NullScalar& left) { +result_ = true; +return Status::OK(); + } + + template + typename std::enable_if::value, + Status>::type + Visit(const T& left_) { +const auto& right = checked_cast(right_); +result_ = right.value == left_.value; +return Status::OK(); + } + + template + typename std::enable_if::value, Status>::type Visit( + const T& left_) { +const auto& left = checked_cast(left_); +const auto& right = checked_cast(right_); +result_ = internal::SharedPtrEquals(left.value, right.value); +return Status::OK(); + } + + Status Visit(const Decimal128Scalar& left) { +const auto& right = checked_cast(right_); +result_ = left.value == right.value; +return Status::OK(); + } + + Status Visit(const ListScalar& left) { +const auto& right = checked_cast(right_); +result_ = internal::SharedPtrEquals(left.value, right.value); +return Status::OK(); + } + + Status Visit(const StructScalar& left) { +const auto& right = checked_cast(right_); + +if (right.value.size() != left.value.size()) { + result_ = false; +} else { + bool all_equals = true; + for (size_t i = 0; i < left.value.size() && all_equals; i++) { +all_equals &= internal::SharedPtrEquals(left.value[i], right.value[i]); + } + result_ = all_equals; +} + +return Status::OK(); + } + + Status Visit(const UnionScalar& left) { return Status::NotImplemented("union"); } + + Status Visit(const DictionaryScalar& left) { +return Status::NotImplemented("dictionary"); + } + + Status Visit(const ExtensionScalar& left) { +return Status::NotImplemented("extension"); + } + + bool result() const { return result_; } + + protected: + const Scalar& right_; + bool result_; +}; + } // namespace internal bool ArrayEquals(const Array& left, const Array& right) { @@ -915,4 +989,21 @@ bool TypeEquals(const DataType& left, const DataType& 
right, bool check_metadata return are_equal; } +bool ScalarEquals(const Scalar& left, const Scalar& right) { + bool are_equal = false; + if ( == ) { +are_equal = true; + } else if (!left.type->Equals(right.type)) { +are_equal = false; + } else if (left.is_valid != right.is_valid) { +are_equal = false; + } else { +
[arrow] branch master updated: ARROW-4870: [Ruby] Fix msys2_mingw_dependencies
This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 76e5425 ARROW-4870: [Ruby] Fix mys2_mingw_dependencies 76e5425 is described below commit 76e5425da7ec335c7213a0082a1872850517446d Author: dsisnero AuthorDate: Fri Mar 15 09:14:28 2019 +0900 ARROW-4870: [Ruby] Fix mys2_mingw_dependencies $ pacman -Ss arrow mingw32/mingw-w64-i686-arrow 0.11.1-1 Apache Arrow is a cross-language development platform for in-memory data (mingw-w64) mingw64/mingw-w64-x86_64-arrow 0.11.1-1 [installed] Apache Arrow is a cross-language development platform for in-memory data (mingw-w64) Author: dsisnero Closes #3881 from dsisnero/patch-1 and squashes the following commits: 1839d12a change mys2_mingw_dependencies to correct package --- ruby/red-arrow/red-arrow.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec index 121f567..9451c9c 100644 --- a/ruby/red-arrow/red-arrow.gemspec +++ b/ruby/red-arrow/red-arrow.gemspec @@ -55,5 +55,5 @@ Gem::Specification.new do |spec| spec.add_development_dependency("test-unit") spec.add_development_dependency("yard") - spec.metadata["msys2_mingw_dependencies"] = "apache-arrow" + spec.metadata["msys2_mingw_dependencies"] = "arrow" end
[arrow] branch master updated: ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property
This is an automated email from the ASF dual-hosted git repository. shiro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 99a47ab ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property 99a47ab is described below commit 99a47ab1f8d3a89e5f49006f072c9fba276858e1 Author: Kouhei Sutou AuthorDate: Fri Mar 15 09:03:11 2019 +0900 ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property Author: Kouhei Sutou Closes #3894 from kou/glib-cast-options-allow-invalid-utf8 and squashes the following commits: 9fc06744 Add GArrowCastOptions::allow-invalid-utf8 property --- c_glib/arrow-glib/compute.cpp | 24 +++- c_glib/test/test-cast.rb | 17 + 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 2039eea..a9f6721 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -42,7 +42,8 @@ enum { PROP_0, PROP_ALLOW_INT_OVERFLOW, PROP_ALLOW_TIME_TRUNCATE, - PROP_ALLOW_FLOAT_TRUNCATE + PROP_ALLOW_FLOAT_TRUNCATE, + PROP_ALLOW_INVALID_UTF8, }; G_DEFINE_TYPE_WITH_PRIVATE(GArrowCastOptions, @@ -72,6 +73,9 @@ garrow_cast_options_set_property(GObject *object, case PROP_ALLOW_FLOAT_TRUNCATE: priv->options.allow_float_truncate = g_value_get_boolean(value); break; + case PROP_ALLOW_INVALID_UTF8: +priv->options.allow_invalid_utf8 = g_value_get_boolean(value); +break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -96,6 +100,9 @@ garrow_cast_options_get_property(GObject *object, case PROP_ALLOW_FLOAT_TRUNCATE: g_value_set_boolean(value, priv->options.allow_float_truncate); break; + case PROP_ALLOW_INVALID_UTF8: +g_value_set_boolean(value, priv->options.allow_invalid_utf8); +break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -157,6 +164,20 @@ garrow_cast_options_class_init(GArrowCastOptionsClass 
*klass) FALSE, static_cast(G_PARAM_READWRITE)); g_object_class_install_property(gobject_class, PROP_ALLOW_FLOAT_TRUNCATE, spec); + + /** + * GArrowCastOptions:allow-invalid-utf8: + * + * Whether invalid UTF-8 string value is allowed or not. + * + * Since: 0.13.0 + */ + spec = g_param_spec_boolean("allow-invalid-utf8", + "Allow invalid UTF-8", + "Whether invalid UTF-8 string value is allowed or not", + FALSE, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_INVALID_UTF8, spec); } /** @@ -183,6 +204,7 @@ garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_cast_options) "allow-int-overflow", arrow_cast_options->allow_int_overflow, "allow-time-truncate", arrow_cast_options->allow_time_truncate, "allow-float-truncate", arrow_cast_options->allow_float_truncate, + "allow-invalid-utf8", arrow_cast_options->allow_invalid_utf8, NULL); return GARROW_CAST_OPTIONS(cast_options); } diff --git a/c_glib/test/test-cast.rb b/c_glib/test/test-cast.rb index 2512e05..f9d406c 100644 --- a/c_glib/test/test-cast.rb +++ b/c_glib/test/test-cast.rb @@ -82,4 +82,21 @@ class TestCast < Test::Unit::TestCase build_float_array([1.1]).cast(int8_data_type, options)) end end + + sub_test_case("allow-invalid-utf8") do +def test_default + require_gi(1, 42, 0) + assert_raise(Arrow::Error::Invalid) do +build_binary_array(["\xff"]).cast(Arrow::StringDataType.new) + end +end + +def test_true + options = Arrow::CastOptions.new + options.allow_invalid_utf8 = true + string_data_type = Arrow::StringDataType.new + assert_equal(build_string_array(["\xff"]), + build_binary_array(["\xff"]).cast(string_data_type, options)) +end + end end
[arrow] branch master updated: ARROW-4866: [C++] Fix zstd_ep build for Debug, static CRT builds. Add separate CMake variable for propagating compiler toolchain to ExternalProjects
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 431fc13 ARROW-4866: [C++] Fix zstd_ep build for Debug, static CRT builds. Add separate CMake variable for propagating compiler toolchain to ExternalProjects 431fc13 is described below commit 431fc13011cd959ecd3ea57b960436e960256f91 Author: Wes McKinney AuthorDate: Thu Mar 14 17:05:43 2019 -0500 ARROW-4866: [C++] Fix zstd_ep build for Debug, static CRT builds. Add separate CMake variable for propagating compiler toolchain to ExternalProjects zstd sets a bunch of its own compilation flags and they conflict with the CFLAGS/CXXFLAGS that are part of EP_COMMON_CMAKE_ARGS Author: Wes McKinney Closes #3905 from wesm/ARROW-4866 and squashes the following commits: de7c0ae6 Pass EP compiler toolchain as separate variable. Do not override CFLAGS and CXXFLAGS for zstd_ep --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 28 ++-- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index baefc6b..89fd7a6 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -350,30 +350,30 @@ endif() # directory. This leads to issues if the variables are exported in a subshell # and the invocation of make/ninja is in distinct subshell without the same # environment (CC/CXX). 
-set(EP_COMMON_CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) +set(EP_COMMON_TOOLCHAIN -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} +-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) + +if (CMAKE_AR) + set(EP_COMMON_TOOLCHAIN ${EP_COMMON_TOOLCHAIN} + -DCMAKE_AR=${CMAKE_AR}) +endif() + +if (CMAKE_RANLIB) + set(EP_COMMON_TOOLCHAIN ${EP_COMMON_TOOLCHAIN} + -DCMAKE_RANLIB=${CMAKE_RANLIB}) +endif() # External projects are still able to override the following declarations. # cmake command line will favor the last defined variable when a duplicate is # encountered. This requires that `EP_COMMON_CMAKE_ARGS` is always the first # argument. -set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} +set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_TOOLCHAIN} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_C_FLAGS=${EP_C_FLAGS} -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS} -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS} -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}) -if (CMAKE_AR) - set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} - -DCMAKE_AR=${CMAKE_AR}) -endif() - -if (CMAKE_RANLIB) - set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} - -DCMAKE_RANLIB=${CMAKE_RANLIB}) -endif() - if (NOT ARROW_VERBOSE_THIRDPARTY_BUILD) set(EP_LOG_OPTIONS LOG_CONFIGURE 1 @@ -1438,7 +1438,7 @@ macro(build_zstd) message(STATUS "Building zstd from source") set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install") - set(ZSTD_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + set(ZSTD_CMAKE_ARGS ${EP_COMMON_TOOLCHAIN} "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}" -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
[arrow] branch master updated: ARROW-4705: [Rust] Improve error handling in csv reader
This is an automated email from the ASF dual-hosted git repository. sunchao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 74436f0 ARROW-4705: [Rust] Improve error handling in csv reader 74436f0 is described below commit 74436f07c482c0d46a8a52c2b50dc9ad1191922a Author: Andy Grove AuthorDate: Thu Mar 14 14:11:40 2019 -0700 ARROW-4705: [Rust] Improve error handling in csv reader Author: Andy Grove Closes #3895 from andygrove/ARROW-4705 and squashes the following commits: b32f4a8e Address PR feedback 00d6b3b4 start line number at 1 if csv has header row 8c9fca00 Improve error handling in csv reader --- rust/arrow/Cargo.toml | 2 +- rust/arrow/src/csv/reader.rs | 51 ++ rust/arrow/test/data/various_types_invalid.csv | 6 +++ 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index fbc9be0..5e96e0d 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -41,7 +41,7 @@ serde_derive = "1.0.80" serde_json = { version = "1.0.13", features = ["preserve_order"] } indexmap = "1.0" rand = "0.5" -csv = "1.0.0" +csv = "1.0" num = "0.2" regex = "1.1" lazy_static = "1.2" diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index 85b2ccd..ffeffdd 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -191,6 +191,8 @@ pub struct Reader { record_iter: StringRecordsIntoIter>, /// Batch size (number of records to load each time) batch_size: usize, +/// Current line number, used in error reporting +line_number: usize, } impl Reader { @@ -235,6 +237,7 @@ impl Reader { projection, record_iter, batch_size, +line_number: if has_headers { 1 } else { 0 }, } } @@ -242,15 +245,17 @@ impl Reader { pub fn next( self) -> Result> { // read a batch of rows into memory let mut rows: Vec = Vec::with_capacity(self.batch_size); -for _ in 0..self.batch_size { +for i in 
0..self.batch_size { match self.record_iter.next() { Some(Ok(r)) => { rows.push(r); } -Some(Err(_)) => { -return Err(ArrowError::ParseError( -"Error reading CSV file".to_string(), -)); +Some(Err(e)) => { +return Err(ArrowError::ParseError(format!( +"Error parsing line {}: {:?}", +self.line_number + i, +e +))); } None => break, } @@ -319,6 +324,8 @@ impl Reader { }) .collect(); +self.line_number += rows.len(); + let schema_fields = self.schema.fields(); let projected_fields: Vec = projection @@ -358,8 +365,9 @@ impl Reader { Err(_) => { // TODO: we should surface the underlying error here. return Err(ArrowError::ParseError(format!( -"Error while parsing value {}", -s +"Error while parsing value {} at line {}", +s, +self.line_number + row_index ))); } } @@ -503,6 +511,7 @@ impl ReaderBuilder { projection: self.projection.clone(), record_iter, batch_size: self.batch_size, +line_number: if self.has_headers { 1 } else { 0 }, }) } } @@ -718,4 +727,32 @@ mod tests { assert_eq!(false, batch.column(1).is_null(3)); assert_eq!(false, batch.column(1).is_null(4)); } + +#[test] +fn test_parse_invalid_csv() { +let file = File::open("test/data/various_types_invalid.csv").unwrap(); + +let schema = Schema::new(vec![ +Field::new("c_int", DataType::UInt64, false), +Field::new("c_float", DataType::Float32, false), +Field::new("c_string", DataType::Utf8, false), +Field::new("c_bool", DataType::Boolean, false), +]); + +let builder = ReaderBuilder::new() +.with_schema(Arc::new(schema)) +.has_headers(true) +.with_delimiter(b'|') +.with_batch_size(512) +.with_projection(vec![0, 1, 2, 3]); + +let mut csv = builder.build(file).unwrap(); +match csv.next() { +Err(e) => assert_eq!( +"ParseError(\"Error while parsing value 4.x4 at line 4\")", +
[arrow] branch master updated: ARROW-4859: [GLib] Add garrow_numeric_array_mean()
This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new dcee4ad ARROW-4859: [GLib] Add garrow_numeric_array_mean() dcee4ad is described below commit dcee4ad1c5f62df0265e26f59ec58ed85597ffa2 Author: Yosuke Shiro AuthorDate: Fri Mar 15 05:36:09 2019 +0900 ARROW-4859: [GLib] Add garrow_numeric_array_mean() Author: Yosuke Shiro Closes #3889 from shiro615/glib-mean and squashes the following commits: 1d2abbf5 Use assert_in_delta for floating point number e4be5213 Use 0.0 for double dfff46be Add garrow_numeric_array_mean() --- c_glib/arrow-glib/basic-array.cpp | 27 +++ c_glib/arrow-glib/basic-array.h | 4 c_glib/test/test-numeric-array.rb | 26 ++ cpp/src/arrow/compute/api.h | 1 + 4 files changed, 58 insertions(+) diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index 7409945..c201f9d 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -939,6 +939,33 @@ garrow_numeric_array_class_init(GArrowNumericArrayClass *klass) { } +/** + * garrow_numeric_array_mean: + * @array: A #GArrowNumericArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed mean. 
+ * + * Since: 0.13.0 + */ +gdouble +garrow_numeric_array_mean(GArrowNumericArray *array, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto memory_pool = arrow::default_memory_pool(); + arrow::compute::FunctionContext context(memory_pool); + arrow::compute::Datum mean_datum; + auto status = arrow::compute::Mean(, arrow_array, _datum); + if (garrow_error_check(error, status, "[numeric-array][mean]")) { +using ScalarType = typename arrow::TypeTraits::ScalarType; +auto arrow_numeric_scalar = std::dynamic_pointer_cast(mean_datum.scalar()); +return arrow_numeric_scalar->value; + } else { +return 0.0; + } +} + G_DEFINE_TYPE(GArrowInt8Array, garrow_int8_array, diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index 592699d..1dde2f2 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -212,6 +212,10 @@ struct _GArrowNumericArrayClass GArrowPrimitiveArrayClass parent_class; }; +GARROW_AVAILABLE_IN_0_13 +gdouble garrow_numeric_array_mean(GArrowNumericArray *array, + GError **error); + #define GARROW_TYPE_INT8_ARRAY (garrow_int8_array_get_type()) G_DECLARE_DERIVABLE_TYPE(GArrowInt8Array, garrow_int8_array, diff --git a/c_glib/test/test-numeric-array.rb b/c_glib/test/test-numeric-array.rb new file mode 100644 index 000..d919d59 --- /dev/null +++ b/c_glib/test/test-numeric-array.rb @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestNumericArray < Test::Unit::TestCase + include Helper::Buildable + + def test_mean +array = build_double_array([1.1, 2.2, nil]) +assert_in_delta(array.values.sum / 2, +array.mean) + end +end diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index cd5f11e..42839ce 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -24,5 +24,6 @@ #include "arrow/compute/kernels/boolean.h" // IWYU pragma: export #include "arrow/compute/kernels/cast.h" // IWYU pragma: export #include "arrow/compute/kernels/hash.h" // IWYU pragma: export +#include "arrow/compute/kernels/mean.h" // IWYU pragma: export #endif // ARROW_COMPUTE_API_H
[arrow] branch master updated: ARROW-4251: [C++][Release] Add option to set ARROW_BOOST_VENDORED environment variable in verify-release-candidate.sh
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 954e3f4 ARROW-4251: [C++][Release] Add option to set ARROW_BOOST_VENDORED environment variable in verify-release-candidate.sh 954e3f4 is described below commit 954e3f44c8753e548c7b24f2269135716a2429cd Author: Wes McKinney AuthorDate: Thu Mar 14 14:30:38 2019 -0500 ARROW-4251: [C++][Release] Add option to set ARROW_BOOST_VENDORED environment variable in verify-release-candidate.sh I'm taking this for a spin on 0.12.1 RC0 on Ubuntu 14.04 (where the system boost does not work -- see ARROW-4868) Author: Wes McKinney Closes #3903 from wesm/ARROW-4251 and squashes the following commits: eb38b0f68 Add option to set ARROW_BOOST_VENDORED environment variable in dev/release/verify-release-candidate.sh --- dev/release/verify-release-candidate.sh | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e96cd28..8d4cc52 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -26,7 +26,8 @@ # - nodejs >= 6.0.0 (best way is to use nvm) # # If using a non-system Boost, set BOOST_ROOT and add Boost libraries to -# LD_LIBRARY_PATH +# LD_LIBRARY_PATH. 
If your system Boost is too old for the C++ libraries, then +# set $ARROW_BOOST_VENDORED to "ON" or "1" case $# in 3) ARTIFACT="$1" @@ -49,6 +50,8 @@ set -o pipefail HERE=$(cd `dirname "${BASH_SOURCE[0]:-$0}"` && pwd) +ARROW_BOOST_VENDORED=${ARROW_BOOST_VENDORED:=OFF} + ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' detect_cuda() { @@ -207,6 +210,7 @@ ${ARROW_CMAKE_OPTIONS} -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_BOOST_USE_SHARED=ON +-DARROW_BOOST_VENDORED=$ARROW_BOOST_VENDORED -DCMAKE_BUILD_TYPE=release -DARROW_BUILD_TESTS=ON -DARROW_CUDA=${ARROW_CUDA}
[arrow] branch master updated: ARROW-4486: [Python][CUDA] Add base argument to foreign_buffer
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 2b574f9 ARROW-4486: [Python][CUDA] Add base argument to foreign_buffer 2b574f9 is described below commit 2b574f913784e77cc79ec7d002c94b1ae51116f1 Author: Pearu Peterson AuthorDate: Thu Mar 14 09:47:20 2019 -0500 ARROW-4486: [Python][CUDA] Add base argument to foreign_buffer Resolves [ARROW-4486](https://issues.apache.org/jira/browse/ARROW-4486) Author: Pearu Peterson Closes #3850 from pearu/arrow-4486 and squashes the following commits: e96265df0 Add base argument to foreign_buffer, resolves ARROW-4486 --- python/pyarrow/_cuda.pxd | 5 - python/pyarrow/_cuda.pyx | 26 +++--- python/pyarrow/tests/test_cuda.py | 27 +++ 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/_cuda.pxd b/python/pyarrow/_cuda.pxd index 3b8d966..1180601 100644 --- a/python/pyarrow/_cuda.pxd +++ b/python/pyarrow/_cuda.pxd @@ -41,8 +41,11 @@ cdef class IpcMemHandle: cdef class CudaBuffer(Buffer): cdef: shared_ptr[CCudaBuffer] cuda_buffer +object base -cdef void init_cuda(self, const shared_ptr[CCudaBuffer]& buffer) +cdef void init_cuda(self, +const shared_ptr[CCudaBuffer]& buffer, +object base) cdef class HostBuffer(Buffer): diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx index fa84fc6..87be0e6 100644 --- a/python/pyarrow/_cuda.pyx +++ b/python/pyarrow/_cuda.pyx @@ -190,7 +190,7 @@ cdef class Context: check_status(self.context.get().Allocate(nbytes, )) return pyarrow_wrap_cudabuffer(cudabuf) -def foreign_buffer(self, address, size): +def foreign_buffer(self, address, size, base=None): """Create device buffer from address and size as a view. The caller is responsible for allocating and freeing the @@ -206,6 +206,8 @@ cdef class Context: `get_device_address` method. size : int Specify the size of device buffer in bytes. 
+base : {None, object} + Specify object that owns the referenced memory. Returns --- @@ -222,7 +224,7 @@ cdef class Context: check_status(self.context.get().View(c_addr, c_size, )) -return pyarrow_wrap_cudabuffer(cudabuf) +return pyarrow_wrap_cudabuffer_base(cudabuf, base) def open_ipc_buffer(self, ipc_handle): """ Open existing CUDA IPC memory handle @@ -309,7 +311,7 @@ cdef class Context: """ if isinstance(obj, HostBuffer): -return self.foreign_buffer(obj.address, obj.size) +return self.foreign_buffer(obj.address, obj.size, base=obj) elif isinstance(obj, Buffer): return CudaBuffer.from_buffer(obj) elif isinstance(obj, CudaBuffer): @@ -323,7 +325,7 @@ cdef class Context: start, end = get_contiguous_span( desc['shape'], desc.get('strides'), np.dtype(desc['typestr']).itemsize) -return self.foreign_buffer(addr + start, end - start) +return self.foreign_buffer(addr + start, end - start, base=obj) raise ArrowTypeError('cannot create device buffer view from' ' `%s` object' % (type(obj))) @@ -387,9 +389,12 @@ cdef class CudaBuffer(Buffer): "`.device_buffer`" " method instead.") -cdef void init_cuda(self, const shared_ptr[CCudaBuffer]& buffer): +cdef void init_cuda(self, +const shared_ptr[CCudaBuffer]& buffer, +object base): self.cuda_buffer = buffer self.init( buffer) +self.base = base @staticmethod def from_buffer(buf): @@ -426,7 +431,7 @@ cdef class CudaBuffer(Buffer): ctx = Context.from_numba(mem.context) if mem.device_pointer.value is None and mem.size==0: return ctx.new_buffer(0) -return ctx.foreign_buffer(mem.device_pointer.value, mem.size) +return ctx.foreign_buffer(mem.device_pointer.value, mem.size, base=mem) def to_numba(self): """Return numba memory pointer of CudaBuffer instance. 
@@ -949,9 +954,16 @@ cdef public api bint pyarrow_is_cudabuffer(object buffer): cdef public api object \ +pyarrow_wrap_cudabuffer_base(const shared_ptr[CCudaBuffer]& buf, base): +cdef CudaBuffer result = CudaBuffer.__new__(CudaBuffer) +result.init_cuda(buf, base) +return result + + +cdef public api object \ pyarrow_wrap_cudabuffer(const shared_ptr[CCudaBuffer]& buf): cdef
[arrow] branch master updated: [C++/Python/CI] Use more recent docker-compose in manylinux1 jobs [skip ci] (#3897)
This is an automated email from the ASF dual-hosted git repository. kszucs pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 3e6d968 [C++/Python/CI] Use more recent docker-compose in manylinux1 jobs [skip ci] (#3897) 3e6d968 is described below commit 3e6d968db7262bc2924cea6d248b2df572485f91 Author: Uwe L. Korn AuthorDate: Thu Mar 14 14:01:27 2019 +0100 [C++/Python/CI] Use more recent docker-compose in manylinux1 jobs [skip ci] (#3897) --- dev/tasks/python-wheels/travis.linux.yml | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dev/tasks/python-wheels/travis.linux.yml b/dev/tasks/python-wheels/travis.linux.yml index 64b9e7e..cccb30b 100644 --- a/dev/tasks/python-wheels/travis.linux.yml +++ b/dev/tasks/python-wheels/travis.linux.yml @@ -21,6 +21,12 @@ language: cpp services: - docker +# Update docker to support newer docker-compose versions +addons: + apt: +packages: + - docker-ce + # don't build twice if: tag IS blank @@ -30,7 +36,7 @@ env: - TRAVIS_TAG={{ task.tag }} before_script: - - docker pull quay.io/xhochy/arrow_manylinux1_x86_64_base:latest + - sudo pip install -U docker-compose script: - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow
[arrow] branch master updated: ARROW-3364: [Docs] Add docker-compose integration documentation
This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 9198f63 ARROW-3364: [Docs] Add docker-compose integration documentation 9198f63 is described below commit 9198f630663e7f8ddaecbd4d9f037f61e5b450d3 Author: François Saint-Jacques AuthorDate: Thu Mar 14 07:54:56 2019 -0500 ARROW-3364: [Docs] Add docker-compose integration documentation Author: François Saint-Jacques Closes #3882 from fsaintjacques/ARROW-3364-docker-compose-documentation and squashes the following commits: 08f91ff35 Simplify makefile 595d50ac0 Refactor ambiguous makefile run target 9d90bbaf8 ARROW-3364: Add docker-compose integration documentation --- Makefile.docker| 92 -- .../{building.rst => developers/documentation.rst} | 5 +- docs/source/{ => developers}/index.rst | 31 +--- docs/source/developers/integration.rst | 67 docs/source/index.rst | 4 +- 5 files changed, 109 insertions(+), 90 deletions(-) diff --git a/Makefile.docker b/Makefile.docker index dc1f0bf..8e7d861 100644 --- a/Makefile.docker +++ b/Makefile.docker @@ -16,70 +16,48 @@ # under the License. 
# build docker compose images: +# $ make -f Makefile.docker build-cpp +# To run the test suite # $ make -f Makefile.docker cpp -# run the built image: -# $ make -f Makefile.docker run cpp -.PHONY: clean run cpp cpp-alpine go js java rust r +LANGUAGES = cpp cpp-alpine cpp-cmake32 c_glib go java js python python-alpine rust r +MISC = lint iwyu clang-format docs pandas-master +SERVERS = dask hdfs-integration spark-integration + +# declare images dependencies +DEPENDS_ON_CPP = build-c_glib build-python build-r +DEPENDS_ON_CPP_ALPINE = build-python-alpine +DEPENDS_ON_PYTHON = build-lint build-docs build-dask build-hdfs-integration build-spark-integration +DEPENDS_ON_LINT = build-iwyu build-clang-format + +SERVICES = $(LANGUAGES) $(MISC) $(SERVERS) +.PHONY: clean build-% run-% $(SERVICES) DC := docker-compose clean: $(DC) down -v -run: - $(DC) run --rm $(filter-out $@,$(MAKECMDGOALS)) - -go: - $(DC) build go - -js: - $(DC) build js - -java: - $(DC) build java - -rust: - $(DC) build rust - -cpp: - $(DC) build cpp - -cpp-alpine: - $(DC) build cpp-alpine - -cpp-cmake32: - $(DC) build cpp-cmake32 - -c_glib: cpp - $(DC) build c_glib - -r: cpp - $(DC) build r - -python: cpp - $(DC) build python - -python-alpine: cpp-alpine - $(DC) build python-alpine - -lint: python - $(DC) build lint - -iwyu: lint - -clang-format: lint - -docs: python - -dask: python - $(DC) build dask - -hdfs: python - $(DC) build hdfs-integration +# Default build target if no dependencies +build-%: + $(DC) build $* + +# The following targets create the dependencies of the form `build-X: build-Y` +$(DEPENDS_ON_CPP): build-%: build-cpp + $(DC) build $* +$(DEPENDS_ON_CPP_ALPINE): build-%: build-cpp-alpine + $(DC) build $* +$(DEPENDS_ON_PYTHON): build-%: build-python + $(DC) build $* +# The dependents of lint image don't build anything +$(DEPENDS_ON_LINT): build-%: build-lint + +# panda master is a special case due to --no-cache +build-pandas-master: build-python + $(DC) build --no-cache pandas-master -spark: 
python - $(DC) build spark-integration +run-%: build-% + $(DC) run --rm $* -pandas-master: python - $(DC) build --no-cache pandas-master +# Trick to get `service` expand to `run-service` +$(SERVICES): % : run-% diff --git a/docs/source/building.rst b/docs/source/developers/documentation.rst similarity index 96% rename from docs/source/building.rst rename to docs/source/developers/documentation.rst index 2239a19..1fbab43 100644 --- a/docs/source/building.rst +++ b/docs/source/developers/documentation.rst @@ -90,9 +90,6 @@ You can use Docker to build the documentation: .. code-block:: shell - docker-compose build cpp - docker-compose build python - docker-compose build docs - docker-compose run docs + make -f Makefile.docker docs The final output is located under ``docs/_build/html``. diff --git a/docs/source/index.rst b/docs/source/developers/index.rst similarity index 56% copy from docs/source/index.rst copy to docs/source/developers/index.rst index 2b367b3..e99f7c5 100644 --- a/docs/source/index.rst +++ b/docs/source/developers/index.rst @@ -15,34 +15,11 @@ .. specific language governing permissions and limitations .. under the License. -Apache Arrow - - -Apache Arrow is a cross-language development platform for in-memory data. It -specifies a standardized language-independent columnar memory format for flat -and hierarchical data, organized for
[arrow] branch master updated: ARROW-4743: [Java] Add javadoc missing in classes and methods in java…
This is an automated email from the ASF dual-hosted git repository. ravindra pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new d2e1ee9 ARROW-4743: [Java] Add javadoc missing in classes and methods in java… d2e1ee9 is described below commit d2e1ee9edd70c05145adb0f4fc0da38de3be4e6d Author: Micah Kornfield AuthorDate: Thu Mar 14 11:34:22 2019 +0530 ARROW-4743: [Java] Add javadoc missing in classes and methods in java… This is the result finding errors after applying the following diff to suppressions.xml (note for some reason this only seems to highlight method level documentation issue not class ones, if anyone can point out the right configuration knob to automatically detect missing class docs as well I would appreciate it): --- a/java/dev/checkstyle/suppressions.xml +++ b/java/dev/checkstyle/suppressions.xml @@ -24,17 +24,12 @@ - - - + - - Author: Micah Kornfield Closes #3792 from emkornfield/javadoc and squashes the following commits: d01c123c Address code review feedback a64c11ec ARROW-4743: Add javadoc missing in classes and methods in java/memory --- .../src/main/java/io/netty/buffer/ArrowBuf.java| 42 ++ .../io/netty/buffer/MutableWrappedByteBuf.java | 5 ++- .../io/netty/buffer/PooledByteBufAllocatorL.java | 3 ++ .../org/apache/arrow/memory/BaseAllocator.java | 16 + .../org/apache/arrow/memory/BoundsChecking.java| 7 .../apache/arrow/memory/OutOfMemoryException.java | 8 - .../org/apache/arrow/memory/RootAllocator.java | 5 +-- .../test/java/io/netty/buffer/TestArrowBuf.java| 3 +- 8 files changed, 82 insertions(+), 7 deletions(-) diff --git a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java index 607f8ca..bb6a940 100644 --- a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java @@ -41,6 +41,18 @@ import 
org.apache.arrow.util.Preconditions; import io.netty.util.internal.PlatformDependent; +/** + * ArrowBuf is the abstraction around raw byte arrays that + * comprise arrow data structures. + * + * + * Specifically, it serves as a facade over + * {@linkplain UnsafeDirectLittleEndian} memory objects that hides the details + * of raw memory addresses. + * + * ArrowBuf supports reference counting and ledgering to closely track where + * memory is being used. + */ public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ArrowBuf.class); @@ -60,6 +72,17 @@ public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable { new HistoricalLog(BaseAllocator.DEBUG_LOG_LENGTH, "ArrowBuf[%d]", id) : null; private volatile int length; + /** + * Constructs a new ArrowBuf + * @param refCnt The atomic integer to use for reference counting this buffer. + * @param ledger The ledger to use for tracking memory usage of this buffer. + * @param byteBuf The underlying storage for this buffer. + * @param manager The manager that handles replacing this buffer. + * @param alloc The allocator for the buffer (needed for superclass compatibility) + * @param offset The byte offset into byteBuf this buffer starts at. + * @param length The byte length of this buffer + * @param isEmpty Indicates if this buffer is empty which enables some optimizations. + */ public ArrowBuf( final AtomicInteger refCnt, final BufferLedger ledger, @@ -69,6 +92,7 @@ public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable { final int offset, final int length, boolean isEmpty) { +// TODO(emkornfield): Should this be byteBuf.maxCapacity - offset? super(byteBuf.maxCapacity()); this.refCnt = refCnt; this.udle = byteBuf; @@ -86,6 +110,7 @@ public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable { } + /** Returns a debug friendly string for the given ByteBuf. 
*/ public static String bufferState(final ByteBuf buf) { final int cap = buf.capacity(); final int mcap = buf.maxCapacity(); @@ -97,6 +122,10 @@ public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable { cap, mcap, ri, rb, wi, wb); } + /** + * Returns this if size is less than {@link #capacity()}, otherwise + * delegates to {@link BufferManager#replace(ArrowBuf, int)} to get a new buffer. + */ public ArrowBuf reallocIfNeeded(final int size) { Preconditions.checkArgument(size >= 0, "reallocation size must be non-negative"); @@ -928,6 +957,10 @@ public final class ArrowBuf extends
[arrow] branch master updated: ARROW-4206: [Gandiva] support decimal divide and mod
This is an automated email from the ASF dual-hosted git repository. ravindra pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/master by this push: new 31aa19d ARROW-4206: [Gandiva] support decimal divide and mod 31aa19d is described below commit 31aa19da25352d5f6abdf3264d57306c3b52bb05 Author: Pindikura Ravindra AuthorDate: Thu Mar 14 11:29:42 2019 +0530 ARROW-4206: [Gandiva] support decimal divide and mod Author: Pindikura Ravindra Closes #3813 from pravindra/gdivmod and squashes the following commits: 96ef4054 ARROW-4206: add global symbol for new fns a9ad13fe ARROW-4206: Add more tests/comments 697c2343 ARROW-4206: Fix build errors 267f117e ARROW-4206: support decimal divide and mod --- cpp/src/arrow/util/basic_decimal.h | 3 + cpp/src/arrow/util/decimal-test.cc | 115 +- cpp/src/arrow/util/decimal.h| 5 + cpp/src/gandiva/decimal_ir.cc | 58 + cpp/src/gandiva/decimal_ir.h| 4 + cpp/src/gandiva/decimal_xlarge.cc | 121 -- cpp/src/gandiva/decimal_xlarge.h| 8 + cpp/src/gandiva/function_registry_arithmetic.cc | 2 + cpp/src/gandiva/precompiled/CMakeLists.txt | 7 +- cpp/src/gandiva/precompiled/decimal_ops.cc | 84 +++ cpp/src/gandiva/precompiled/decimal_ops.h | 10 + cpp/src/gandiva/precompiled/decimal_ops_test.cc | 281 cpp/src/gandiva/precompiled/decimal_wrapper.cc | 34 +++ cpp/src/gandiva/tests/decimal_single_test.cc| 33 ++- 14 files changed, 698 insertions(+), 67 deletions(-) diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index 7929b11..2e5857c 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -138,6 +138,9 @@ class ARROW_EXPORT BasicDecimal128 { /// - If 'round' is false, the right-most digits are simply dropped. BasicDecimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const; + // returns 1 for positive and zero decimal values, -1 for negative decimal values. 
+ inline int64_t Sign() const { return 1 | (high_bits_ >> 63); } + /// \brief count the number of leading binary zeroes. int32_t CountLeadingBinaryZeros() const; diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index db4d35f..4ba7d7f 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -23,12 +23,16 @@ #include #include +#include #include "arrow/status.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" #include "arrow/util/decimal.h" #include "arrow/util/macros.h" +using boost::multiprecision::int128_t; + namespace arrow { class DecimalTestFixture : public ::testing::Test { @@ -466,20 +470,113 @@ TEST(Decimal128Test, TestToInteger) { ASSERT_RAISES(Invalid, invalid_int64.ToInteger()); } +template +std::vector GetRandomNumbers(int32_t size) { + auto rand = random::RandomArrayGenerator(0x5487655); + auto x_array = rand.Numeric(size, 0, std::numeric_limits::max(), 0); + + auto x_ptr = x_array->data()->template GetValues(1); + std::vector ret; + for (int i = 0; i < size; ++i) { +ret.push_back(x_ptr[i]); + } + return ret; +} + TEST(Decimal128Test, Multiply) { - Decimal128 result; + ASSERT_EQ(Decimal128(60501), Decimal128(301) * Decimal128(201)); + + ASSERT_EQ(Decimal128(-60501), Decimal128(-301) * Decimal128(201)); + + ASSERT_EQ(Decimal128(-60501), Decimal128(301) * Decimal128(-201)); + + ASSERT_EQ(Decimal128(60501), Decimal128(-301) * Decimal128(-201)); + + // Test some random numbers. 
+ for (auto x : GetRandomNumbers(16)) { +for (auto y : GetRandomNumbers(16)) { + Decimal128 result = Decimal128(x) * Decimal128(y); + ASSERT_EQ(Decimal128(static_cast(x) * y), result) + << " x: " << x << " y: " << y; +} + } + + // Test some edge cases + for (auto x : std::vector{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) { +for (auto y : + std::vector{-INT32_MAX, -32, -2, -1, 0, 1, 2, 32, INT32_MAX}) { + Decimal128 result = Decimal128(x.str()) * Decimal128(y.str()); + ASSERT_EQ(Decimal128((x * y).str()), result) << " x: " << x << " y: " << y; +} + } +} + +TEST(Decimal128Test, Divide) { + ASSERT_EQ(Decimal128(66), Decimal128(20100) / Decimal128(301)); + + ASSERT_EQ(Decimal128(-66), Decimal128(-20100) / Decimal128(301)); + + ASSERT_EQ(Decimal128(-66), Decimal128(20100) / Decimal128(-301)); - result = Decimal128("301") * Decimal128("201"); - ASSERT_EQ(result.ToIntegerString(), "60501"); + ASSERT_EQ(Decimal128(66), Decimal128(-20100) / Decimal128(-301)); - result = Decimal128("-301") * Decimal128("201"); - ASSERT_EQ(result.ToIntegerString(), "-60501"); + // Test some random numbers. +