[arrow] branch new_divide created (now 12a24e9)

2019-03-14 Thread paddyhoran
This is an automated email from the ASF dual-hosted git repository.

paddyhoran pushed a change to branch new_divide
in repository https://gitbox.apache.org/repos/asf/arrow.git.


  at 12a24e9  A clean build

This branch includes the following new commits:

 new 12a24e9  A clean build

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[arrow] 01/01: A clean build

2019-03-14 Thread paddyhoran
This is an automated email from the ASF dual-hosted git repository.

paddyhoran pushed a commit to branch new_divide
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 12a24e917bfdac1760200f2e5f4afed54d608411
Author: Paddy Horan 
AuthorDate: Thu Mar 14 20:58:57 2019 -0400

A clean build
---
 rust/arrow/src/compute/arithmetic_kernels.rs | 73 ++--
 rust/arrow/src/compute/util.rs   | 35 +
 rust/arrow/src/datatypes.rs  | 41 
 3 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs 
b/rust/arrow/src/compute/arithmetic_kernels.rs
index 2566002..d7f0b32 100644
--- a/rust/arrow/src/compute/arithmetic_kernels.rs
+++ b/rust/arrow/src/compute/arithmetic_kernels.rs
@@ -27,13 +27,13 @@ use std::ops::{Add, Div, Mul, Sub};
 use std::slice::from_raw_parts_mut;
 use std::sync::Arc;
 
-use num::Zero;
+use num::{One, Zero};
 
 use crate::array::*;
 use crate::array_data::ArrayData;
 use crate::buffer::MutableBuffer;
 use crate::builder::PrimitiveBuilder;
-use crate::compute::util::apply_bin_op_to_option_bitmap;
+use crate::compute::util::{apply_bin_op_to_option_bitmap, is_valid};
 use crate::datatypes;
 use crate::error::{ArrowError, Result};
 
@@ -123,6 +123,68 @@ where
 Ok(PrimitiveArrayfrom(Arc::new(data)))
 }
 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn simd_divide(
+left: ,
+right: ,
+) -> Result>
+where
+T: datatypes::ArrowNumericType,
+T::Native: One + Zero,
+T::Simd: Add
++ Sub
++ Mul
++ Div,
+{
+if left.len() != right.len() {
+return Err(ArrowError::ComputeError(
+"Cannot perform math operation on arrays of different 
length".to_string(),
+));
+}
+
+let null_bit_buffer = apply_bin_op_to_option_bitmap(
+left.data().null_bitmap(),
+right.data().null_bitmap(),
+|a, b| a & b,
+)?;
+
+let lanes = T::lanes();
+let buffer_size = left.len() * mem::size_of::();
+let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
+
+for i in (0..left.len()).step_by(lanes) {
+let simd_right_raw_check = T::load(right.value_slice(i, lanes));
+let simd_right_check = unsafe{T::mask_select(is_valid::(, i, 
lanes, right.len()), simd_right_raw_check, T::init(T::Native::one()))};
+let is_zero = T::eq(T::init(T::Native::zero()), simd_right_check);
+if T::mask_any(is_zero) {
+return Err(ArrowError::DivideByZero);
+}
+let simd_right_raw = T::load(right.value_slice(i, lanes));
+let simd_right = unsafe{T::mask_select(is_valid::(, i, lanes, 
right.len()), simd_right_raw, T::init(T::Native::one()))};
+let simd_left = T::load(left.value_slice(i, lanes));
+let simd_result = T::bin_op(simd_left, simd_right, |a, b| a / b);
+
+let result_slice:  [T::Native] = unsafe {
+from_raw_parts_mut(
+(result.data_mut().as_mut_ptr() as *mut T::Native).offset(i as 
isize),
+lanes,
+)
+};
+T::write(simd_result, result_slice);
+}
+
+let data = ArrayData::new(
+T::get_data_type(),
+left.len(),
+None,
+null_bit_buffer,
+left.offset(),
+vec![result.freeze()],
+vec![],
+);
+Ok(PrimitiveArrayfrom(Arc::new(data)))
+}
+
 /// Perform `left + right` operation on two arrays. If either left or right 
value is null
 /// then the result is also null.
 pub fn add(
@@ -199,8 +261,13 @@ where
 + Sub
 + Mul
 + Div
-+ Zero,
++ Zero
++ One,
 {
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+return simd_divide(, );
+
+#[allow(unreachable_code)]
 math_op(left, right, |a, b| {
 if b.is_zero() {
 Err(ArrowError::DivideByZero)
diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs
index 55726b8..64f060a 100644
--- a/rust/arrow/src/compute/util.rs
+++ b/rust/arrow/src/compute/util.rs
@@ -20,6 +20,7 @@
 use crate::bitmap::Bitmap;
 use crate::buffer::Buffer;
 use crate::error::Result;
+use crate::datatypes::*;
 
 /// Applies a given binary operation, `op`, to two references to 
`Option`'s.
 ///
@@ -44,6 +45,40 @@ where
 }
 }
 
+
+pub unsafe fn is_valid(bitmap: , i: usize, lanes: usize, 
len: usize) -> T::SimdMask
+where
+T: ArrowNumericType,
+{
+
+// Validity based on the length of the Array
+let upper_bound = i + lanes;
+let mut length_based_validity  = T::new_mask(true);
+for j in upper_bound..len {
+length_based_validity = T::mask_set(length_based_validity, j - i, 
false);
+}
+
+match  {
+Some(_) => length_based_validity,
+None => length_based_validity,
+}
+
+//let length_based_validity = if upper_bound < len {
+//

[arrow] branch master updated: ARROW-4673: [C++] Implement Scalar::Equals and Datum::Equals

2019-03-14 Thread wesm
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 548e194  ARROW-4673: [C++] Implement Scalar::Equals and Datum::Equals
548e194 is described below

commit 548e1949d527717d7821a4ab2f09ff7c39882152
Author: François Saint-Jacques 
AuthorDate: Thu Mar 14 20:03:44 2019 -0500

ARROW-4673: [C++] Implement Scalar::Equals and Datum::Equals

Handy for validating kernels.

Author: François Saint-Jacques 
Author: Wes McKinney 

Closes #3875 from fsaintjacques/ARROW-4673-datum-equal and squashes the 
following commits:

3fff08785  Add common base class for some primitive scalar, a 
little DRY
093e1bd55  Fix struct Scalar warning
66cae36d8  Fix warnings.
7a7c0d6a1  ARROW-4673:  Implement Scalar::Equals 
and Datum::Equals
---
 cpp/src/arrow/compare.cc| 91 +
 cpp/src/arrow/compare.h |  6 ++
 cpp/src/arrow/compute/kernel.h  | 46 +
 cpp/src/arrow/compute/kernels/aggregate-test.cc |  4 +-
 cpp/src/arrow/scalar-test.cc| 15 
 cpp/src/arrow/scalar.cc |  9 ++-
 cpp/src/arrow/scalar.h  | 32 +++--
 cpp/src/arrow/testing/gtest_util.cc |  6 ++
 cpp/src/arrow/testing/gtest_util.h  |  8 +++
 cpp/src/arrow/type_fwd.h|  2 +
 cpp/src/arrow/util/memory.h | 12 
 cpp/src/arrow/visitor.cc| 39 +++
 cpp/src/arrow/visitor.h | 32 +
 cpp/src/arrow/visitor_inline.h  | 18 +
 14 files changed, 308 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index fcb16b5..aca6094 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -30,6 +30,7 @@
 
 #include "arrow/array.h"
 #include "arrow/buffer.h"
+#include "arrow/scalar.h"
 #include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
@@ -38,6 +39,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/memory.h"
 #include "arrow/visitor_inline.h"
 
 namespace arrow {
@@ -717,6 +719,78 @@ class TypeEqualsVisitor {
   bool result_;
 };
 
+class ScalarEqualsVisitor {
+ public:
+  explicit ScalarEqualsVisitor(const Scalar& right) : right_(right), 
result_(false) {}
+
+  Status Visit(const NullScalar& left) {
+result_ = true;
+return Status::OK();
+  }
+
+  template 
+  typename std::enable_if::value,
+  Status>::type
+  Visit(const T& left_) {
+const auto& right = checked_cast(right_);
+result_ = right.value == left_.value;
+return Status::OK();
+  }
+
+  template 
+  typename std::enable_if::value, 
Status>::type Visit(
+  const T& left_) {
+const auto& left = checked_cast(left_);
+const auto& right = checked_cast(right_);
+result_ = internal::SharedPtrEquals(left.value, right.value);
+return Status::OK();
+  }
+
+  Status Visit(const Decimal128Scalar& left) {
+const auto& right = checked_cast(right_);
+result_ = left.value == right.value;
+return Status::OK();
+  }
+
+  Status Visit(const ListScalar& left) {
+const auto& right = checked_cast(right_);
+result_ = internal::SharedPtrEquals(left.value, right.value);
+return Status::OK();
+  }
+
+  Status Visit(const StructScalar& left) {
+const auto& right = checked_cast(right_);
+
+if (right.value.size() != left.value.size()) {
+  result_ = false;
+} else {
+  bool all_equals = true;
+  for (size_t i = 0; i < left.value.size() && all_equals; i++) {
+all_equals &= internal::SharedPtrEquals(left.value[i], right.value[i]);
+  }
+  result_ = all_equals;
+}
+
+return Status::OK();
+  }
+
+  Status Visit(const UnionScalar& left) { return 
Status::NotImplemented("union"); }
+
+  Status Visit(const DictionaryScalar& left) {
+return Status::NotImplemented("dictionary");
+  }
+
+  Status Visit(const ExtensionScalar& left) {
+return Status::NotImplemented("extension");
+  }
+
+  bool result() const { return result_; }
+
+ protected:
+  const Scalar& right_;
+  bool result_;
+};
+
 }  // namespace internal
 
 bool ArrayEquals(const Array& left, const Array& right) {
@@ -915,4 +989,21 @@ bool TypeEquals(const DataType& left, const DataType& 
right, bool check_metadata
   return are_equal;
 }
 
+bool ScalarEquals(const Scalar& left, const Scalar& right) {
+  bool are_equal = false;
+  if ( == ) {
+are_equal = true;
+  } else if (!left.type->Equals(right.type)) {
+are_equal = false;
+  } else if (left.is_valid != right.is_valid) {
+are_equal = false;
+  } else {
+

[arrow] branch master updated: ARROW-4870: [Ruby] Fix msys2_mingw_dependencies

2019-03-14 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 76e5425  ARROW-4870: [Ruby] Fix msys2_mingw_dependencies
76e5425 is described below

commit 76e5425da7ec335c7213a0082a1872850517446d
Author: dsisnero 
AuthorDate: Fri Mar 15 09:14:28 2019 +0900

ARROW-4870: [Ruby] Fix msys2_mingw_dependencies

$ pacman -Ss arrow
mingw32/mingw-w64-i686-arrow 0.11.1-1
Apache Arrow is a cross-language development platform for in-memory 
data (mingw-w64)
mingw64/mingw-w64-x86_64-arrow 0.11.1-1 [installed]
Apache Arrow is a cross-language development platform for in-memory 
data (mingw-w64)

Author: dsisnero 

Closes #3881 from dsisnero/patch-1 and squashes the following commits:

1839d12a  change mys2_mingw_dependencies to correct package
---
 ruby/red-arrow/red-arrow.gemspec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec
index 121f567..9451c9c 100644
--- a/ruby/red-arrow/red-arrow.gemspec
+++ b/ruby/red-arrow/red-arrow.gemspec
@@ -55,5 +55,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency("test-unit")
   spec.add_development_dependency("yard")
 
-  spec.metadata["msys2_mingw_dependencies"] = "apache-arrow"
+  spec.metadata["msys2_mingw_dependencies"] = "arrow"
 end



[arrow] branch master updated: ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property

2019-03-14 Thread shiro
This is an automated email from the ASF dual-hosted git repository.

shiro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 99a47ab  ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 
property
99a47ab is described below

commit 99a47ab1f8d3a89e5f49006f072c9fba276858e1
Author: Kouhei Sutou 
AuthorDate: Fri Mar 15 09:03:11 2019 +0900

ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property

Author: Kouhei Sutou 

Closes #3894 from kou/glib-cast-options-allow-invalid-utf8 and squashes the 
following commits:

9fc06744   Add GArrowCastOptions::allow-invalid-utf8 property
---
 c_glib/arrow-glib/compute.cpp | 24 +++-
 c_glib/test/test-cast.rb  | 17 +
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 2039eea..a9f6721 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -42,7 +42,8 @@ enum {
   PROP_0,
   PROP_ALLOW_INT_OVERFLOW,
   PROP_ALLOW_TIME_TRUNCATE,
-  PROP_ALLOW_FLOAT_TRUNCATE
+  PROP_ALLOW_FLOAT_TRUNCATE,
+  PROP_ALLOW_INVALID_UTF8,
 };
 
 G_DEFINE_TYPE_WITH_PRIVATE(GArrowCastOptions,
@@ -72,6 +73,9 @@ garrow_cast_options_set_property(GObject *object,
   case PROP_ALLOW_FLOAT_TRUNCATE:
 priv->options.allow_float_truncate = g_value_get_boolean(value);
 break;
+  case PROP_ALLOW_INVALID_UTF8:
+priv->options.allow_invalid_utf8 = g_value_get_boolean(value);
+break;
   default:
 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
 break;
@@ -96,6 +100,9 @@ garrow_cast_options_get_property(GObject *object,
   case PROP_ALLOW_FLOAT_TRUNCATE:
 g_value_set_boolean(value, priv->options.allow_float_truncate);
 break;
+  case PROP_ALLOW_INVALID_UTF8:
+g_value_set_boolean(value, priv->options.allow_invalid_utf8);
+break;
   default:
 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
 break;
@@ -157,6 +164,20 @@ garrow_cast_options_class_init(GArrowCastOptionsClass 
*klass)
   FALSE,
   static_cast(G_PARAM_READWRITE));
   g_object_class_install_property(gobject_class, PROP_ALLOW_FLOAT_TRUNCATE, 
spec);
+
+  /**
+   * GArrowCastOptions:allow-invalid-utf8:
+   *
+   * Whether invalid UTF-8 string value is allowed or not.
+   *
+   * Since: 0.13.0
+   */
+  spec = g_param_spec_boolean("allow-invalid-utf8",
+  "Allow invalid UTF-8",
+  "Whether invalid UTF-8 string value is allowed 
or not",
+  FALSE,
+  static_cast(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_ALLOW_INVALID_UTF8, 
spec);
 }
 
 /**
@@ -183,6 +204,7 @@ garrow_cast_options_new_raw(arrow::compute::CastOptions 
*arrow_cast_options)
  "allow-int-overflow", arrow_cast_options->allow_int_overflow,
  "allow-time-truncate", 
arrow_cast_options->allow_time_truncate,
  "allow-float-truncate", 
arrow_cast_options->allow_float_truncate,
+ "allow-invalid-utf8", arrow_cast_options->allow_invalid_utf8,
  NULL);
   return GARROW_CAST_OPTIONS(cast_options);
 }
diff --git a/c_glib/test/test-cast.rb b/c_glib/test/test-cast.rb
index 2512e05..f9d406c 100644
--- a/c_glib/test/test-cast.rb
+++ b/c_glib/test/test-cast.rb
@@ -82,4 +82,21 @@ class TestCast < Test::Unit::TestCase
build_float_array([1.1]).cast(int8_data_type, options))
 end
   end
+
+  sub_test_case("allow-invalid-utf8") do
+def test_default
+  require_gi(1, 42, 0)
+  assert_raise(Arrow::Error::Invalid) do
+build_binary_array(["\xff"]).cast(Arrow::StringDataType.new)
+  end
+end
+
+def test_true
+  options = Arrow::CastOptions.new
+  options.allow_invalid_utf8 = true
+  string_data_type = Arrow::StringDataType.new
+  assert_equal(build_string_array(["\xff"]),
+   build_binary_array(["\xff"]).cast(string_data_type, 
options))
+end
+  end
 end



[arrow] branch master updated: ARROW-4866: [C++] Fix zstd_ep build for Debug, static CRT builds. Add separate CMake variable for propagating compiler toolchain to ExternalProjects

2019-03-14 Thread wesm
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 431fc13  ARROW-4866: [C++] Fix zstd_ep build for Debug, static CRT 
builds. Add separate CMake variable for propagating compiler toolchain to 
ExternalProjects
431fc13 is described below

commit 431fc13011cd959ecd3ea57b960436e960256f91
Author: Wes McKinney 
AuthorDate: Thu Mar 14 17:05:43 2019 -0500

ARROW-4866: [C++] Fix zstd_ep build for Debug, static CRT builds. Add 
separate CMake variable for propagating compiler toolchain to ExternalProjects

zstd sets a bunch of its own compilation flags and they conflict with the 
CFLAGS/CXXFLAGS that are part of EP_COMMON_CMAKE_ARGS

Author: Wes McKinney 

Closes #3905 from wesm/ARROW-4866 and squashes the following commits:

de7c0ae6  Pass EP compiler toolchain as separate variable. Do 
not override CFLAGS and CXXFLAGS for zstd_ep
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index baefc6b..89fd7a6 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -350,30 +350,30 @@ endif()
 # directory. This leads to issues if the variables are exported in a subshell
 # and the invocation of make/ninja is in distinct subshell without the same
 # environment (CC/CXX).
-set(EP_COMMON_CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
- -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER})
+set(EP_COMMON_TOOLCHAIN -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER})
+
+if (CMAKE_AR)
+  set(EP_COMMON_TOOLCHAIN ${EP_COMMON_TOOLCHAIN}
+   -DCMAKE_AR=${CMAKE_AR})
+endif()
+
+if (CMAKE_RANLIB)
+  set(EP_COMMON_TOOLCHAIN ${EP_COMMON_TOOLCHAIN}
+   -DCMAKE_RANLIB=${CMAKE_RANLIB})
+endif()
 
 # External projects are still able to override the following declarations.
 # cmake command line will favor the last defined variable when a duplicate is
 # encountered. This requires that `EP_COMMON_CMAKE_ARGS` is always the first
 # argument.
-set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS}
+set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_TOOLCHAIN}
  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
  -DCMAKE_C_FLAGS=${EP_C_FLAGS}
  -DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS}
  -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}
  
-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS})
 
-if (CMAKE_AR)
-  set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS}
-   -DCMAKE_AR=${CMAKE_AR})
-endif()
-
-if (CMAKE_RANLIB)
-  set(EP_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS}
-   -DCMAKE_RANLIB=${CMAKE_RANLIB})
-endif()
-
 if (NOT ARROW_VERBOSE_THIRDPARTY_BUILD)
   set(EP_LOG_OPTIONS
 LOG_CONFIGURE 1
@@ -1438,7 +1438,7 @@ macro(build_zstd)
   message(STATUS "Building zstd from source")
   set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install")
 
-  set(ZSTD_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS}
+  set(ZSTD_CMAKE_ARGS ${EP_COMMON_TOOLCHAIN}
 "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}"
 -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
 -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}



[arrow] branch master updated: ARROW-4705: [Rust] Improve error handling in csv reader

2019-03-14 Thread sunchao
This is an automated email from the ASF dual-hosted git repository.

sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 74436f0  ARROW-4705: [Rust] Improve error handling in csv reader
74436f0 is described below

commit 74436f07c482c0d46a8a52c2b50dc9ad1191922a
Author: Andy Grove 
AuthorDate: Thu Mar 14 14:11:40 2019 -0700

ARROW-4705: [Rust] Improve error handling in csv reader

Author: Andy Grove 

Closes #3895 from andygrove/ARROW-4705 and squashes the following commits:

b32f4a8e  Address PR feedback
00d6b3b4  start line number at 1 if csv has header row
8c9fca00  Improve error handling in csv reader
---
 rust/arrow/Cargo.toml  |  2 +-
 rust/arrow/src/csv/reader.rs   | 51 ++
 rust/arrow/test/data/various_types_invalid.csv |  6 +++
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index fbc9be0..5e96e0d 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -41,7 +41,7 @@ serde_derive = "1.0.80"
 serde_json = { version = "1.0.13", features = ["preserve_order"] }
 indexmap = "1.0"
 rand = "0.5"
-csv = "1.0.0"
+csv = "1.0"
 num = "0.2"
 regex = "1.1"
 lazy_static = "1.2"
diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs
index 85b2ccd..ffeffdd 100644
--- a/rust/arrow/src/csv/reader.rs
+++ b/rust/arrow/src/csv/reader.rs
@@ -191,6 +191,8 @@ pub struct Reader {
 record_iter: StringRecordsIntoIter>,
 /// Batch size (number of records to load each time)
 batch_size: usize,
+/// Current line number, used in error reporting
+line_number: usize,
 }
 
 impl Reader {
@@ -235,6 +237,7 @@ impl Reader {
 projection,
 record_iter,
 batch_size,
+line_number: if has_headers { 1 } else { 0 },
 }
 }
 
@@ -242,15 +245,17 @@ impl Reader {
 pub fn next( self) -> Result> {
 // read a batch of rows into memory
 let mut rows: Vec = Vec::with_capacity(self.batch_size);
-for _ in 0..self.batch_size {
+for i in 0..self.batch_size {
 match self.record_iter.next() {
 Some(Ok(r)) => {
 rows.push(r);
 }
-Some(Err(_)) => {
-return Err(ArrowError::ParseError(
-"Error reading CSV file".to_string(),
-));
+Some(Err(e)) => {
+return Err(ArrowError::ParseError(format!(
+"Error parsing line {}: {:?}",
+self.line_number + i,
+e
+)));
 }
 None => break,
 }
@@ -319,6 +324,8 @@ impl Reader {
 })
 .collect();
 
+self.line_number += rows.len();
+
 let schema_fields = self.schema.fields();
 
 let projected_fields: Vec = projection
@@ -358,8 +365,9 @@ impl Reader {
 Err(_) => {
 // TODO: we should surface the underlying error 
here.
 return Err(ArrowError::ParseError(format!(
-"Error while parsing value {}",
-s
+"Error while parsing value {} at line {}",
+s,
+self.line_number + row_index
 )));
 }
 }
@@ -503,6 +511,7 @@ impl ReaderBuilder {
 projection: self.projection.clone(),
 record_iter,
 batch_size: self.batch_size,
+line_number: if self.has_headers { 1 } else { 0 },
 })
 }
 }
@@ -718,4 +727,32 @@ mod tests {
 assert_eq!(false, batch.column(1).is_null(3));
 assert_eq!(false, batch.column(1).is_null(4));
 }
+
+#[test]
+fn test_parse_invalid_csv() {
+let file = File::open("test/data/various_types_invalid.csv").unwrap();
+
+let schema = Schema::new(vec![
+Field::new("c_int", DataType::UInt64, false),
+Field::new("c_float", DataType::Float32, false),
+Field::new("c_string", DataType::Utf8, false),
+Field::new("c_bool", DataType::Boolean, false),
+]);
+
+let builder = ReaderBuilder::new()
+.with_schema(Arc::new(schema))
+.has_headers(true)
+.with_delimiter(b'|')
+.with_batch_size(512)
+.with_projection(vec![0, 1, 2, 3]);
+
+let mut csv = builder.build(file).unwrap();
+match csv.next() {
+Err(e) => assert_eq!(
+"ParseError(\"Error while parsing value 4.x4 at line 4\")",
+

[arrow] branch master updated: ARROW-4859: [GLib] Add garrow_numeric_array_mean()

2019-03-14 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new dcee4ad  ARROW-4859: [GLib]  Add garrow_numeric_array_mean()
dcee4ad is described below

commit dcee4ad1c5f62df0265e26f59ec58ed85597ffa2
Author: Yosuke Shiro 
AuthorDate: Fri Mar 15 05:36:09 2019 +0900

ARROW-4859: [GLib]  Add garrow_numeric_array_mean()

Author: Yosuke Shiro 

Closes #3889 from shiro615/glib-mean and squashes the following commits:

1d2abbf5  Use assert_in_delta for floating point number
e4be5213  Use 0.0 for double
dfff46be  Add garrow_numeric_array_mean()
---
 c_glib/arrow-glib/basic-array.cpp | 27 +++
 c_glib/arrow-glib/basic-array.h   |  4 
 c_glib/test/test-numeric-array.rb | 26 ++
 cpp/src/arrow/compute/api.h   |  1 +
 4 files changed, 58 insertions(+)

diff --git a/c_glib/arrow-glib/basic-array.cpp 
b/c_glib/arrow-glib/basic-array.cpp
index 7409945..c201f9d 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -939,6 +939,33 @@ garrow_numeric_array_class_init(GArrowNumericArrayClass 
*klass)
 {
 }
 
+/**
+ * garrow_numeric_array_mean:
+ * @array: A #GArrowNumericArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: The value of the computed mean.
+ *
+ * Since: 0.13.0
+ */
+gdouble
+garrow_numeric_array_mean(GArrowNumericArray *array,
+  GError **error)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto memory_pool = arrow::default_memory_pool();
+  arrow::compute::FunctionContext context(memory_pool);
+  arrow::compute::Datum mean_datum;
+  auto status = arrow::compute::Mean(, arrow_array, _datum);
+  if (garrow_error_check(error, status, "[numeric-array][mean]")) {
+using ScalarType = typename 
arrow::TypeTraits::ScalarType;
+auto arrow_numeric_scalar = 
std::dynamic_pointer_cast(mean_datum.scalar());
+return arrow_numeric_scalar->value;
+  } else {
+return 0.0;
+  }
+}
+
 
 G_DEFINE_TYPE(GArrowInt8Array,
   garrow_int8_array,
diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h
index 592699d..1dde2f2 100644
--- a/c_glib/arrow-glib/basic-array.h
+++ b/c_glib/arrow-glib/basic-array.h
@@ -212,6 +212,10 @@ struct _GArrowNumericArrayClass
   GArrowPrimitiveArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_0_13
+gdouble garrow_numeric_array_mean(GArrowNumericArray *array,
+  GError **error);
+
 #define GARROW_TYPE_INT8_ARRAY (garrow_int8_array_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowInt8Array,
  garrow_int8_array,
diff --git a/c_glib/test/test-numeric-array.rb 
b/c_glib/test/test-numeric-array.rb
new file mode 100644
index 000..d919d59
--- /dev/null
+++ b/c_glib/test/test-numeric-array.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestNumericArray < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def test_mean
+array = build_double_array([1.1, 2.2, nil])
+assert_in_delta(array.values.sum / 2,
+array.mean)
+  end
+end
diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h
index cd5f11e..42839ce 100644
--- a/cpp/src/arrow/compute/api.h
+++ b/cpp/src/arrow/compute/api.h
@@ -24,5 +24,6 @@
 #include "arrow/compute/kernels/boolean.h"  // IWYU pragma: export
 #include "arrow/compute/kernels/cast.h" // IWYU pragma: export
 #include "arrow/compute/kernels/hash.h" // IWYU pragma: export
+#include "arrow/compute/kernels/mean.h" // IWYU pragma: export
 
 #endif  // ARROW_COMPUTE_API_H



[arrow] branch master updated: ARROW-4251: [C++][Release] Add option to set ARROW_BOOST_VENDORED environment variable in verify-release-candidate.sh

2019-03-14 Thread wesm
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 954e3f4  ARROW-4251: [C++][Release] Add option to set 
ARROW_BOOST_VENDORED environment variable in verify-release-candidate.sh
954e3f4 is described below

commit 954e3f44c8753e548c7b24f2269135716a2429cd
Author: Wes McKinney 
AuthorDate: Thu Mar 14 14:30:38 2019 -0500

ARROW-4251: [C++][Release] Add option to set ARROW_BOOST_VENDORED 
environment variable in verify-release-candidate.sh

I'm taking this for a spin on 0.12.1 RC0 on Ubuntu 14.04 (where the system 
boost does not work -- see ARROW-4868)

Author: Wes McKinney 

Closes #3903 from wesm/ARROW-4251 and squashes the following commits:

eb38b0f68  Add option to set ARROW_BOOST_VENDORED environment 
variable in dev/release/verify-release-candidate.sh
---
 dev/release/verify-release-candidate.sh | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/dev/release/verify-release-candidate.sh 
b/dev/release/verify-release-candidate.sh
index e96cd28..8d4cc52 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -26,7 +26,8 @@
 # - nodejs >= 6.0.0 (best way is to use nvm)
 #
 # If using a non-system Boost, set BOOST_ROOT and add Boost libraries to
-# LD_LIBRARY_PATH
+# LD_LIBRARY_PATH. If your system Boost is too old for the C++ libraries, then
+# set $ARROW_BOOST_VENDORED to "ON" or "1"
 
 case $# in
   3) ARTIFACT="$1"
@@ -49,6 +50,8 @@ set -o pipefail
 
 HERE=$(cd `dirname "${BASH_SOURCE[0]:-$0}"` && pwd)
 
+ARROW_BOOST_VENDORED=${ARROW_BOOST_VENDORED:=OFF}
+
 ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow'
 
 detect_cuda() {
@@ -207,6 +210,7 @@ ${ARROW_CMAKE_OPTIONS}
 -DARROW_GANDIVA=ON
 -DARROW_PARQUET=ON
 -DARROW_BOOST_USE_SHARED=ON
+-DARROW_BOOST_VENDORED=$ARROW_BOOST_VENDORED
 -DCMAKE_BUILD_TYPE=release
 -DARROW_BUILD_TESTS=ON
 -DARROW_CUDA=${ARROW_CUDA}



[arrow] branch master updated: ARROW-4486: [Python][CUDA] Add base argument to foreign_buffer

2019-03-14 Thread wesm
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 2b574f9  ARROW-4486: [Python][CUDA] Add base argument to foreign_buffer
2b574f9 is described below

commit 2b574f913784e77cc79ec7d002c94b1ae51116f1
Author: Pearu Peterson 
AuthorDate: Thu Mar 14 09:47:20 2019 -0500

ARROW-4486: [Python][CUDA] Add base argument to foreign_buffer

Resolves [ARROW-4486](https://issues.apache.org/jira/browse/ARROW-4486)

Author: Pearu Peterson 

Closes #3850 from pearu/arrow-4486 and squashes the following commits:

e96265df0  Add base argument to foreign_buffer, resolves 
ARROW-4486
---
 python/pyarrow/_cuda.pxd  |  5 -
 python/pyarrow/_cuda.pyx  | 26 +++---
 python/pyarrow/tests/test_cuda.py | 27 +++
 3 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/python/pyarrow/_cuda.pxd b/python/pyarrow/_cuda.pxd
index 3b8d966..1180601 100644
--- a/python/pyarrow/_cuda.pxd
+++ b/python/pyarrow/_cuda.pxd
@@ -41,8 +41,11 @@ cdef class IpcMemHandle:
 cdef class CudaBuffer(Buffer):
 cdef:
 shared_ptr[CCudaBuffer] cuda_buffer
+object base
 
-cdef void init_cuda(self, const shared_ptr[CCudaBuffer]& buffer)
+cdef void init_cuda(self,
+const shared_ptr[CCudaBuffer]& buffer,
+object base)
 
 
 cdef class HostBuffer(Buffer):
diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx
index fa84fc6..87be0e6 100644
--- a/python/pyarrow/_cuda.pyx
+++ b/python/pyarrow/_cuda.pyx
@@ -190,7 +190,7 @@ cdef class Context:
 check_status(self.context.get().Allocate(nbytes, ))
 return pyarrow_wrap_cudabuffer(cudabuf)
 
-def foreign_buffer(self, address, size):
+def foreign_buffer(self, address, size, base=None):
 """Create device buffer from address and size as a view.
 
 The caller is responsible for allocating and freeing the
@@ -206,6 +206,8 @@ cdef class Context:
   `get_device_address` method.
 size : int
   Specify the size of device buffer in bytes.
+base : {None, object}
+  Specify object that owns the referenced memory.
 
 Returns
 ---
@@ -222,7 +224,7 @@ cdef class Context:
 check_status(self.context.get().View(c_addr,
  c_size,
  ))
-return pyarrow_wrap_cudabuffer(cudabuf)
+return pyarrow_wrap_cudabuffer_base(cudabuf, base)
 
 def open_ipc_buffer(self, ipc_handle):
 """ Open existing CUDA IPC memory handle
@@ -309,7 +311,7 @@ cdef class Context:
 
 """
 if isinstance(obj, HostBuffer):
-return self.foreign_buffer(obj.address, obj.size)
+return self.foreign_buffer(obj.address, obj.size, base=obj)
 elif isinstance(obj, Buffer):
 return CudaBuffer.from_buffer(obj)
 elif isinstance(obj, CudaBuffer):
@@ -323,7 +325,7 @@ cdef class Context:
 start, end = get_contiguous_span(
 desc['shape'], desc.get('strides'),
 np.dtype(desc['typestr']).itemsize)
-return self.foreign_buffer(addr + start, end - start)
+return self.foreign_buffer(addr + start, end - start, base=obj)
 raise ArrowTypeError('cannot create device buffer view from'
  ' `%s` object' % (type(obj)))
 
@@ -387,9 +389,12 @@ cdef class CudaBuffer(Buffer):
 "`.device_buffer`"
 " method instead.")
 
-cdef void init_cuda(self, const shared_ptr[CCudaBuffer]& buffer):
+cdef void init_cuda(self,
+const shared_ptr[CCudaBuffer]& buffer,
+object base):
 self.cuda_buffer = buffer
 self.init( buffer)
+self.base = base
 
 @staticmethod
 def from_buffer(buf):
@@ -426,7 +431,7 @@ cdef class CudaBuffer(Buffer):
 ctx = Context.from_numba(mem.context)
 if mem.device_pointer.value is None and mem.size==0:
 return ctx.new_buffer(0)
-return ctx.foreign_buffer(mem.device_pointer.value, mem.size)
+return ctx.foreign_buffer(mem.device_pointer.value, mem.size, base=mem)
 
 def to_numba(self):
 """Return numba memory pointer of CudaBuffer instance.
@@ -949,9 +954,16 @@ cdef public api bint pyarrow_is_cudabuffer(object buffer):
 
 
 cdef public api object \
+pyarrow_wrap_cudabuffer_base(const shared_ptr[CCudaBuffer]& buf, base):
+cdef CudaBuffer result = CudaBuffer.__new__(CudaBuffer)
+result.init_cuda(buf, base)
+return result
+
+
+cdef public api object \
 pyarrow_wrap_cudabuffer(const shared_ptr[CCudaBuffer]& buf):
 cdef 

[arrow] branch master updated: [C++/Python/CI] Use more recent docker-compose in manylinux1 jobs [skip ci] (#3897)

2019-03-14 Thread kszucs
This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 3e6d968  [C++/Python/CI] Use more recent docker-compose in manylinux1 
jobs [skip ci] (#3897)
3e6d968 is described below

commit 3e6d968db7262bc2924cea6d248b2df572485f91
Author: Uwe L. Korn 
AuthorDate: Thu Mar 14 14:01:27 2019 +0100

[C++/Python/CI] Use more recent docker-compose in manylinux1 jobs [skip ci] 
(#3897)
---
 dev/tasks/python-wheels/travis.linux.yml | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/dev/tasks/python-wheels/travis.linux.yml 
b/dev/tasks/python-wheels/travis.linux.yml
index 64b9e7e..cccb30b 100644
--- a/dev/tasks/python-wheels/travis.linux.yml
+++ b/dev/tasks/python-wheels/travis.linux.yml
@@ -21,6 +21,12 @@ language: cpp
 services:
   - docker
 
+# Update docker to support newer docker-compose versions
+addons:
+  apt:
+packages:
+  - docker-ce
+
 # don't build twice
 if: tag IS blank
 
@@ -30,7 +36,7 @@ env:
 - TRAVIS_TAG={{ task.tag }}
 
 before_script:
-  - docker pull quay.io/xhochy/arrow_manylinux1_x86_64_base:latest
+  - sudo pip install -U docker-compose
 
 script:
   - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow



[arrow] branch master updated: ARROW-3364: [Docs] Add docker-compose integration documentation

2019-03-14 Thread wesm
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 9198f63  ARROW-3364: [Docs] Add docker-compose integration 
documentation
9198f63 is described below

commit 9198f630663e7f8ddaecbd4d9f037f61e5b450d3
Author: François Saint-Jacques 
AuthorDate: Thu Mar 14 07:54:56 2019 -0500

ARROW-3364: [Docs] Add docker-compose integration documentation

Author: François Saint-Jacques 

Closes #3882 from fsaintjacques/ARROW-3364-docker-compose-documentation and 
squashes the following commits:

08f91ff35  Simplify makefile
595d50ac0  Refactor ambiguous makefile run target
9d90bbaf8  ARROW-3364:  Add docker-compose 
integration documentation
---
 Makefile.docker| 92 --
 .../{building.rst => developers/documentation.rst} |  5 +-
 docs/source/{ => developers}/index.rst | 31 +---
 docs/source/developers/integration.rst | 67 
 docs/source/index.rst  |  4 +-
 5 files changed, 109 insertions(+), 90 deletions(-)

diff --git a/Makefile.docker b/Makefile.docker
index dc1f0bf..8e7d861 100644
--- a/Makefile.docker
+++ b/Makefile.docker
@@ -16,70 +16,48 @@
 # under the License.
 
 # build docker compose images:
+# $ make -f Makefile.docker build-cpp
+# To run the test suite
 # $ make -f Makefile.docker cpp
-# run the built image:
-# $ make -f Makefile.docker run cpp
 
-.PHONY: clean run cpp cpp-alpine go js java rust r
+LANGUAGES = cpp cpp-alpine cpp-cmake32 c_glib go java js python python-alpine 
rust r
+MISC = lint iwyu clang-format docs pandas-master
+SERVERS = dask hdfs-integration spark-integration
+
+# declare images dependencies
+DEPENDS_ON_CPP = build-c_glib build-python build-r
+DEPENDS_ON_CPP_ALPINE = build-python-alpine
+DEPENDS_ON_PYTHON = build-lint build-docs build-dask build-hdfs-integration 
build-spark-integration
+DEPENDS_ON_LINT = build-iwyu build-clang-format
+
+SERVICES = $(LANGUAGES) $(MISC) $(SERVERS)
+.PHONY: clean build-% run-% $(SERVICES)
 
 DC := docker-compose
 
 clean:
$(DC) down -v
 
-run:
-   $(DC) run --rm $(filter-out $@,$(MAKECMDGOALS))
-
-go:
-   $(DC) build go
-
-js:
-   $(DC) build js
-
-java:
-   $(DC) build java
-
-rust:
-   $(DC) build rust
-
-cpp:
-   $(DC) build cpp
-
-cpp-alpine:
-   $(DC) build cpp-alpine
-
-cpp-cmake32:
-   $(DC) build cpp-cmake32
-
-c_glib: cpp
-   $(DC) build c_glib
-
-r: cpp
-   $(DC) build r
-
-python: cpp
-   $(DC) build python
-
-python-alpine: cpp-alpine
-   $(DC) build python-alpine
-
-lint: python
-   $(DC) build lint
-
-iwyu: lint
-
-clang-format: lint
-
-docs: python
-
-dask: python
-   $(DC) build dask
-
-hdfs: python
-   $(DC) build hdfs-integration
+# Default build target if no dependencies
+build-%:
+   $(DC) build $*
+
+# The following targets create the dependencies of the form `build-X: build-Y`
+$(DEPENDS_ON_CPP): build-%: build-cpp
+   $(DC) build $*
+$(DEPENDS_ON_CPP_ALPINE): build-%: build-cpp-alpine
+   $(DC) build $*
+$(DEPENDS_ON_PYTHON): build-%: build-python
+   $(DC) build $*
+# The dependents of lint image don't build anything
+$(DEPENDS_ON_LINT): build-%: build-lint
+
+# pandas master is a special case due to --no-cache
+build-pandas-master: build-python
+   $(DC) build --no-cache pandas-master
 
-spark: python
-   $(DC) build spark-integration
+run-%: build-%
+   $(DC) run --rm $*
 
-pandas-master: python
-   $(DC) build --no-cache pandas-master
+# Trick to get `service` expand to `run-service`
+$(SERVICES): % : run-%
diff --git a/docs/source/building.rst b/docs/source/developers/documentation.rst
similarity index 96%
rename from docs/source/building.rst
rename to docs/source/developers/documentation.rst
index 2239a19..1fbab43 100644
--- a/docs/source/building.rst
+++ b/docs/source/developers/documentation.rst
@@ -90,9 +90,6 @@ You can use Docker to build the documentation:
 
 .. code-block:: shell
 
-  docker-compose build cpp
-  docker-compose build python
-  docker-compose build docs
-  docker-compose run docs
+  make -f Makefile.docker docs
 
 The final output is located under ``docs/_build/html``.
diff --git a/docs/source/index.rst b/docs/source/developers/index.rst
similarity index 56%
copy from docs/source/index.rst
copy to docs/source/developers/index.rst
index 2b367b3..e99f7c5 100644
--- a/docs/source/index.rst
+++ b/docs/source/developers/index.rst
@@ -15,34 +15,11 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-Apache Arrow
-
-
-Apache Arrow is a cross-language development platform for in-memory data. It
-specifies a standardized language-independent columnar memory format for flat
-and hierarchical data, organized for 

[arrow] branch master updated: ARROW-4743: [Java] Add javadoc missing in classes and methods in java…

2019-03-14 Thread ravindra
This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new d2e1ee9  ARROW-4743: [Java] Add javadoc missing in classes and methods 
in java…
d2e1ee9 is described below

commit d2e1ee9edd70c05145adb0f4fc0da38de3be4e6d
Author: Micah Kornfield 
AuthorDate: Thu Mar 14 11:34:22 2019 +0530

ARROW-4743: [Java] Add javadoc missing in classes and methods in java…

This is the result of finding errors after applying the following diff to 
suppressions.xml (note: for some reason this only seems to highlight method-
level documentation issues, not class-level ones; if anyone can point out the right 
configuration knob to automatically detect missing class docs as well I would 
appreciate it):
--- a/java/dev/checkstyle/suppressions.xml
+++ b/java/dev/checkstyle/suppressions.xml
@@ -24,17 +24,12 @@
   
   

-  
-  
-  
+  
   

   
   

-  
-  

Author: Micah Kornfield 

Closes #3792 from emkornfield/javadoc and squashes the following commits:

d01c123c  Address code review feedback
a64c11ec  ARROW-4743:  Add javadoc missing in classes and 
methods in java/memory
---
 .../src/main/java/io/netty/buffer/ArrowBuf.java| 42 ++
 .../io/netty/buffer/MutableWrappedByteBuf.java |  5 ++-
 .../io/netty/buffer/PooledByteBufAllocatorL.java   |  3 ++
 .../org/apache/arrow/memory/BaseAllocator.java | 16 +
 .../org/apache/arrow/memory/BoundsChecking.java|  7 
 .../apache/arrow/memory/OutOfMemoryException.java  |  8 -
 .../org/apache/arrow/memory/RootAllocator.java |  5 +--
 .../test/java/io/netty/buffer/TestArrowBuf.java|  3 +-
 8 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java 
b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java
index 607f8ca..bb6a940 100644
--- a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java
+++ b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java
@@ -41,6 +41,18 @@ import org.apache.arrow.util.Preconditions;
 
 import io.netty.util.internal.PlatformDependent;
 
+/**
+ * ArrowBuf is the abstraction around raw byte arrays that
+ * comprise arrow data structures.
+ *
+ *
+ * Specifically, it serves as a facade over
+ * {@linkplain UnsafeDirectLittleEndian} memory objects that hides the details
+ * of raw memory addresses.
+ *
+ * ArrowBuf supports reference counting and ledgering to closely track where
+ * memory is being used.
+ */
 public final class ArrowBuf extends AbstractByteBuf implements AutoCloseable {
 
   private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(ArrowBuf.class);
@@ -60,6 +72,17 @@ public final class ArrowBuf extends AbstractByteBuf 
implements AutoCloseable {
   new HistoricalLog(BaseAllocator.DEBUG_LOG_LENGTH, "ArrowBuf[%d]", id) : 
null;
   private volatile int length;
 
+  /**
+   * Constructs a new ArrowBuf
+   * @param refCnt The atomic integer to use for reference counting this 
buffer.
+   * @param ledger The ledger to use for tracking memory usage of this buffer.
+   * @param byteBuf The underlying storage for this buffer.
+   * @param manager The manager that handles replacing this buffer.
+   * @param alloc The allocator for the buffer (needed for superclass 
compatibility)
+   * @param offset The byte offset into byteBuf this buffer 
starts at.
+   * @param length The  byte length of this buffer
+   * @param isEmpty  Indicates if this buffer is empty which enables some 
optimizations.
+   */
   public ArrowBuf(
   final AtomicInteger refCnt,
   final BufferLedger ledger,
@@ -69,6 +92,7 @@ public final class ArrowBuf extends AbstractByteBuf 
implements AutoCloseable {
   final int offset,
   final int length,
   boolean isEmpty) {
+// TODO(emkornfield): Should this be byteBuf.maxCapacity - offset?
 super(byteBuf.maxCapacity());
 this.refCnt = refCnt;
 this.udle = byteBuf;
@@ -86,6 +110,7 @@ public final class ArrowBuf extends AbstractByteBuf 
implements AutoCloseable {
 
   }
 
+  /** Returns a debug friendly string for the given ByteBuf. */
   public static String bufferState(final ByteBuf buf) {
 final int cap = buf.capacity();
 final int mcap = buf.maxCapacity();
@@ -97,6 +122,10 @@ public final class ArrowBuf extends AbstractByteBuf 
implements AutoCloseable {
 cap, mcap, ri, rb, wi, wb);
   }
 
+  /**
+   * Returns this if size is less then {@link #capacity()}, 
otherwise
+   * delegates to {@link BufferManager#replace(ArrowBuf, int)} to get a new 
buffer.
+   */
   public ArrowBuf reallocIfNeeded(final int size) {
 Preconditions.checkArgument(size >= 0, "reallocation size must be 
non-negative");
 
@@ -928,6 +957,10 @@ public final class ArrowBuf extends 

[arrow] branch master updated: ARROW-4206: [Gandiva] support decimal divide and mod

2019-03-14 Thread ravindra
This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
 new 31aa19d  ARROW-4206: [Gandiva] support decimal divide and mod
31aa19d is described below

commit 31aa19da25352d5f6abdf3264d57306c3b52bb05
Author: Pindikura Ravindra 
AuthorDate: Thu Mar 14 11:29:42 2019 +0530

ARROW-4206: [Gandiva] support decimal divide and mod

Author: Pindikura Ravindra 

Closes #3813 from pravindra/gdivmod and squashes the following commits:

96ef4054  ARROW-4206:  add global symbol for new fns
a9ad13fe  ARROW-4206:  Add more tests/comments
697c2343  ARROW-4206:  Fix build errors
267f117e  ARROW-4206:  support decimal divide and mod
---
 cpp/src/arrow/util/basic_decimal.h  |   3 +
 cpp/src/arrow/util/decimal-test.cc  | 115 +-
 cpp/src/arrow/util/decimal.h|   5 +
 cpp/src/gandiva/decimal_ir.cc   |  58 +
 cpp/src/gandiva/decimal_ir.h|   4 +
 cpp/src/gandiva/decimal_xlarge.cc   | 121 --
 cpp/src/gandiva/decimal_xlarge.h|   8 +
 cpp/src/gandiva/function_registry_arithmetic.cc |   2 +
 cpp/src/gandiva/precompiled/CMakeLists.txt  |   7 +-
 cpp/src/gandiva/precompiled/decimal_ops.cc  |  84 +++
 cpp/src/gandiva/precompiled/decimal_ops.h   |  10 +
 cpp/src/gandiva/precompiled/decimal_ops_test.cc | 281 
 cpp/src/gandiva/precompiled/decimal_wrapper.cc  |  34 +++
 cpp/src/gandiva/tests/decimal_single_test.cc|  33 ++-
 14 files changed, 698 insertions(+), 67 deletions(-)

diff --git a/cpp/src/arrow/util/basic_decimal.h 
b/cpp/src/arrow/util/basic_decimal.h
index 7929b11..2e5857c 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -138,6 +138,9 @@ class ARROW_EXPORT BasicDecimal128 {
   /// - If 'round' is false, the right-most digits are simply dropped.
   BasicDecimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
 
+  // returns 1 for positive and zero decimal values, -1 for negative decimal 
values.
+  inline int64_t Sign() const { return 1 | (high_bits_ >> 63); }
+
   /// \brief count the number of leading binary zeroes.
   int32_t CountLeadingBinaryZeros() const;
 
diff --git a/cpp/src/arrow/util/decimal-test.cc 
b/cpp/src/arrow/util/decimal-test.cc
index db4d35f..4ba7d7f 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -23,12 +23,16 @@
 #include 
 
 #include 
+#include 
 
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/macros.h"
 
+using boost::multiprecision::int128_t;
+
 namespace arrow {
 
 class DecimalTestFixture : public ::testing::Test {
@@ -466,20 +470,113 @@ TEST(Decimal128Test, TestToInteger) {
   ASSERT_RAISES(Invalid, invalid_int64.ToInteger());
 }
 
+template 
+std::vector GetRandomNumbers(int32_t size) {
+  auto rand = random::RandomArrayGenerator(0x5487655);
+  auto x_array = rand.Numeric(size, 0, 
std::numeric_limits::max(), 0);
+
+  auto x_ptr = x_array->data()->template GetValues(1);
+  std::vector ret;
+  for (int i = 0; i < size; ++i) {
+ret.push_back(x_ptr[i]);
+  }
+  return ret;
+}
+
 TEST(Decimal128Test, Multiply) {
-  Decimal128 result;
+  ASSERT_EQ(Decimal128(60501), Decimal128(301) * Decimal128(201));
+
+  ASSERT_EQ(Decimal128(-60501), Decimal128(-301) * Decimal128(201));
+
+  ASSERT_EQ(Decimal128(-60501), Decimal128(301) * Decimal128(-201));
+
+  ASSERT_EQ(Decimal128(60501), Decimal128(-301) * Decimal128(-201));
+
+  // Test some random numbers.
+  for (auto x : GetRandomNumbers(16)) {
+for (auto y : GetRandomNumbers(16)) {
+  Decimal128 result = Decimal128(x) * Decimal128(y);
+  ASSERT_EQ(Decimal128(static_cast(x) * y), result)
+  << " x: " << x << " y: " << y;
+}
+  }
+
+  // Test some edge cases
+  for (auto x : std::vector{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, 
INT64_MAX}) {
+for (auto y :
+ std::vector{-INT32_MAX, -32, -2, -1, 0, 1, 2, 32, 
INT32_MAX}) {
+  Decimal128 result = Decimal128(x.str()) * Decimal128(y.str());
+  ASSERT_EQ(Decimal128((x * y).str()), result) << " x: " << x << " y: " << 
y;
+}
+  }
+}
+
+TEST(Decimal128Test, Divide) {
+  ASSERT_EQ(Decimal128(66), Decimal128(20100) / Decimal128(301));
+
+  ASSERT_EQ(Decimal128(-66), Decimal128(-20100) / Decimal128(301));
+
+  ASSERT_EQ(Decimal128(-66), Decimal128(20100) / Decimal128(-301));
 
-  result = Decimal128("301") * Decimal128("201");
-  ASSERT_EQ(result.ToIntegerString(), "60501");
+  ASSERT_EQ(Decimal128(66), Decimal128(-20100) / Decimal128(-301));
 
-  result = Decimal128("-301") * Decimal128("201");
-  ASSERT_EQ(result.ToIntegerString(), "-60501");
+  // Test some random numbers.
+