This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 62c5a30aaad35e50760a779d9ac2e2a87aaae84d Author: praveenbingo <[email protected]> AuthorDate: Mon Sep 17 17:33:06 2018 +0530 [Gandiva] Fix divide by zero errors. Fixed divide by zero errors for divide operation. --- cpp/src/gandiva/function_registry.cc | 13 +++- cpp/src/gandiva/precompiled/arithmetic_ops.cc | 19 +++++- cpp/src/gandiva/precompiled/arithmetic_ops_test.cc | 15 +++++ cpp/src/gandiva/precompiled/types.h | 3 + cpp/src/gandiva/tests/projector_test.cc | 70 ++++++++++++++++++++++ 5 files changed, 118 insertions(+), 2 deletions(-) diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 84a265f..71de601 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -50,6 +50,17 @@ using std::vector; RESULT_NULL_IF_NULL, STRINGIFY(NAME##_##TYPE##_##TYPE)) // Binary functions that : +// - have the same input type for both params +// - output type is same as the input type +// - NULL handling is of type NULL_INTERNAL +// +// The pre-compiled fn name includes the base name & input type names. eg. +// divide_int64_int64 +#define BINARY_SYMMETRIC_NULL_INTERNAL(NAME, TYPE) \ + NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, TYPE(), true, \ + RESULT_NULL_INTERNAL, STRINGIFY(NAME##_##TYPE##_##TYPE)) + +// Binary functions that : // - have different input types, or output type // - NULL handling is of type NULL_IF_NULL // @@ -167,7 +178,7 @@ NativeFunction FunctionRegistry::pc_registry_[] = { NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, add), NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, subtract), NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, multiply), - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, divide), + NUMERIC_TYPES(BINARY_SYMMETRIC_NULL_INTERNAL, divide), BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32), BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64), NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, equal), diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/cpp/src/gandiva/precompiled/arithmetic_ops.cc index 7d05699..7ad1e35 100644 --- a/cpp/src/gandiva/precompiled/arithmetic_ops.cc +++ b/cpp/src/gandiva/precompiled/arithmetic_ops.cc @@ -64,7 +64,6 @@ extern "C" { NUMERIC_TYPES(BINARY_SYMMETRIC, add, +) NUMERIC_TYPES(BINARY_SYMMETRIC, subtract, -) NUMERIC_TYPES(BINARY_SYMMETRIC, multiply, *) -NUMERIC_TYPES(BINARY_SYMMETRIC, divide, /) MOD_OP(mod, int64, int32, int32) MOD_OP(mod, int64, int64, int64) @@ -158,4 +157,22 @@ boolean not_boolean(boolean in) { return !in; } NUMERIC_BOOL_DATE_FUNCTION(IS_DISTINCT_FROM) NUMERIC_BOOL_DATE_FUNCTION(IS_NOT_DISTINCT_FROM) +// divide - handles invalid args as nulls +#define DIVIDE_NULL_INTERNAL(TYPE) \ + FORCE_INLINE \ + TYPE divide_##TYPE##_##TYPE(TYPE in1, boolean is_valid1, TYPE in2, boolean is_valid2, \ + bool *out_valid) { \ + *out_valid = false; \ + if (!is_valid1 || !is_valid2) { \ + return 0; \ + } \ + if (in2 == 0) { \ + return 0; \ + } \ + *out_valid = true; \ + return in1 / in2; \ + } + +NUMERIC_FUNCTION(DIVIDE_NULL_INTERNAL) + } // extern "C" diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc b/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc index fd1de46..4c4573d 100644 --- a/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc +++ b/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc @@ -34,4 +34,19 @@ TEST(TestArithmeticOps, TestIsDistinctFrom) { TEST(TestArithmeticOps, TestMod) { EXPECT_EQ(mod_int64_int32(10, 0), 10); } +TEST(TestArithmeticOps, TestDivide) { + boolean is_valid; + int64 out = divide_int64_int64(10, true, 0, true, &is_valid); + EXPECT_EQ(out, 0); + EXPECT_EQ(is_valid, false); + + out = divide_int64_int64(10, true, 2, false, &is_valid); + EXPECT_EQ(out, 0); + EXPECT_EQ(is_valid, false); + + out = divide_int64_int64(10, true, 2, true, &is_valid); + EXPECT_EQ(out, 5); + EXPECT_EQ(is_valid, true); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index ac3f21f..154b144 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -122,6 +122,9 @@ int32 mem_compare(const char* left, int32 left_len, const char* right, int32 rig int32 mod_int64_int32(int64 left, int32 right); +int64 divide_int64_int64(int64 in1, boolean is_valid1, int64 in2, boolean is_valid2, + bool *out_valid); + } // extern "C" #endif // PRECOMPILED_TYPES_H diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 57497cd..5a676b2 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -522,4 +522,74 @@ TEST_F(TestProjector, TestZeroCopyNegative) { EXPECT_EQ(status.code(), StatusCode::Invalid); } +TEST_F(TestProjector, TestDivideZero) { + // schema for input fields + auto field0 = field("f0", int32()); + auto field1 = field("f2", int32()); + auto schema = arrow::schema({field0, field1}); + + // output fields + auto field_div = field("divide", int32()); + + // Build expression + auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div); + + std::shared_ptr<Projector> projector; + Status status = Projector::Make(schema, {div_expr}, &projector); + EXPECT_TRUE(status.ok()) << status.message(); + + // Create a row-batch with some sample data + int num_records = 4; + auto array0 = MakeArrowArrayInt32({2, 3, 4, 5}, {true, true, true, true}); + auto array1 = MakeArrowArrayInt32({1, 2, 2, 0}, {true, true, false, true}); + // expected output + auto exp_div = MakeArrowArrayInt32({2, 1, 0, 0}, {true, true, false, false}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()) << status.message(); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp_div, outputs.at(0)); +} + +TEST_F(TestProjector, TestModZero) { + // schema for input fields + auto field0 = field("f0", arrow::int64()); + auto field1 = field("f2", int32()); + auto schema = arrow::schema({field0, field1}); + + // output fields + auto field_div = field("mod", int32()); + + // Build expression + auto mod_expr = TreeExprBuilder::MakeExpression("mod", {field0, field1}, field_div); + + std::shared_ptr<Projector> projector; + Status status = Projector::Make(schema, {mod_expr}, &projector); + EXPECT_TRUE(status.ok()) << status.message(); + + // Create a row-batch with some sample data + int num_records = 4; + auto array0 = MakeArrowArrayInt64({2, 3, 4, 5}, {true, true, true, true}); + auto array1 = MakeArrowArrayInt32({1, 2, 2, 0}, {true, true, false, true}); + // expected output + auto exp_mod = MakeArrowArrayInt32({0, 1, 0, 5}, {true, true, false, true}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()) << status.message(); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp_mod, outputs.at(0)); +} + } // namespace gandiva
