This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 25757cbc01 ARROW-16881: [Gandiva][C++] Fix castINTERVALYEAR
implementation (#13421)
25757cbc01 is described below
commit 25757cbc01bb52d0c69d75f6156ba3f2bc71a186
Author: Johnnathan Almeida <[email protected]>
AuthorDate: Thu Jul 21 02:44:23 2022 -0300
ARROW-16881: [Gandiva][C++] Fix castINTERVALYEAR implementation (#13421)
Fix an error where LLVM could not find these two functions.
Fix the regex to allow negative values for Interval Year.
Authored-by: Johnnathan <[email protected]>
Signed-off-by: Pindikura Ravindra <[email protected]>
---
cpp/src/gandiva/function_holder_registry.h | 6 +-
cpp/src/gandiva/interval_holder.cc | 20 +++----
cpp/src/gandiva/interval_holder_test.cc | 89 ++++++++++++++++++++++++++++++
3 files changed, 104 insertions(+), 11 deletions(-)
diff --git a/cpp/src/gandiva/function_holder_registry.h
b/cpp/src/gandiva/function_holder_registry.h
index 97a03db347..7220f0d9d0 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -50,7 +50,11 @@ class FunctionHolderRegistry {
static Status Make(const std::string& name, const FunctionNode& node,
FunctionHolderPtr* holder) {
- auto found = makers().find(name);
+ std::string data = name;
+ std::transform(data.begin(), data.end(), data.begin(),
+ [](unsigned char c) { return std::tolower(c); });
+
+ auto found = makers().find(data);
if (found == makers().end()) {
return Status::Invalid("function holder not registered for function " +
name);
}
diff --git a/cpp/src/gandiva/interval_holder.cc
b/cpp/src/gandiva/interval_holder.cc
index c976f42c3b..d63a11a10d 100644
--- a/cpp/src/gandiva/interval_holder.cc
+++ b/cpp/src/gandiva/interval_holder.cc
@@ -29,19 +29,19 @@ static const RE2 period_only_contains_numbers(R"(\d+)");
// pre-compiled pattern for matching periods in 8601 formats that not contains
weeks.
static const RE2 iso8601_complete_period(
- R"(P([[:digit:]]+Y|[[:digit:]]+[,.][[:digit:]]+Y)?)"
- R"(([[:digit:]]+M|[[:digit:]]+[,.][[:digit:]]+M)?)"
- R"(([[:digit:]]+D|[[:digit:]]+[,.][[:digit:]]+D)?)"
- R"(T([[:digit:]]+H|[[:digit:]]+[,.][[:digit:]]+H)?)"
- R"(([[:digit:]]+M|[[:digit:]]+[,.][[:digit:]]+M)?)"
- R"(([[:digit:]]+S|[[:digit:]]+[,.][[:digit:]]+S)?)");
+ R"(P(-?[[:digit:]]+Y|-?[[:digit:]]+[,.][[:digit:]]+Y)?)"
+ R"((-?[[:digit:]]+M|-?[[:digit:]]+[,.][[:digit:]]+M)?)"
+ R"((-?[[:digit:]]+D|-?[[:digit:]]+[,.][[:digit:]]+D)?)"
+ R"(T(-?[[:digit:]]+H|-?[[:digit:]]+[,.][[:digit:]]+H)?)"
+ R"((-?[[:digit:]]+M|-?[[:digit:]]+[,.][[:digit:]]+M)?)"
+ R"((-?[[:digit:]]+S|-?[[:digit:]]+[,.][[:digit:]]+S)?)");
// pre-compiled pattern for matching periods in 8601 formats that not contain
time
// (hours, minutes and seconds) information.
static const RE2 iso8601_period_without_time(
- R"(P([[:digit:]]+Y|[[:digit:]]+[,.][[:digit:]]+Y)?)"
- R"(([[:digit:]]+M|[[:digit:]]+[,.][[:digit:]]+M)?)"
- R"(([[:digit:]]+D|[[:digit:]]+[,.][[:digit:]]+D)?)");
+ R"(P(-?[[:digit:]]+Y|-?[[:digit:]]+[,.][[:digit:]]+Y)?)"
+ R"((-?[[:digit:]]+M|-?[[:digit:]]+[,.][[:digit:]]+M)?)"
+ R"((-?[[:digit:]]+D|-?[[:digit:]]+[,.][[:digit:]]+D)?)");
// pre-compiled pattern for matching periods in 8601 formats that not contain
time
// (hours, minutes and seconds) information.
@@ -51,7 +51,7 @@ static const std::regex
period_not_contains_time(R"(^((?!T).)*$)");
// them. The ISO8601 specification defines that if the string contains a week,
it can not
// have other time granularities information, like day, years and months.
static const RE2 iso8601_period_with_weeks(
- R"(P([[:digit:]]+W|[[:digit:]]+[,.][[:digit:]]+W){1})");
+ R"(P(-?[[:digit:]]+W|-?[[:digit:]]+[,.][[:digit:]]+W){1})");
// It considers that a day has exactly 24 hours of duration
static const int64_t kMillisInDay = 86400000;
diff --git a/cpp/src/gandiva/interval_holder_test.cc
b/cpp/src/gandiva/interval_holder_test.cc
index 34636e4998..fbfd6335f4 100644
--- a/cpp/src/gandiva/interval_holder_test.cc
+++ b/cpp/src/gandiva/interval_holder_test.cc
@@ -61,6 +61,22 @@ TEST_F(TestIntervalHolder, TestMatchAllPeriods) {
EXPECT_TRUE(out_valid);
EXPECT_FALSE(execution_context_.has_error());
+ data = "1";
+ response = cast_interval_day(&execution_context_, data.data(), 1, true,
&out_valid);
+ qty_days_in_response = 0;
+ qty_millis_in_response = 1;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response, (qty_millis_in_response << 32) | qty_days_in_response);
+
+ data = "PT0.001S";
+ response = cast_interval_day(&execution_context_, data.data(), 8, true,
&out_valid);
+ qty_days_in_response = 0;
+ qty_millis_in_response = 1;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response, (qty_millis_in_response << 32) | qty_days_in_response);
+
// Pass only years and days to cast
data = "P12Y15D";
response = cast_interval_day(&execution_context_, data.data(), 7, true,
&out_valid);
@@ -197,6 +213,79 @@ TEST_F(TestIntervalHolder, TestMatchAllPeriods) {
EXPECT_TRUE(out_valid);
EXPECT_FALSE(execution_context_.has_error());
EXPECT_EQ(response_interval_yrs, 35);
+
+ // Pass negative value
+ data = "P-1D";
+ response = cast_interval_day(&execution_context_, data.data(), 4, true,
&out_valid);
+ qty_days_in_response = -1;
+ qty_millis_in_response = 0;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response,
+ (qty_millis_in_response << 32) | (qty_days_in_response &
0x00000000FFFFFFFF));
+
+ data = "P-2D";
+ response = cast_interval_day(&execution_context_, data.data(), 4, true,
&out_valid);
+ qty_days_in_response = -2;
+ qty_millis_in_response = 0;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response,
+ (qty_millis_in_response << 32) | (qty_days_in_response &
0x00000000FFFFFFFF));
+
+ data = "P-1W";
+ response = cast_interval_day(&execution_context_, data.data(), 4, true,
&out_valid);
+ qty_days_in_response = -7;
+ qty_millis_in_response = 0;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response,
+ (qty_millis_in_response << 32) | (qty_days_in_response &
0x00000000FFFFFFFF));
+
+ data = "P-1M";
+ response = cast_interval_day(&execution_context_, data.data(), 4, true,
&out_valid);
+ qty_days_in_response = 0;
+ qty_millis_in_response = 0;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response,
+ (qty_millis_in_response << 32) | (qty_days_in_response &
0x00000000FFFFFFFF));
+
+ response_interval_yrs =
+ cast_interval_year(&execution_context_, data.data(), 4, true,
&out_valid);
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response_interval_yrs, -1);
+
+ data = "P-1Y";
+ response = cast_interval_day(&execution_context_, data.data(), 4, true,
&out_valid);
+ qty_days_in_response = 0;
+ qty_millis_in_response = 0;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response,
+ (qty_millis_in_response << 32) | (qty_days_in_response &
0x00000000FFFFFFFF));
+
+ response_interval_yrs =
+ cast_interval_year(&execution_context_, data.data(), 4, true,
&out_valid);
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response_interval_yrs, -12);
+
+ data = "P-1Y-2M";
+ response = cast_interval_day(&execution_context_, data.data(), 7, true,
&out_valid);
+ qty_days_in_response = 0;
+ qty_millis_in_response = 0;
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response,
+ (qty_millis_in_response << 32) | (qty_days_in_response &
0x00000000FFFFFFFF));
+
+ response_interval_yrs =
+ cast_interval_year(&execution_context_, data.data(), 7, true,
&out_valid);
+ EXPECT_TRUE(out_valid);
+ EXPECT_FALSE(execution_context_.has_error());
+ EXPECT_EQ(response_interval_yrs, -14);
}
TEST_F(TestIntervalHolder, TestMatchErrorsForCastIntervalDay) {