This is an automated email from the ASF dual-hosted git repository.
praveenbingo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1c62117 ARROW-11988: [C++][Gandiva] Implements last_day function
1c62117 is described below
commit 1c62117285f69e2bb15de50af9bbea2b7a215f4d
Author: Anthony Louis <[email protected]>
AuthorDate: Sat Mar 27 16:29:25 2021 +0530
ARROW-11988: [C++][Gandiva] Implements last_day function
Implements the `last_day` function in Gandiva. The function takes a date64
or timestamp value and returns the date of the last day of the month that
contains that value.
JIRA issue: https://issues.apache.org/jira/browse/ARROW-11988
Closes #9727 from anthonylouisbsb/feature/add-last-day-function and
squashes the following commits:
9f22dc3ca <Anthony Louis> Fix problems with formatter
3cdb3c534 <Anthony Louis> Fix linter warnings about the line length
95bb36e6f <Anthony Louis> Fix indentation for code
21b504f0a <Anthony Louis> Fix integration tests for last day function
7abc05e87 <Anthony Louis> Add integration test for last day function
e017ceb5e <Anthony Louis> Add functions to retrieve last day inside the
registry
c87e24837 <Anthony Louis> Add macro for last_day func
3be2982f0 <Anthony Louis> Add unit test for the last day method
9dbb50192 <Anthony Louis> Create method to retrieve last time for timestamp
Authored-by: Anthony Louis <[email protected]>
Signed-off-by: Praveen <[email protected]>
---
cpp/src/gandiva/function_registry_common.h | 9 +++++
cpp/src/gandiva/function_registry_datetime.cc | 3 +-
cpp/src/gandiva/precompiled/time.cc | 17 +++++++++
cpp/src/gandiva/precompiled/time_test.cc | 25 ++++++++++++++
cpp/src/gandiva/precompiled/types.h | 1 +
cpp/src/gandiva/tests/date_time_test.cc | 50 +++++++++++++++++++++++++++
6 files changed, 104 insertions(+), 1 deletion(-)
diff --git a/cpp/src/gandiva/function_registry_common.h
b/cpp/src/gandiva/function_registry_common.h
index b51a40d..d09ee23 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -165,6 +165,15 @@ typedef std::unordered_map<const FunctionSignature*, const
NativeFunction*, KeyH
NativeFunction(#NAME, std::vector<std::string> ALIASES,
DataTypeVector{TYPE()}, \
TYPE(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))
+// Last day functions (used with date/time types) that :
+// - NULL handling is of type NULL_IF_NULL
+//
+// The pre-compiled fn name includes the base name & input type name. eg:
+// - last_day_from_date64
+#define LAST_DAY_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE)
\
+ NativeFunction(#NAME, std::vector<std::string> ALIASES,
DataTypeVector{TYPE()}, \
+ date64(), kResultNullIfNull,
ARROW_STRINGIFY(NAME##_from_##TYPE))
+
// Hash32 functions that :
// - NULL handling is of type NULL_NEVER
//
diff --git a/cpp/src/gandiva/function_registry_datetime.cc
b/cpp/src/gandiva/function_registry_datetime.cc
index 0688970..8e89db8 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -82,7 +82,8 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
NativeFunction("extractDay", {}, DataTypeVector{day_time_interval()},
int64(),
kResultNullIfNull, "extractDay_daytimeinterval"),
- };
+
+ DATE_TYPES(LAST_DAY_SAFE_NULL_IF_NULL, last_day, {})};
return date_time_fn_registry_;
}
diff --git a/cpp/src/gandiva/precompiled/time.cc
b/cpp/src/gandiva/precompiled/time.cc
index 8c1c96d..b368886 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -451,6 +451,23 @@ EXTRACT_HOUR_TIME(time32)
DATE_TRUNC_FUNCTIONS(date64)
DATE_TRUNC_FUNCTIONS(timestamp)
+#define LAST_DAY_FUNC(TYPE) \
+ FORCE_INLINE \
+ gdv_date64 last_day_from_##TYPE(gdv_date64 millis) { \
+ EpochTimePoint received_day(millis); \
+ const auto& day_without_hours_and_sec = received_day.ClearTimeOfDay(); \
+ \
+ int received_day_in_month = day_without_hours_and_sec.TmMday(); \
+ const auto& first_day_in_month = \
+ day_without_hours_and_sec.AddDays(1 - received_day_in_month); \
+ \
+ const auto& month_last_day = first_day_in_month.AddMonths(1).AddDays(-1); \
+ \
+ return month_last_day.MillisSinceEpoch(); \
+ }
+
+DATE_TYPES(LAST_DAY_FUNC)
+
FORCE_INLINE
gdv_date64 castDATE_int64(gdv_int64 in) { return in; }
diff --git a/cpp/src/gandiva/precompiled/time_test.cc
b/cpp/src/gandiva/precompiled/time_test.cc
index 6e4ee80..295c023 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -699,4 +699,29 @@ TEST(TestTime, TestCastTimestampToDate) {
EXPECT_EQ(StringToTimestamp("2000-05-01 00:00:00"), out);
}
+TEST(TestTime, TestLastDay) {
+ // leap year test
+ gdv_timestamp ts = StringToTimestamp("2016-02-11 03:20:34");
+ auto out = last_day_from_timestamp(ts);
+ EXPECT_EQ(StringToTimestamp("2016-02-29 00:00:00"), out);
+
+ ts = StringToTimestamp("2016-02-29 23:59:59");
+ out = last_day_from_timestamp(ts);
+ EXPECT_EQ(StringToTimestamp("2016-02-29 00:00:00"), out);
+
+ ts = StringToTimestamp("2016-01-30 23:59:00");
+ out = last_day_from_timestamp(ts);
+ EXPECT_EQ(StringToTimestamp("2016-01-31 00:00:00"), out);
+
+ // normal year
+ ts = StringToTimestamp("2017-02-03 23:59:59");
+ out = last_day_from_timestamp(ts);
+ EXPECT_EQ(StringToTimestamp("2017-02-28 00:00:00"), out);
+
+ // december
+ ts = StringToTimestamp("2015-12-03 03:12:59");
+ out = last_day_from_timestamp(ts);
+ EXPECT_EQ(StringToTimestamp("2015-12-31 00:00:00"), out);
+}
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h
b/cpp/src/gandiva/precompiled/types.h
index 81bd3a2..bc17208 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -252,6 +252,7 @@ gdv_timestamp castTIMESTAMP_date64(gdv_date64);
gdv_timestamp castTIMESTAMP_int64(gdv_int64);
gdv_date64 castDATE_timestamp(gdv_timestamp);
const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64,
gdv_int32*);
+gdv_date64 last_day_from_timestamp(gdv_date64 millis);
gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale);
diff --git a/cpp/src/gandiva/tests/date_time_test.cc
b/cpp/src/gandiva/tests/date_time_test.cc
index fdf2a72..53c3726 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -537,4 +537,54 @@ TEST_F(TestProjector, TestMonthsBetween) {
EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
}
+TEST_F(TestProjector, TestLastDay) {
+ auto f0 = field("f0", arrow::date64());
+ auto schema = arrow::schema({f0});
+
+ // output fields
+ auto output = field("out", arrow::date64());
+
+ auto last_day_expr = TreeExprBuilder::MakeExpression("last_day", {f0},
output);
+
+ std::shared_ptr<Projector> projector;
+ auto status = Projector::Make(schema, {last_day_expr}, TestConfiguration(),
&projector);
+ std::cout << status.message();
+ ASSERT_TRUE(status.ok());
+
+ time_t epoch = Epoch();
+
+ // Create a row-batch with some sample data
+ // Used a leap year as example.
+ int num_records = 5;
+ auto validity = {true, true, true, true, true};
+ std::vector<int64_t> f0_data = {MillisSince(epoch, 2016, 2, 3, 8, 20, 10,
34),
+ MillisSince(epoch, 2016, 2, 29, 23, 59, 59,
59),
+ MillisSince(epoch, 2016, 1, 30, 1, 15, 20,
0),
+ MillisSince(epoch, 2017, 2, 3, 23, 15, 20,
0),
+ MillisSince(epoch, 2015, 12, 30, 22, 50, 11,
0)};
+
+ auto array0 =
+ MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data,
validity);
+
+ std::vector<int64_t> f0_output_data = {MillisSince(epoch, 2016, 2, 29, 0, 0,
0, 0),
+ MillisSince(epoch, 2016, 2, 29, 0, 0,
0, 0),
+ MillisSince(epoch, 2016, 1, 31, 0, 0,
0, 0),
+ MillisSince(epoch, 2017, 2, 28, 0, 0,
0, 0),
+ MillisSince(epoch, 2015, 12, 31, 0,
0, 0, 0)};
+
+ // expected output
+ auto exp_output = MakeArrowArrayDate64(f0_output_data, validity);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ EXPECT_TRUE(status.ok());
+
+ // Validate results
+ EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
+}
+
} // namespace gandiva