This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8e34b64 ARROW-12880: [C++][Gandiva] Add castTIME(int32),
castTIMESTAMP(int64) and castTIME(utf8) functions
8e34b64 is described below
commit 8e34b64f60120bdee5991148f765cd4452f0e0d7
Author: Anthony Louis <[email protected]>
AuthorDate: Mon Jan 24 11:20:25 2022 +0530
ARROW-12880: [C++][Gandiva] Add castTIME(int32), castTIMESTAMP(int64) and
castTIME(utf8) functions
Adds the implementation and unit tests for the **castTIME(int32)** and
**castTIME(utf8)** functions, and adds a projector test for the existing
**castTIMESTAMP(int64)** function.
Closes #10402 from anthonylouisbsb/feature/add-cast-time-timestamp
Lead-authored-by: Anthony Louis <[email protected]>
Co-authored-by: Anthony Louis <[email protected]>
Signed-off-by: Pindikura Ravindra <[email protected]>
---
cpp/src/gandiva/function_registry_datetime.cc | 7 ++
cpp/src/gandiva/precompiled/time.cc | 97 +++++++++++++++++++++++++++
cpp/src/gandiva/precompiled/time_test.cc | 68 ++++++++++++++++++-
cpp/src/gandiva/precompiled/types.h | 2 +
cpp/src/gandiva/tests/date_time_test.cc | 56 +++++++++++++++-
5 files changed, 225 insertions(+), 5 deletions(-)
diff --git a/cpp/src/gandiva/function_registry_datetime.cc
b/cpp/src/gandiva/function_registry_datetime.cc
index 54e2fa8..0d38135 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -89,9 +89,16 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
NativeFunction("castDATE", {"to_date"}, DataTypeVector{timestamp()},
date64(),
kResultNullIfNull, "castDATE_timestamp"),
+ NativeFunction("castTIME", {}, DataTypeVector{utf8()}, time32(),
kResultNullIfNull,
+ "castTIME_utf8",
+ NativeFunction::kNeedsContext |
NativeFunction::kCanReturnErrors),
+
NativeFunction("castTIME", {}, DataTypeVector{timestamp()}, time32(),
kResultNullIfNull, "castTIME_timestamp"),
+ NativeFunction("castTIME", {}, DataTypeVector{int32()}, time32(),
kResultNullIfNull,
+ "castTIME_int32"),
+
NativeFunction("castBIGINT", {}, DataTypeVector{day_time_interval()},
int64(),
kResultNullIfNull, "castBIGINT_daytimeinterval"),
diff --git a/cpp/src/gandiva/precompiled/time.cc
b/cpp/src/gandiva/precompiled/time.cc
index 3835c0b..ae443ac 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -764,6 +764,92 @@ gdv_date64 castDATE_timestamp(gdv_timestamp
timestamp_in_millis) {
return tp.ClearTimeOfDay().MillisSinceEpoch();
}
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are hours, minutes.
+ * The seconds and subseconds are optional.
+ * Format is hours:minutes[:seconds.millis]
+ */
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
+ using gandiva::TimeFields;
+ using std::chrono::hours;
+ using std::chrono::milliseconds;
+ using std::chrono::minutes;
+ using std::chrono::seconds;
+
+ const int32_t kDisplacementHours = 4;
+ int32_t time_fields[kDisplacementHours] = {0, 0, 0, 0};
+ int32_t sub_seconds_len = 0;
+ int32_t time_field_idx = TimeFields::kHours, index = 0, value = 0;
+
+ bool has_invalid_digit = false;
+ while (time_field_idx < TimeFields::kDisplacementHours && index < length) {
+ if (isdigit(input[index])) {
+ value = (value * 10) + (input[index] - '0');
+
+ if (time_field_idx == TimeFields::kSubSeconds) {
+ sub_seconds_len++;
+ }
+ } else {
+ time_fields[time_field_idx - TimeFields::kHours] = value;
+ value = 0;
+
+ switch (input[index]) {
+ case '.':
+ case ':':
+ time_field_idx++;
+ break;
+ default:
+ has_invalid_digit = true;
+ break;
+ }
+ }
+
+ index++;
+ }
+
+ if (has_invalid_digit) {
+ const char* msg = "Invalid character in time ";
+ set_error_for_date(length, input, msg, context);
+ return 0;
+ }
+
+ // Check if the hours and minutes were defined and store the last value
+ if (time_field_idx < TimeFields::kDisplacementHours) {
+ time_fields[time_field_idx - TimeFields::kHours] = value;
+ }
+
+ // adjust the milliseconds
+ if (sub_seconds_len > 0) {
+ if (sub_seconds_len > 3) {
+ const char* msg = "Invalid millis for time value ";
+ set_error_for_date(length, input, msg, context);
+ return 0;
+ }
+
+ while (sub_seconds_len < 3) {
+ time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10;
+ sub_seconds_len++;
+ }
+ }
+
+ int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours];
+ int32_t input_minutes = time_fields[TimeFields::kMinutes -
TimeFields::kHours];
+ int32_t input_seconds = time_fields[TimeFields::kSeconds -
TimeFields::kHours];
+ int32_t input_subseconds = time_fields[TimeFields::kSubSeconds -
TimeFields::kHours];
+
+ if (!is_valid_time(input_hours, input_minutes, input_seconds)) {
+ const char* msg = "Not a valid time value ";
+ set_error_for_date(length, input, msg, context);
+ return 0;
+ }
+
+ auto time_info = hours(input_hours) + minutes(input_minutes) +
seconds(input_seconds) +
+ milliseconds(input_subseconds);
+
+ return static_cast<gdv_time32>(time_info.count());
+}
+
gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis) {
// Retrieves a timestamp and returns the number of milliseconds since the
midnight
EpochTimePoint tp(timestamp_in_millis);
@@ -775,6 +861,17 @@ gdv_time32 castTIME_timestamp(gdv_timestamp
timestamp_in_millis) {
return static_cast<int32_t>(millis_since_midnight);
}
+// Gets an arbitrary number and return the number of milliseconds since
midnight
+gdv_time32 castTIME_int32(int32_t int_val) {
+ if (int_val < 0) {
+ return 0;
+ }
+
+ auto millis_since_midnight = static_cast<gdv_time32>(int_val %
MILLIS_IN_DAY);
+
+ return millis_since_midnight;
+}
+
const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in,
gdv_int64 length, gdv_int32* out_len) {
gdv_int64 year = extractYear_timestamp(in);
diff --git a/cpp/src/gandiva/precompiled/time_test.cc
b/cpp/src/gandiva/precompiled/time_test.cc
index 231e72f..b188d3a 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -16,10 +16,10 @@
// under the License.
#include <gtest/gtest.h>
-#include <time.h>
-#include "../execution_context.h"
+#include "gandiva/execution_context.h"
#include "gandiva/precompiled/testing.h"
+#include "gandiva/precompiled/time_constants.h"
#include "gandiva/precompiled/types.h"
namespace gandiva {
@@ -132,6 +132,48 @@ TEST(TestTime, TestCastTimestamp) {
context.Reset();
}
+TEST(TestTime, TestCastTimeUtf8) {
+ ExecutionContext context;
+ auto context_ptr = reinterpret_cast<int64_t>(&context);
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30", 7), 35130000);
+ EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.920", 11), 35130920);
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.1", 9),
+ castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.10", 10),
+ castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.100", 11),
+ castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+ // error cases
+ EXPECT_EQ(castTIME_utf8(context_ptr, "24H00H00", 8), 0);
+ EXPECT_EQ(context.get_error(), "Invalid character in time 24H00H00");
+ context.Reset();
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "24:00:00", 8), 0);
+ EXPECT_EQ(context.get_error(), "Not a valid time value 24:00:00");
+ context.Reset();
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "00:60:00", 8), 0);
+ EXPECT_EQ(context.get_error(), "Not a valid time value 00:60:00");
+ context.Reset();
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:100", 9), 0);
+ EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100");
+ context.Reset();
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0);
+ EXPECT_EQ(context.get_error(), "Invalid millis for time value
00:00:00.0001");
+ context.Reset();
+
+ EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0);
+ EXPECT_EQ(context.get_error(), "Invalid millis for time value
00:00:00.1000");
+ context.Reset();
+}
+
#ifndef _WIN32
// TODO(wesm): ARROW-4495. Need to address TZ database issues on Windows
@@ -790,6 +832,28 @@ TEST(TestTime, TestCastTimestampToTime) {
EXPECT_EQ(expected_response, out);
}
+TEST(TestTime, TestIntToTime) {
+ int32_t val = 1000;
+ int32_t expected_response = val;
+ auto out = castTIME_int32(val);
+ EXPECT_EQ(expected_response, out);
+
+ val = MILLIS_IN_DAY - 1;
+ expected_response = val;
+ out = castTIME_int32(val);
+ EXPECT_EQ(expected_response, out);
+
+ val = MILLIS_IN_DAY + 1;
+ expected_response = 1;
+ out = castTIME_int32(val);
+ EXPECT_EQ(expected_response, out);
+
+ val = -1;
+ expected_response = 0;
+ out = castTIME_int32(val);
+ EXPECT_EQ(expected_response, out);
+}
+
TEST(TestTime, TestLastDay) {
// leap year test
gdv_timestamp ts = StringToTimestamp("2016-02-11 03:20:34");
diff --git a/cpp/src/gandiva/precompiled/types.h
b/cpp/src/gandiva/precompiled/types.h
index b211a1d..e29e782 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -397,7 +397,9 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context,
const char* input,
gdv_timestamp castTIMESTAMP_date64(gdv_date64);
gdv_timestamp castTIMESTAMP_int64(gdv_int64);
gdv_date64 castDATE_timestamp(gdv_timestamp);
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length);
gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis);
+gdv_time32 castTIME_int32(int32_t int_val);
const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64,
gdv_int32*);
gdv_date64 last_day_from_timestamp(gdv_date64 millis);
diff --git a/cpp/src/gandiva/tests/date_time_test.cc
b/cpp/src/gandiva/tests/date_time_test.cc
index b840e73..a9d0ae6 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -16,8 +16,9 @@
// under the License.
#include <gtest/gtest.h>
-#include <math.h>
-#include <time.h>
+
+#include <cmath>
+#include <ctime>
#include "arrow/memory_pool.h"
#include "gandiva/precompiled/time_constants.h"
@@ -549,6 +550,56 @@ TEST_F(TestProjector, TestMonthsBetween) {
EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
}
+TEST_F(TestProjector, TestCastTimestampFromInt64) {
+ auto f0 = field("f0", arrow::int64());
+ auto schema = arrow::schema({f0});
+
+ // output fields
+ auto output = field("out", arrow::timestamp(arrow::TimeUnit::MILLI));
+
+ auto casttimestamp_expr =
+ TreeExprBuilder::MakeExpression("castTIMESTAMP", {f0}, output);
+
+ std::shared_ptr<Projector> projector;
+ auto status =
+ Projector::Make(schema, {casttimestamp_expr}, TestConfiguration(),
&projector);
+ std::cout << status.message();
+ ASSERT_TRUE(status.ok());
+
+ time_t epoch = Epoch();
+
+ int num_records = 5;
+ auto validity = {true, true, true, true, true};
+ std::vector<int64_t> f0_data = {MillisSince(epoch, 2016, 2, 3, 8, 20, 10,
34),
+ MillisSince(epoch, 2016, 2, 29, 23, 59, 59,
59),
+ MillisSince(epoch, 2016, 1, 30, 1, 15, 20,
0),
+ MillisSince(epoch, 2017, 2, 3, 23, 15, 20,
0),
+ MillisSince(epoch, 1970, 12, 30, 22, 50, 11,
0)};
+
+ auto array0 = MakeArrowArrayInt64(f0_data, validity);
+
+ std::vector<int64_t> f0_output_data = {MillisSince(epoch, 2016, 2, 3, 8, 20,
10, 34),
+ MillisSince(epoch, 2016, 2, 29, 23,
59, 59, 59),
+ MillisSince(epoch, 2016, 1, 30, 1,
15, 20, 0),
+ MillisSince(epoch, 2017, 2, 3, 23,
15, 20, 0),
+ MillisSince(epoch, 1970, 12, 30, 22,
50, 11, 0)};
+
+ // expected output
+ auto exp_output = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
+ timestamp(arrow::TimeUnit::MILLI), f0_output_data, validity);
+
+ // prepare input record batch
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+ // Evaluate expression
+ arrow::ArrayVector outputs;
+ status = projector->Evaluate(*in_batch, pool_, &outputs);
+ EXPECT_TRUE(status.ok());
+
+ // Validate results
+ EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
+}
+
TEST_F(TestProjector, TestLastDay) {
auto f0 = field("f0", arrow::date64());
auto schema = arrow::schema({f0});
@@ -598,5 +649,4 @@ TEST_F(TestProjector, TestLastDay) {
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
}
-
} // namespace gandiva