This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e34b64  ARROW-12880: [C++][Gandiva] Add castTIME(int32), 
castTIMESTAMP(int64) and castTIME(utf8) functions
8e34b64 is described below

commit 8e34b64f60120bdee5991148f765cd4452f0e0d7
Author: Anthony Louis <[email protected]>
AuthorDate: Mon Jan 24 11:20:25 2022 +0530

    ARROW-12880: [C++][Gandiva] Add castTIME(int32), castTIMESTAMP(int64) and 
castTIME(utf8) functions
    
    Adds the implementation for the **castTIME(int32)** and **castTIME(utf8)** 
functions and adds tests for the **castTIMESTAMP(int64)** function.
    
    Closes #10402 from anthonylouisbsb/feature/add-cast-time-timestamp
    
    Lead-authored-by: Anthony Louis <[email protected]>
    Co-authored-by: Anthony Louis <[email protected]>
    Signed-off-by: Pindikura Ravindra <[email protected]>
---
 cpp/src/gandiva/function_registry_datetime.cc |  7 ++
 cpp/src/gandiva/precompiled/time.cc           | 97 +++++++++++++++++++++++++++
 cpp/src/gandiva/precompiled/time_test.cc      | 68 ++++++++++++++++++-
 cpp/src/gandiva/precompiled/types.h           |  2 +
 cpp/src/gandiva/tests/date_time_test.cc       | 56 +++++++++++++++-
 5 files changed, 225 insertions(+), 5 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_datetime.cc 
b/cpp/src/gandiva/function_registry_datetime.cc
index 54e2fa8..0d38135 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -89,9 +89,16 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
       NativeFunction("castDATE", {"to_date"}, DataTypeVector{timestamp()}, 
date64(),
                      kResultNullIfNull, "castDATE_timestamp"),
 
+      NativeFunction("castTIME", {}, DataTypeVector{utf8()}, time32(), 
kResultNullIfNull,
+                     "castTIME_utf8",
+                     NativeFunction::kNeedsContext | 
NativeFunction::kCanReturnErrors),
+
       NativeFunction("castTIME", {}, DataTypeVector{timestamp()}, time32(),
                      kResultNullIfNull, "castTIME_timestamp"),
 
+      NativeFunction("castTIME", {}, DataTypeVector{int32()}, time32(), 
kResultNullIfNull,
+                     "castTIME_int32"),
+
       NativeFunction("castBIGINT", {}, DataTypeVector{day_time_interval()}, 
int64(),
                      kResultNullIfNull, "castBIGINT_daytimeinterval"),
 
diff --git a/cpp/src/gandiva/precompiled/time.cc 
b/cpp/src/gandiva/precompiled/time.cc
index 3835c0b..ae443ac 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -764,6 +764,92 @@ gdv_date64 castDATE_timestamp(gdv_timestamp 
timestamp_in_millis) {
   return tp.ClearTimeOfDay().MillisSinceEpoch();
 }
 
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are hours, minutes.
+ * The seconds and subseconds are optional.
+ * Format is hours:minutes[:seconds[.millis]]; millis may have at most 3 digits.
+ */
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  const int32_t kDisplacementHours = 4;
+  int32_t time_fields[kDisplacementHours] = {0, 0, 0, 0};
+  int32_t sub_seconds_len = 0;
+  int32_t time_field_idx = TimeFields::kHours, index = 0, value = 0;
+
+  bool has_invalid_digit = false;
+  while (time_field_idx < TimeFields::kDisplacementHours && index < length) {
+    if (isdigit(input[index])) {
+      value = (value * 10) + (input[index] - '0');
+
+      if (time_field_idx == TimeFields::kSubSeconds) {
+        sub_seconds_len++;
+      }
+    } else {
+      time_fields[time_field_idx - TimeFields::kHours] = value;
+      value = 0;
+
+      switch (input[index]) {
+        case '.':
+        case ':':
+          time_field_idx++;
+          break;
+        default:
+          has_invalid_digit = true;
+          break;
+      }
+    }
+
+    index++;
+  }
+
+  if (has_invalid_digit) {
+    const char* msg = "Invalid character in time ";
+    set_error_for_date(length, input, msg, context);
+    return 0;
+  }
+
+  // Store the last parsed field, which has no trailing separator to flush it
+  if (time_field_idx < TimeFields::kDisplacementHours) {
+    time_fields[time_field_idx - TimeFields::kHours] = value;
+  }
+
+  // Scale a 1- or 2-digit fraction up to milliseconds (e.g. ".1" -> 100)
+  if (sub_seconds_len > 0) {
+    if (sub_seconds_len > 3) {
+      const char* msg = "Invalid millis for time value ";
+      set_error_for_date(length, input, msg, context);
+      return 0;
+    }
+
+    while (sub_seconds_len < 3) {
+      time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10;
+      sub_seconds_len++;
+    }
+  }
+
+  int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours];
+  int32_t input_minutes = time_fields[TimeFields::kMinutes - 
TimeFields::kHours];
+  int32_t input_seconds = time_fields[TimeFields::kSeconds - 
TimeFields::kHours];
+  int32_t input_subseconds = time_fields[TimeFields::kSubSeconds - 
TimeFields::kHours];
+
+  if (!is_valid_time(input_hours, input_minutes, input_seconds)) {
+    const char* msg = "Not a valid time value ";
+    set_error_for_date(length, input, msg, context);
+    return 0;
+  }
+
+  auto time_info = hours(input_hours) + minutes(input_minutes) + 
seconds(input_seconds) +
+                   milliseconds(input_subseconds);
+
+  return static_cast<gdv_time32>(time_info.count());
+}
+
 gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis) {
   // Retrieves a timestamp and returns the number of milliseconds since the 
midnight
   EpochTimePoint tp(timestamp_in_millis);
@@ -775,6 +861,17 @@ gdv_time32 castTIME_timestamp(gdv_timestamp 
timestamp_in_millis) {
   return static_cast<int32_t>(millis_since_midnight);
 }
 
+// Takes an int32 and returns the milliseconds since midnight (the value modulo 
one day); negative inputs return 0
+gdv_time32 castTIME_int32(int32_t int_val) {
+  if (int_val < 0) {
+    return 0;
+  }
+
+  auto millis_since_midnight = static_cast<gdv_time32>(int_val % 
MILLIS_IN_DAY);
+
+  return millis_since_midnight;
+}
+
 const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in,
                                         gdv_int64 length, gdv_int32* out_len) {
   gdv_int64 year = extractYear_timestamp(in);
diff --git a/cpp/src/gandiva/precompiled/time_test.cc 
b/cpp/src/gandiva/precompiled/time_test.cc
index 231e72f..b188d3a 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -16,10 +16,10 @@
 // under the License.
 
 #include <gtest/gtest.h>
-#include <time.h>
 
-#include "../execution_context.h"
+#include "gandiva/execution_context.h"
 #include "gandiva/precompiled/testing.h"
+#include "gandiva/precompiled/time_constants.h"
 #include "gandiva/precompiled/types.h"
 
 namespace gandiva {
@@ -132,6 +132,48 @@ TEST(TestTime, TestCastTimestamp) {
   context.Reset();
 }
 
+TEST(TestTime, TestCastTimeUtf8) {
+  ExecutionContext context;
+  auto context_ptr = reinterpret_cast<int64_t>(&context);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30", 7), 35130000);
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.920", 11), 35130920);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.1", 9),
+            castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.10", 10),
+            castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.100", 11),
+            castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+  // error cases
+  EXPECT_EQ(castTIME_utf8(context_ptr, "24H00H00", 8), 0);
+  EXPECT_EQ(context.get_error(), "Invalid character in time 24H00H00");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "24:00:00", 8), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid time value 24:00:00");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:60:00", 8), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid time value 00:60:00");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:100", 9), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0);
+  EXPECT_EQ(context.get_error(), "Invalid millis for time value 
00:00:00.0001");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0);
+  EXPECT_EQ(context.get_error(), "Invalid millis for time value 
00:00:00.1000");
+  context.Reset();
+}
+
 #ifndef _WIN32
 
 // TODO(wesm): ARROW-4495. Need to address TZ database issues on Windows
@@ -790,6 +832,28 @@ TEST(TestTime, TestCastTimestampToTime) {
   EXPECT_EQ(expected_response, out);
 }
 
+TEST(TestTime, TestIntToTime) {
+  int32_t val = 1000;
+  int32_t expected_response = val;
+  auto out = castTIME_int32(val);
+  EXPECT_EQ(expected_response, out);
+
+  val = MILLIS_IN_DAY - 1;
+  expected_response = val;
+  out = castTIME_int32(val);
+  EXPECT_EQ(expected_response, out);
+
+  val = MILLIS_IN_DAY + 1;
+  expected_response = 1;
+  out = castTIME_int32(val);
+  EXPECT_EQ(expected_response, out);
+
+  val = -1;
+  expected_response = 0;
+  out = castTIME_int32(val);
+  EXPECT_EQ(expected_response, out);
+}
+
 TEST(TestTime, TestLastDay) {
   // leap year test
   gdv_timestamp ts = StringToTimestamp("2016-02-11 03:20:34");
diff --git a/cpp/src/gandiva/precompiled/types.h 
b/cpp/src/gandiva/precompiled/types.h
index b211a1d..e29e782 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -397,7 +397,9 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context, 
const char* input,
 gdv_timestamp castTIMESTAMP_date64(gdv_date64);
 gdv_timestamp castTIMESTAMP_int64(gdv_int64);
 gdv_date64 castDATE_timestamp(gdv_timestamp);
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length);
 gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis);
+gdv_time32 castTIME_int32(int32_t int_val);
 const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, 
gdv_int32*);
 gdv_date64 last_day_from_timestamp(gdv_date64 millis);
 
diff --git a/cpp/src/gandiva/tests/date_time_test.cc 
b/cpp/src/gandiva/tests/date_time_test.cc
index b840e73..a9d0ae6 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -16,8 +16,9 @@
 // under the License.
 
 #include <gtest/gtest.h>
-#include <math.h>
-#include <time.h>
+
+#include <cmath>
+#include <ctime>
 
 #include "arrow/memory_pool.h"
 #include "gandiva/precompiled/time_constants.h"
@@ -549,6 +550,56 @@ TEST_F(TestProjector, TestMonthsBetween) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestCastTimestampFromInt64) {
+  auto f0 = field("f0", arrow::int64());
+  auto schema = arrow::schema({f0});
+
+  // output fields
+  auto output = field("out", arrow::timestamp(arrow::TimeUnit::MILLI));
+
+  auto casttimestamp_expr =
+      TreeExprBuilder::MakeExpression("castTIMESTAMP", {f0}, output);
+
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {casttimestamp_expr}, TestConfiguration(), 
&projector);
+  std::cout << status.message();
+  ASSERT_TRUE(status.ok());
+
+  time_t epoch = Epoch();
+
+  int num_records = 5;
+  auto validity = {true, true, true, true, true};
+  std::vector<int64_t> f0_data = {MillisSince(epoch, 2016, 2, 3, 8, 20, 10, 
34),
+                                  MillisSince(epoch, 2016, 2, 29, 23, 59, 59, 
59),
+                                  MillisSince(epoch, 2016, 1, 30, 1, 15, 20, 
0),
+                                  MillisSince(epoch, 2017, 2, 3, 23, 15, 20, 
0),
+                                  MillisSince(epoch, 1970, 12, 30, 22, 50, 11, 
0)};
+
+  auto array0 = MakeArrowArrayInt64(f0_data, validity);
+
+  std::vector<int64_t> f0_output_data = {MillisSince(epoch, 2016, 2, 3, 8, 20, 
10, 34),
+                                         MillisSince(epoch, 2016, 2, 29, 23, 
59, 59, 59),
+                                         MillisSince(epoch, 2016, 1, 30, 1, 
15, 20, 0),
+                                         MillisSince(epoch, 2017, 2, 3, 23, 
15, 20, 0),
+                                         MillisSince(epoch, 1970, 12, 30, 22, 
50, 11, 0)};
+
+  // expected output
+  auto exp_output = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
+      timestamp(arrow::TimeUnit::MILLI), f0_output_data, validity);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
+}
+
 TEST_F(TestProjector, TestLastDay) {
   auto f0 = field("f0", arrow::date64());
   auto schema = arrow::schema({f0});
@@ -598,5 +649,4 @@ TEST_F(TestProjector, TestLastDay) {
   // Validate results
   EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
 }
-
 }  // namespace gandiva

Reply via email to