This is an automated email from the ASF dual-hosted git repository.

szaszm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit 5ff1c7cb15ca6f17b01b9cff2d1e49920ae6c1d9
Author: Martin Zink <[email protected]>
AuthorDate: Fri Apr 14 15:07:14 2023 +0200

    MINIFICPP-2063 RFC3339 parsing with expression language
    
    Closes #1528
    Signed-off-by: Marton Szasz <[email protected]>
---
 EXPRESSIONS.md                                     |  33 +++---
 extensions/expression-language/Expression.cpp      |  57 ++++++----
 .../tests/ExpressionLanguageTests.cpp              | 117 +++++++++++++++++----
 libminifi/include/utils/TimeUtil.h                 |   2 +
 libminifi/src/utils/TimeUtil.cpp                   |  34 ++++++
 libminifi/test/unit/TimeUtilTests.cpp              |  57 ++++++++++
 6 files changed, 244 insertions(+), 56 deletions(-)

diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md
index 97ba68341..00d6306a0 100644
--- a/EXPRESSIONS.md
+++ b/EXPRESSIONS.md
@@ -1860,9 +1860,9 @@ argument.
 
 **Arguments**:
 
-| Argument | Description |
-| - | - |
-| format | The format to use in the strftime syntax |
+| Argument  | Description                                                      
                                                |
+|-----------|------------------------------------------------------------------------------------------------------------------|
+| format    | The format to use in the strftime syntax                         
                                                |
 | time zone | Optional argument that specifies the time zone to use from the 
IANA Time Zone Database (e.g. 'America/New_York') |
 
 **Return Type**: String
@@ -1872,23 +1872,26 @@ argument.
 If the attribute "time" has the value "1420058163264", then the following
 Expressions will yield the following results:
 
-| Expression | Value |
-| - | - |
-| `${time:format("%Y/%m/%d %H:%M:%S", "GMT")}` | `2014/12/31 20:36:03` |
-| `${time:format("%Y", "America/Los_Angeles")}` | `2014` |
+| Expression                                    | Value                 |
+|-----------------------------------------------|-----------------------|
+| `${time:format("%Y/%m/%d %H:%M:%S", "GMT")}`  | `2014/12/31 20:36:03` |
+| `${time:format("%Y", "America/Los_Angeles")}` | `2014`                |
 
 ### toDate
 
 **Description**: Converts a String into a date represented by the number of
 milliseconds since the UNIX epoch, based on the format specified by the
-argument. The argument must be a String that is a valid strftime syntax. The
+argument. The first argument must be a String that is a valid strftime syntax. 
The
 Subject is expected to be a String that is formatted according the argument.
 The date will be evaluated using the local time zone unless specified in the
 second optional argument.
+If called without arguments it will parse the subject as an RFC3339 formatted 
datetime.  
 
 **Subject Type**: String
 
-| format | The format to use in the strftime syntax |
+| Argument  | Description                                                      
                                                                          |
+|-----------|--------------------------------------------------------------------------------------------------------------------------------------------|
+| format    | Optional argument that specifies the format to use in the 
strftime syntax                                                                 
 |
 | time zone | Optional argument that specifies the time zone to use when 
parsing the subject, from the IANA Time Zone Database (e.g. 'America/New_York') 
|
 
 **Return Type**: Number
@@ -1911,7 +1914,7 @@ chaining together the two functions:
 
 **Description**: Returns the current date and time as a Date data type object.
 
-**Subject Type**: String
+**Subject Type**: No Subject
 
 **Arguments**: No arguments
 
@@ -1919,9 +1922,9 @@ chaining together the two functions:
 
 **Examples**:
 
-| Expression | Value |
-| - | - |
-| `${now()}` | `Count of milliseconds since the UNIX epoch` |
-| `${now():minus(86400000)` | `A number presenting the time 24 hours ago` |
-| `${now():format('Y')}` | `The current year` |
+| Expression                              | Value                              
                                                     |
+|-----------------------------------------|-----------------------------------------------------------------------------------------|
+| `${now()}`                              | `Count of milliseconds since the 
UNIX epoch`                                            |
+| `${now():minus(86400000)}`              | `A number representing the time 24 
hours ago`                                           |
+| `${now():format('%Y')}`                 | `The current year`                 
                                                     |
 | `${now():minus(86400000):format('%a')}` | `The day of the week that was 
yesterday, as a 3-letter abbreviation (For example, Wed)` |
diff --git a/extensions/expression-language/Expression.cpp 
b/extensions/expression-language/Expression.cpp
index 5bb59506d..bde9039fb 100644
--- a/extensions/expression-language/Expression.cpp
+++ b/extensions/expression-language/Expression.cpp
@@ -633,35 +633,46 @@ Value expr_escapeCsv(const std::vector<Value> &args) {
 }
 
 Value expr_format(const std::vector<Value> &args) {
-  std::chrono::milliseconds dur(args[0].asUnsignedLong());
-  std::chrono::system_clock::time_point dt(dur);
-  auto zone = date::current_zone();
-  if (args.size() > 2) {
-    zone = date::locate_zone(args[2].asString());
-  }
-  auto t = date::make_zoned(zone, dt);
-  std::stringstream result_s;
-  result_s << date::format(args[1].asString(), t);
-  return Value(result_s.str());
+  using std::chrono::milliseconds;
+
+  date::sys_time<milliseconds> 
utc_time_point{milliseconds(args[0].asUnsignedLong())};
+  auto format_string = args[1].asString();
+  auto zone = args.size() > 2 ? date::locate_zone(args[2].asString()) : 
date::current_zone();
+
+  auto zoned_time_point = date::make_zoned(zone, utc_time_point);
+  std::ostringstream result_stream;
+  result_stream << date::format(args[1].asString(), zoned_time_point);
+  return Value(result_stream.str());
 }
 
 Value expr_toDate(const std::vector<Value> &args) {
-  auto arg_0 = args[0].asString();
-  std::istringstream arg_s { arg_0 };
-  date::sys_time<std::chrono::milliseconds> t;
-  date::from_stream(arg_s, args[1].asString().c_str(), t);
-  auto zone = date::current_zone();
-  if (args.size() > 2) {
-    zone = date::locate_zone(args[2].asString());
+  using std::chrono::milliseconds;
+  auto input_string = args[0].asString();
+
+  if (args.size() == 1) {
+    if (auto parsed_rfc3339 = 
org::apache::nifi::minifi::utils::timeutils::parseRfc3339(input_string))
+      return 
Value(int64_t{std::chrono::duration_cast<milliseconds>(parsed_rfc3339->time_since_epoch()).count()});
+    else
+      throw std::runtime_error(fmt::format("Failed to parse \"{}\" as an 
RFC3339 formatted datetime", input_string));
   }
-  auto utc = date::locate_zone("UTC");
-  auto utct = date::make_zoned(utc, t);
-  auto zt = date::make_zoned(zone, utct.get_local_time());
-  return 
Value(int64_t{std::chrono::duration_cast<std::chrono::milliseconds>(zt.get_sys_time().time_since_epoch()).count()});
+  auto format_string = args[1].asString();
+  auto zone = args.size() > 2 ? date::locate_zone(args[2].asString()) : 
date::current_zone();
+
+  std::istringstream input_stream{ input_string };
+  date::sys_time<milliseconds> time_point;
+  date::from_stream(input_stream, format_string.c_str(), time_point);
+  if (input_stream.fail() || (input_stream.peek() && !input_stream.eof()))
+    throw std::runtime_error(fmt::format(R"(Failed to parse "{}", with "{}" 
format)", input_string, format_string));
+
+  auto utc_zone = date::locate_zone("UTC");
+  auto utc_time_point = date::make_zoned(utc_zone, time_point);
+  auto zoned_time_point = date::make_zoned(zone, 
utc_time_point.get_local_time());
+  return 
Value(int64_t{std::chrono::duration_cast<milliseconds>(zoned_time_point.get_sys_time().time_since_epoch()).count()});
 }
 
 Value expr_now(const std::vector<Value>& /*args*/) {
-  return 
Value(int64_t{std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count()});
+  using std::chrono::milliseconds;
+  return 
Value(int64_t{std::chrono::duration_cast<milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count()});
 }
 
 Value expr_unescapeCsv(const std::vector<Value> &args) {
@@ -1510,7 +1521,7 @@ Expression make_dynamic_function(const std::string 
&function_name, const std::ve
   } else if (function_name == "format") {
     return make_dynamic_function_incomplete<expr_format>(function_name, args, 
1);
   } else if (function_name == "toDate") {
-    return make_dynamic_function_incomplete<expr_toDate>(function_name, args, 
1);
+    return make_dynamic_function_incomplete<expr_toDate>(function_name, args, 
0);
   } else if (function_name == "now") {
     return make_dynamic_function_incomplete<expr_now>(function_name, args, 0);
   } else {
diff --git a/extensions/expression-language/tests/ExpressionLanguageTests.cpp 
b/extensions/expression-language/tests/ExpressionLanguageTests.cpp
index a7dc2cbd2..f7b162c0e 100644
--- a/extensions/expression-language/tests/ExpressionLanguageTests.cpp
+++ b/extensions/expression-language/tests/ExpressionLanguageTests.cpp
@@ -1226,22 +1226,48 @@ TEST_CASE("Parse Date", "[expressionParseDate]") {
 #ifdef WIN32
   expression::dateSetInstall(TZ_DATA_DIR);
 #endif
-  auto expr = expression::compile("${message:toDate('%Y/%m/%d', 
'America/Los_Angeles')}");
-
   auto flow_file_a = std::make_shared<core::FlowFile>();
   flow_file_a->addAttribute("message", "2014/04/30");
-  REQUIRE("1398841200000" == expr(expression::Parameters{ flow_file_a 
}).asString());
+  CHECK("1398841200000" == expression::compile("${message:toDate('%Y/%m/%d', 
'America/Los_Angeles')}")(expression::Parameters{ flow_file_a }).asString());
+
+  flow_file_a->addAttribute("trillion_utc", "2001/09/09 01:46:40.000Z");
+  flow_file_a->addAttribute("trillion_paris", "2001/09/09 03:46:40.000Z");
+  flow_file_a->addAttribute("trillion_la", "2001/09/08 18:46:40.000Z");
+  CHECK("1000000000000" == 
expression::compile("${trillion_utc:toDate('%Y/%m/%d %H:%M:%SZ', 
'UTC')}")(expression::Parameters{ flow_file_a }).asString());
+  CHECK("1000000000000" == 
expression::compile("${trillion_paris:toDate('%Y/%m/%d %H:%M:%SZ', 
'Europe/Paris')}")(expression::Parameters{ flow_file_a }).asString());
+  CHECK("1000000000000" == expression::compile("${trillion_la:toDate('%Y/%m/%d 
%H:%M:%SZ', 'America/Los_Angeles')}")(expression::Parameters{ flow_file_a 
}).asString());
+
+  flow_file_a->addAttribute("timestamp_with_zone_info_00_00", 
"2023-03-02T03:49:55.190+08:45");
+  flow_file_a->addAttribute("timestamp_with_zone_info_08_45", 
"2023-03-02T03:49:55.190+08:45");
+
+  CHECK("1677697495190" == 
expression::compile("${timestamp_with_zone_info_00_00:toDate('%FT%T%Ez', 
'UTC')}")(expression::Parameters{ flow_file_a }).asString());
+  CHECK("1677697495190" == 
expression::compile("${timestamp_with_zone_info_08_45:toDate('%FT%T%Ez', 
'UTC')}")(expression::Parameters{ flow_file_a }).asString());
+
+  flow_file_a->addAttribute("invalid_timestamp_1", " 
2023-03-02T03:49:55.190+08:45");
+  flow_file_a->addAttribute("invalid_timestamp_2", 
"2023-03-02T03:49:55.190+08:45 ");
+  flow_file_a->addAttribute("invalid_timestamp_3", "2023-03-02 
03:49:55.190+08:45 ");
+
+  
REQUIRE_THROWS_AS(expression::compile("${invalid_timestamp_1:toDate('%FT%T%Ez', 
'UTC')}")(expression::Parameters{ flow_file_a }), std::runtime_error);
+  
REQUIRE_THROWS_AS(expression::compile("${invalid_timestamp_2:toDate('%FT%T%Ez', 
'UTC')}")(expression::Parameters{ flow_file_a }), std::runtime_error);
+  
REQUIRE_THROWS_AS(expression::compile("${invalid_timestamp_3:toDate('%FT%T%Ez', 
'UTC')}")(expression::Parameters{ flow_file_a }), std::runtime_error);
 }
 
 TEST_CASE("Reformat Date", "[expressionReformatDate]") {
 #ifdef WIN32
   expression::dateSetInstall(TZ_DATA_DIR);
 #endif
-  auto expr = expression::compile("${message:toDate('%Y/%m/%d', 
'GMT'):format('%m-%d-%Y', 'America/New_York')}");
-
   auto flow_file_a = std::make_shared<core::FlowFile>();
   flow_file_a->addAttribute("message", "2014/03/14");
-  REQUIRE("03-13-2014" == expr(expression::Parameters{ flow_file_a 
}).asString());
+  flow_file_a->addAttribute("blue", "20130917162643");
+
+  CHECK("03-13-2014" == expression::compile("${message:toDate('%Y/%m/%d', 
'UTC'):format('%m-%d-%Y', 'America/New_York')}")(expression::Parameters{ 
flow_file_a }).asString());
+
+  auto blue_utc_expr = expression::compile("${blue:toDate('%Y%m%d%H%M%S', 
'UTC'):format('%Y/%m/%d %H:%M:%SZ', 'UTC')}");
+  auto blue_paris_expr = expression::compile("${blue:toDate('%Y%m%d%H%M%S', 
'UTC'):format('%Y/%m/%d %H:%M:%SZ', 'Europe/Paris')}");
+  auto blue_la_expr = expression::compile("${blue:toDate('%Y%m%d%H%M%S', 
'UTC'):format('%Y/%m/%d %H:%M:%SZ', 'America/Los_Angeles')}");
+  CHECK("2013/09/17 16:26:43.000Z" == blue_utc_expr(expression::Parameters{ 
flow_file_a }).asString());
+  CHECK("2013/09/17 18:26:43.000Z" == blue_paris_expr(expression::Parameters{ 
flow_file_a }).asString());
+  CHECK("2013/09/17 09:26:43.000Z" == blue_la_expr(expression::Parameters{ 
flow_file_a }).asString());
 }
 
 TEST_CASE("Now Date", "[expressionNowDate]") {
@@ -1249,22 +1275,77 @@ TEST_CASE("Now Date", "[expressionNowDate]") {
   expression::dateSetInstall(TZ_DATA_DIR);
 #endif
   auto expr = expression::compile("${now():format('%Y')}");
-
-  auto flow_file_a = std::make_shared<core::FlowFile>();
-  flow_file_a->addAttribute("message", "2014/03/14");
-  date::year_month_day 
date{std::chrono::floor<std::chrono::days>(std::chrono::system_clock::now())};
-
-  REQUIRE(date.year().operator int() == expr(expression::Parameters{ 
flow_file_a }).asSignedLong());
+  auto current_year = 
date::year_month_day{std::chrono::floor<std::chrono::days>(std::chrono::system_clock::now())}.year().operator
 int();
+
+  CHECK(current_year == expr(expression::Parameters{ }).asSignedLong());
+}
+
+TEST_CASE("Parse RFC3339 with Expression Language toDate") {
+  using date::sys_days;
+  using org::apache::nifi::minifi::utils::timeutils::parseRfc3339;
+  using namespace date::literals;
+  using namespace std::literals::chrono_literals;
+  using std::chrono::milliseconds;
+
+  milliseconds expected_second = 
std::chrono::floor<milliseconds>((sys_days(2023_y / 03 / 01) + 19h + 04min + 
55s).time_since_epoch());
+  milliseconds expected_tenth_second = 
std::chrono::floor<milliseconds>((sys_days(2023_y / 03 / 01) + 19h + 04min + 
55s + 100ms).time_since_epoch());
+  milliseconds expected_milli_second = 
std::chrono::floor<milliseconds>((sys_days(2023_y / 03 / 01) + 19h + 04min + 
55s + 190ms).time_since_epoch());
+
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55.1Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_tenth_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55.19Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55.190Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55.190999Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01t19:04:55z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01t19:04:55.190z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T20:04:55+01:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T20:04:55.190+01:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T20:04:55.190999+01:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
20:04:55+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
20:04:55.1+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_tenth_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
20:04:55.19+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
20:04:55.190+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
20:04:55.190999+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55Z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01_19:04:55Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01_19:04:55z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.1Z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_tenth_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.19Z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.190Z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01_19:04:55.190Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.190999Z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01_19:04:55.190999Z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.190z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01_19:04:55.190z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.190999z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01_19:04:55.190999z'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55-00:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_second.count());
+  CHECK(expression::compile("${literal('2023-03-01 
19:04:55.190-00:00'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55-00:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55.190-00:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-02T03:49:55+08:45'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55+00:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T19:04:55.190+00:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_milli_second.count());
+  
CHECK(expression::compile("${literal('2023-03-01T18:04:55-01:00'):toDate()}")(expression::Parameters()).asSignedLong()
 == expected_second.count());
+
+  
REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55Zbanana'):toDate()}")(expression::Parameters()),
 std::runtime_error);
+  
REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55'):toDate()}")(expression::Parameters()),
 std::runtime_error);
+  
REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55T'):toDate()}")(expression::Parameters()),
 std::runtime_error);
+  REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55Z 
'):toDate()}")(expression::Parameters()), std::runtime_error);
+  REQUIRE_THROWS_AS(expression::compile("${literal(' 
2023-03-01T19:04:55Z'):toDate()}")(expression::Parameters()), 
std::runtime_error);
+  
REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01'):toDate()}")(expression::Parameters()),
 std::runtime_error);
 }
 
 TEST_CASE("Format Date", "[expressionFormatDate]") {
-  auto expr_gmt = expression::compile("${message:format('%m-%d-%Y', 'GMT')}");
-  auto expr_utc = expression::compile("${message:format('%m-%d-%Y', 'UTC')}");
-
+#ifdef WIN32
+  expression::dateSetInstall(TZ_DATA_DIR);
+#endif
   auto flow_file_a = std::make_shared<core::FlowFile>();
-  flow_file_a->addAttribute("message", "1394755200000");
-  REQUIRE("03-14-2014" == expr_gmt(expression::Parameters{ flow_file_a 
}).asString());
-  REQUIRE("03-14-2014" == expr_utc(expression::Parameters{ flow_file_a 
}).asString());
+  flow_file_a->addAttribute("trillion_milliseconds", "1000000000000");
+  CHECK(expression::compile("${trillion_milliseconds:format('%Y/%m/%d 
%H:%M:%SZ', 'UTC')}")(expression::Parameters{ flow_file_a }).asString() == 
"2001/09/09 01:46:40.000Z");
+  CHECK(expression::compile("${trillion_milliseconds:format('%Y/%m/%d 
%H:%M:%SZ', 'Europe/Paris')}")(expression::Parameters{ flow_file_a 
}).asString() == "2001/09/09 03:46:40.000Z");
+  CHECK(expression::compile("${trillion_milliseconds:format('%Y/%m/%d 
%H:%M:%SZ', 'America/Los_Angeles')}")(expression::Parameters{ flow_file_a 
}).asString() == "2001/09/08 18:46:40.000Z");
 }
 
 TEST_CASE("IP", "[expressionIP]") {
diff --git a/libminifi/include/utils/TimeUtil.h 
b/libminifi/include/utils/TimeUtil.h
index 2aab23ba4..2080c1ebe 100644
--- a/libminifi/include/utils/TimeUtil.h
+++ b/libminifi/include/utils/TimeUtil.h
@@ -115,6 +115,8 @@ inline std::optional<std::chrono::sys_seconds> 
parseDateTimeStr(const std::strin
   return tp;
 }
 
+std::optional<std::chrono::system_clock::time_point> parseRfc3339(const 
std::string& str);
+
 inline std::string getDateTimeStr(std::chrono::sys_seconds tp) {
   return date::format("%Y-%m-%dT%H:%M:%SZ", tp);
 }
diff --git a/libminifi/src/utils/TimeUtil.cpp b/libminifi/src/utils/TimeUtil.cpp
index 04df8d6fb..58aaec3a2 100644
--- a/libminifi/src/utils/TimeUtil.cpp
+++ b/libminifi/src/utils/TimeUtil.cpp
@@ -16,9 +16,11 @@
  */
 
 #include "utils/TimeUtil.h"
+#include "range/v3/algorithm/contains.hpp"
 
 namespace org::apache::nifi::minifi::utils::timeutils {
 
+using namespace std::literals::chrono_literals;
 static std::mutex global_clock_mtx;
 static std::shared_ptr<SteadyClock> 
global_clock{std::make_shared<SteadyClock>()};
 
@@ -33,4 +35,36 @@ void setClock(std::shared_ptr<SteadyClock> clock) {
   global_clock = std::move(clock);
 }
 
+std::optional<std::chrono::system_clock::time_point> parseRfc3339(const 
std::string& str) {
+  std::istringstream stream(str);
+  date::year_month_day date_part;
+  date::from_stream(stream, "%F", date_part);
+
+  if (stream.fail())
+    return std::nullopt;
+
+  constexpr std::string_view accepted_delimiters = "tT_ ";
+  char delimiter_char;
+  stream.get(delimiter_char);
+
+  if (stream.fail() || !ranges::contains(accepted_delimiters, delimiter_char))
+    return std::nullopt;
+
+  std::chrono::system_clock::duration time_part;
+  std::chrono::minutes offset = 0min;
+  if (str.ends_with('Z') || str.ends_with('z')) {
+    date::from_stream(stream, "%T", time_part);
+    if (stream.fail())
+      return std::nullopt;
+    stream.get();
+  } else {
+    date::from_stream(stream, "%T%Ez", time_part, {}, &offset);
+  }
+
+  if (stream.fail() || (stream.peek() && !stream.eof()))
+    return std::nullopt;
+
+  return date::sys_days(date_part) + time_part - offset;
+}
+
 }  // namespace org::apache::nifi::minifi::utils::timeutils
diff --git a/libminifi/test/unit/TimeUtilTests.cpp 
b/libminifi/test/unit/TimeUtilTests.cpp
index 883574df0..ef9d9ca37 100644
--- a/libminifi/test/unit/TimeUtilTests.cpp
+++ b/libminifi/test/unit/TimeUtilTests.cpp
@@ -254,3 +254,60 @@ TEST_CASE("Test roundToNextSecond", "[roundingTests]") {
   CHECK(parseLocalTimePoint("2022-06-21 11:00:01") == 
roundToNextSecond(parseLocalTimePoint("2022-06-21 11:00:00")));
   CHECK(parseLocalTimePoint("2022-07-21 12:12:01") == 
roundToNextSecond(parseLocalTimePoint("2022-07-21 12:12:00")));
 }
+
+TEST_CASE("Parse RFC3339", "[parseRfc3339]") {
+  using date::sys_days;
+  using org::apache::nifi::minifi::utils::timeutils::parseRfc3339;
+  using namespace date::literals;
+  using namespace std::literals::chrono_literals;
+
+  auto expected_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s;
+  auto expected_tenth_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s 
+ 100ms;
+  auto expected_milli_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s 
+ 190ms;
+  auto expected_micro_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s 
+ 190999us;
+
+  CHECK(parseRfc3339("2023-03-01T19:04:55Z") == expected_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55.1Z") == expected_tenth_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55.19Z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55.190Z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55.190999Z") == expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01t19:04:55z") == expected_second);
+  CHECK(parseRfc3339("2023-03-01t19:04:55.190z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01T20:04:55+01:00") == expected_second);
+  CHECK(parseRfc3339("2023-03-01T20:04:55.190+01:00") == 
expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01T20:04:55.190999+01:00") == 
expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01 20:04:55+01:00") == expected_second);
+  CHECK(parseRfc3339("2023-03-01 20:04:55.1+01:00") == expected_tenth_second);
+  CHECK(parseRfc3339("2023-03-01 20:04:55.19+01:00") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01 20:04:55.190+01:00") == 
expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01 20:04:55.190999+01:00") == 
expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55Z") == expected_second);
+  CHECK(parseRfc3339("2023-03-01_19:04:55Z") == expected_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55z") == expected_second);
+  CHECK(parseRfc3339("2023-03-01_19:04:55z") == expected_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.1Z") == expected_tenth_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.19Z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.190Z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01_19:04:55.190Z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.190999Z") == expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01_19:04:55.190999Z") == expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.190z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01_19:04:55.190z") == expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.190999z") == expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01_19:04:55.190999z") == expected_micro_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55-00:00") == expected_second);
+  CHECK(parseRfc3339("2023-03-01 19:04:55.190-00:00") == 
expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55-00:00") == expected_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55.190-00:00") == 
expected_milli_second);
+  CHECK(parseRfc3339("2023-03-02T03:49:55+08:45") == expected_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55+00:00") == expected_second);
+  CHECK(parseRfc3339("2023-03-01T19:04:55.190+00:00") == 
expected_milli_second);
+  CHECK(parseRfc3339("2023-03-01T18:04:55-01:00") == expected_second);
+
+  CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55Zbanana"));
+  CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55"));
+  CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55T"));
+  CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55Z "));
+  CHECK_FALSE(parseRfc3339(" 2023-03-01T19:04:55Z"));
+  CHECK_FALSE(parseRfc3339("2023-03-01"));
+}

Reply via email to