This is an automated email from the ASF dual-hosted git repository. szaszm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit 5ff1c7cb15ca6f17b01b9cff2d1e49920ae6c1d9 Author: Martin Zink <[email protected]> AuthorDate: Fri Apr 14 15:07:14 2023 +0200 MINIFICPP-2063 RFC3339 parsing with expression language Closes #1528 Signed-off-by: Marton Szasz <[email protected]> --- EXPRESSIONS.md | 33 +++--- extensions/expression-language/Expression.cpp | 57 ++++++---- .../tests/ExpressionLanguageTests.cpp | 117 +++++++++++++++++---- libminifi/include/utils/TimeUtil.h | 2 + libminifi/src/utils/TimeUtil.cpp | 34 ++++++ libminifi/test/unit/TimeUtilTests.cpp | 57 ++++++++++ 6 files changed, 244 insertions(+), 56 deletions(-) diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md index 97ba68341..00d6306a0 100644 --- a/EXPRESSIONS.md +++ b/EXPRESSIONS.md @@ -1860,9 +1860,9 @@ argument. **Arguments**: -| Argument | Description | -| - | - | -| format | The format to use in the strftime syntax | +| Argument | Description | +|-----------|------------------------------------------------------------------------------------------------------------------| +| format | The format to use in the strftime syntax | | time zone | Optional argument that specifies the time zone to use from the IANA Time Zone Database (e.g. 'America/New_York') | **Return Type**: String @@ -1872,23 +1872,26 @@ argument. If the attribute "time" has the value "1420058163264", then the following Expressions will yield the following results: -| Expression | Value | -| - | - | -| `${time:format("%Y/%m/%d %H:%M:%S", "GMT")}` | `2014/12/31 20:36:03` | -| `${time:format("%Y", "America/Los_Angeles")}` | `2014` | +| Expression | Value | +|-----------------------------------------------|-----------------------| +| `${time:format("%Y/%m/%d %H:%M:%S", "GMT")}` | `2014/12/31 20:36:03` | +| `${time:format("%Y", "America/Los_Angeles")}` | `2014` | ### toDate **Description**: Converts a String into a date represented by the number of milliseconds since the UNIX epoch, based on the format specified by the -argument. 
The argument must be a String that is a valid strftime syntax. The +argument. The first argument must be a String that is a valid strftime syntax. The Subject is expected to be a String that is formatted according to the argument. The date will be evaluated using the local time zone unless specified in the second optional argument. +If called without arguments it will parse the subject as an RFC3339 formatted datetime. **Subject Type**: String -| format | The format to use in the strftime syntax | +| Argument | Description | +|-----------|--------------------------------------------------------------------------------------------------------------------------------------------| +| format | Optional argument that specifies the format to use in the strftime syntax | | time zone | Optional argument that specifies the time zone to use when parsing the subject, from the IANA Time Zone Database (e.g. 'America/New_York') | **Return Type**: Number @@ -1911,7 +1914,7 @@ chaining together the two functions: **Description**: Returns the current date and time as a Date data type object. 
-**Subject Type**: String +**Subject Type**: No Subject **Arguments**: No arguments @@ -1919,9 +1922,9 @@ chaining together the two functions: **Examples**: -| Expression | Value | -| - | - | -| `${now()}` | `Count of milliseconds since the UNIX epoch` | -| `${now():minus(86400000)` | `A number presenting the time 24 hours ago` | -| `${now():format('Y')}` | `The current year` | +| Expression | Value | +|-----------------------------------------|-----------------------------------------------------------------------------------------| +| `${now()}` | `Count of milliseconds since the UNIX epoch` | +| `${now():minus(86400000)` | `A number presenting the time 24 hours ago` | +| `${now():format('Y')}` | `The current year` | | `${now():minus(86400000):format('%a')}` | `The day of the week that was yesterday, as a 3-letter abbreviation (For example, Wed)` | diff --git a/extensions/expression-language/Expression.cpp b/extensions/expression-language/Expression.cpp index 5bb59506d..bde9039fb 100644 --- a/extensions/expression-language/Expression.cpp +++ b/extensions/expression-language/Expression.cpp @@ -633,35 +633,46 @@ Value expr_escapeCsv(const std::vector<Value> &args) { } Value expr_format(const std::vector<Value> &args) { - std::chrono::milliseconds dur(args[0].asUnsignedLong()); - std::chrono::system_clock::time_point dt(dur); - auto zone = date::current_zone(); - if (args.size() > 2) { - zone = date::locate_zone(args[2].asString()); - } - auto t = date::make_zoned(zone, dt); - std::stringstream result_s; - result_s << date::format(args[1].asString(), t); - return Value(result_s.str()); + using std::chrono::milliseconds; + + date::sys_time<milliseconds> utc_time_point{milliseconds(args[0].asUnsignedLong())}; + auto format_string = args[1].asString(); + auto zone = args.size() > 2 ? 
date::locate_zone(args[2].asString()) : date::current_zone(); + + auto zoned_time_point = date::make_zoned(zone, utc_time_point); + std::ostringstream result_stream; + result_stream << date::format(args[1].asString(), zoned_time_point); + return Value(result_stream.str()); } Value expr_toDate(const std::vector<Value> &args) { - auto arg_0 = args[0].asString(); - std::istringstream arg_s { arg_0 }; - date::sys_time<std::chrono::milliseconds> t; - date::from_stream(arg_s, args[1].asString().c_str(), t); - auto zone = date::current_zone(); - if (args.size() > 2) { - zone = date::locate_zone(args[2].asString()); + using std::chrono::milliseconds; + auto input_string = args[0].asString(); + + if (args.size() == 1) { + if (auto parsed_rfc3339 = org::apache::nifi::minifi::utils::timeutils::parseRfc3339(input_string)) + return Value(int64_t{std::chrono::duration_cast<milliseconds>(parsed_rfc3339->time_since_epoch()).count()}); + else + throw std::runtime_error(fmt::format("Failed to parse \"{}\" as an RFC3339 formatted datetime", input_string)); } - auto utc = date::locate_zone("UTC"); - auto utct = date::make_zoned(utc, t); - auto zt = date::make_zoned(zone, utct.get_local_time()); - return Value(int64_t{std::chrono::duration_cast<std::chrono::milliseconds>(zt.get_sys_time().time_since_epoch()).count()}); + auto format_string = args[1].asString(); + auto zone = args.size() > 2 ? 
date::locate_zone(args[2].asString()) : date::current_zone(); + + std::istringstream input_stream{ input_string }; + date::sys_time<milliseconds> time_point; + date::from_stream(input_stream, format_string.c_str(), time_point); + if (input_stream.fail() || (input_stream.peek() && !input_stream.eof())) + throw std::runtime_error(fmt::format(R"(Failed to parse "{}", with "{}" format)", input_string, format_string)); + + auto utc_zone = date::locate_zone("UTC"); + auto utc_time_point = date::make_zoned(utc_zone, time_point); + auto zoned_time_point = date::make_zoned(zone, utc_time_point.get_local_time()); + return Value(int64_t{std::chrono::duration_cast<milliseconds>(zoned_time_point.get_sys_time().time_since_epoch()).count()}); } Value expr_now(const std::vector<Value>& /*args*/) { - return Value(int64_t{std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count()}); + using std::chrono::milliseconds; + return Value(int64_t{std::chrono::duration_cast<milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count()}); } Value expr_unescapeCsv(const std::vector<Value> &args) { @@ -1510,7 +1521,7 @@ Expression make_dynamic_function(const std::string &function_name, const std::ve } else if (function_name == "format") { return make_dynamic_function_incomplete<expr_format>(function_name, args, 1); } else if (function_name == "toDate") { - return make_dynamic_function_incomplete<expr_toDate>(function_name, args, 1); + return make_dynamic_function_incomplete<expr_toDate>(function_name, args, 0); } else if (function_name == "now") { return make_dynamic_function_incomplete<expr_now>(function_name, args, 0); } else { diff --git a/extensions/expression-language/tests/ExpressionLanguageTests.cpp b/extensions/expression-language/tests/ExpressionLanguageTests.cpp index a7dc2cbd2..f7b162c0e 100644 --- a/extensions/expression-language/tests/ExpressionLanguageTests.cpp +++ 
b/extensions/expression-language/tests/ExpressionLanguageTests.cpp @@ -1226,22 +1226,48 @@ TEST_CASE("Parse Date", "[expressionParseDate]") { #ifdef WIN32 expression::dateSetInstall(TZ_DATA_DIR); #endif - auto expr = expression::compile("${message:toDate('%Y/%m/%d', 'America/Los_Angeles')}"); - auto flow_file_a = std::make_shared<core::FlowFile>(); flow_file_a->addAttribute("message", "2014/04/30"); - REQUIRE("1398841200000" == expr(expression::Parameters{ flow_file_a }).asString()); + CHECK("1398841200000" == expression::compile("${message:toDate('%Y/%m/%d', 'America/Los_Angeles')}")(expression::Parameters{ flow_file_a }).asString()); + + flow_file_a->addAttribute("trillion_utc", "2001/09/09 01:46:40.000Z"); + flow_file_a->addAttribute("trillion_paris", "2001/09/09 03:46:40.000Z"); + flow_file_a->addAttribute("trillion_la", "2001/09/08 18:46:40.000Z"); + CHECK("1000000000000" == expression::compile("${trillion_utc:toDate('%Y/%m/%d %H:%M:%SZ', 'UTC')}")(expression::Parameters{ flow_file_a }).asString()); + CHECK("1000000000000" == expression::compile("${trillion_paris:toDate('%Y/%m/%d %H:%M:%SZ', 'Europe/Paris')}")(expression::Parameters{ flow_file_a }).asString()); + CHECK("1000000000000" == expression::compile("${trillion_la:toDate('%Y/%m/%d %H:%M:%SZ', 'America/Los_Angeles')}")(expression::Parameters{ flow_file_a }).asString()); + + flow_file_a->addAttribute("timestamp_with_zone_info_00_00", "2023-03-02T03:49:55.190+08:45"); + flow_file_a->addAttribute("timestamp_with_zone_info_08_45", "2023-03-02T03:49:55.190+08:45"); + + CHECK("1677697495190" == expression::compile("${timestamp_with_zone_info_00_00:toDate('%FT%T%Ez', 'UTC')}")(expression::Parameters{ flow_file_a }).asString()); + CHECK("1677697495190" == expression::compile("${timestamp_with_zone_info_08_45:toDate('%FT%T%Ez', 'UTC')}")(expression::Parameters{ flow_file_a }).asString()); + + flow_file_a->addAttribute("invalid_timestamp_1", " 2023-03-02T03:49:55.190+08:45"); + 
flow_file_a->addAttribute("invalid_timestamp_2", "2023-03-02T03:49:55.190+08:45 "); + flow_file_a->addAttribute("invalid_timestamp_3", "2023-03-02 03:49:55.190+08:45 "); + + REQUIRE_THROWS_AS(expression::compile("${invalid_timestamp_1:toDate('%FT%T%Ez', 'UTC')}")(expression::Parameters{ flow_file_a }), std::runtime_error); + REQUIRE_THROWS_AS(expression::compile("${invalid_timestamp_2:toDate('%FT%T%Ez', 'UTC')}")(expression::Parameters{ flow_file_a }), std::runtime_error); + REQUIRE_THROWS_AS(expression::compile("${invalid_timestamp_3:toDate('%FT%T%Ez', 'UTC')}")(expression::Parameters{ flow_file_a }), std::runtime_error); } TEST_CASE("Reformat Date", "[expressionReformatDate]") { #ifdef WIN32 expression::dateSetInstall(TZ_DATA_DIR); #endif - auto expr = expression::compile("${message:toDate('%Y/%m/%d', 'GMT'):format('%m-%d-%Y', 'America/New_York')}"); - auto flow_file_a = std::make_shared<core::FlowFile>(); flow_file_a->addAttribute("message", "2014/03/14"); - REQUIRE("03-13-2014" == expr(expression::Parameters{ flow_file_a }).asString()); + flow_file_a->addAttribute("blue", "20130917162643"); + + CHECK("03-13-2014" == expression::compile("${message:toDate('%Y/%m/%d', 'UTC'):format('%m-%d-%Y', 'America/New_York')}")(expression::Parameters{ flow_file_a }).asString()); + + auto blue_utc_expr = expression::compile("${blue:toDate('%Y%m%d%H%M%S', 'UTC'):format('%Y/%m/%d %H:%M:%SZ', 'UTC')}"); + auto blue_paris_expr = expression::compile("${blue:toDate('%Y%m%d%H%M%S', 'UTC'):format('%Y/%m/%d %H:%M:%SZ', 'Europe/Paris')}"); + auto blue_la_expr = expression::compile("${blue:toDate('%Y%m%d%H%M%S', 'UTC'):format('%Y/%m/%d %H:%M:%SZ', 'America/Los_Angeles')}"); + CHECK("2013/09/17 16:26:43.000Z" == blue_utc_expr(expression::Parameters{ flow_file_a }).asString()); + CHECK("2013/09/17 18:26:43.000Z" == blue_paris_expr(expression::Parameters{ flow_file_a }).asString()); + CHECK("2013/09/17 09:26:43.000Z" == blue_la_expr(expression::Parameters{ flow_file_a }).asString()); } 
TEST_CASE("Now Date", "[expressionNowDate]") { @@ -1249,22 +1275,77 @@ TEST_CASE("Now Date", "[expressionNowDate]") { expression::dateSetInstall(TZ_DATA_DIR); #endif auto expr = expression::compile("${now():format('%Y')}"); - - auto flow_file_a = std::make_shared<core::FlowFile>(); - flow_file_a->addAttribute("message", "2014/03/14"); - date::year_month_day date{std::chrono::floor<std::chrono::days>(std::chrono::system_clock::now())}; - - REQUIRE(date.year().operator int() == expr(expression::Parameters{ flow_file_a }).asSignedLong()); + auto current_year = date::year_month_day{std::chrono::floor<std::chrono::days>(std::chrono::system_clock::now())}.year().operator int(); + + CHECK(current_year == expr(expression::Parameters{ }).asSignedLong()); +} + +TEST_CASE("Parse RFC3339 with Expression Language toDate") { + using date::sys_days; + using org::apache::nifi::minifi::utils::timeutils::parseRfc3339; + using namespace date::literals; + using namespace std::literals::chrono_literals; + using std::chrono::milliseconds; + + milliseconds expected_second = std::chrono::floor<milliseconds>((sys_days(2023_y / 03 / 01) + 19h + 04min + 55s).time_since_epoch()); + milliseconds expected_tenth_second = std::chrono::floor<milliseconds>((sys_days(2023_y / 03 / 01) + 19h + 04min + 55s + 100ms).time_since_epoch()); + milliseconds expected_milli_second = std::chrono::floor<milliseconds>((sys_days(2023_y / 03 / 01) + 19h + 04min + 55s + 190ms).time_since_epoch()); + + CHECK(expression::compile("${literal('2023-03-01T19:04:55Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55.1Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_tenth_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55.19Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + 
CHECK(expression::compile("${literal('2023-03-01T19:04:55.190Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55.190999Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01t19:04:55z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01t19:04:55.190z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01T20:04:55+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01T20:04:55.190+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01T20:04:55.190999+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 20:04:55+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01 20:04:55.1+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_tenth_second.count()); + CHECK(expression::compile("${literal('2023-03-01 20:04:55.19+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 20:04:55.190+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 20:04:55.190999+01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55Z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01_19:04:55Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01_19:04:55z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.1Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_tenth_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.19Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.190Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01_19:04:55.190Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.190999Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01_19:04:55.190999Z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.190z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01_19:04:55.190z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.190999z'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01_19:04:55.190999z'):toDate()}")(expression::Parameters()).asSignedLong() == 
expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55-00:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01 19:04:55.190-00:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55-00:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55.190-00:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-02T03:49:55+08:45'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55+00:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + CHECK(expression::compile("${literal('2023-03-01T19:04:55.190+00:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_milli_second.count()); + CHECK(expression::compile("${literal('2023-03-01T18:04:55-01:00'):toDate()}")(expression::Parameters()).asSignedLong() == expected_second.count()); + + REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55Zbanana'):toDate()}")(expression::Parameters()), std::runtime_error); + REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55'):toDate()}")(expression::Parameters()), std::runtime_error); + REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55T'):toDate()}")(expression::Parameters()), std::runtime_error); + REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01T19:04:55Z '):toDate()}")(expression::Parameters()), std::runtime_error); + REQUIRE_THROWS_AS(expression::compile("${literal(' 2023-03-01T19:04:55Z'):toDate()}")(expression::Parameters()), std::runtime_error); + 
REQUIRE_THROWS_AS(expression::compile("${literal('2023-03-01'):toDate()}")(expression::Parameters()), std::runtime_error); } TEST_CASE("Format Date", "[expressionFormatDate]") { - auto expr_gmt = expression::compile("${message:format('%m-%d-%Y', 'GMT')}"); - auto expr_utc = expression::compile("${message:format('%m-%d-%Y', 'UTC')}"); - +#ifdef WIN32 + expression::dateSetInstall(TZ_DATA_DIR); +#endif auto flow_file_a = std::make_shared<core::FlowFile>(); - flow_file_a->addAttribute("message", "1394755200000"); - REQUIRE("03-14-2014" == expr_gmt(expression::Parameters{ flow_file_a }).asString()); - REQUIRE("03-14-2014" == expr_utc(expression::Parameters{ flow_file_a }).asString()); + flow_file_a->addAttribute("trillion_milliseconds", "1000000000000"); + CHECK(expression::compile("${trillion_milliseconds:format('%Y/%m/%d %H:%M:%SZ', 'UTC')}")(expression::Parameters{ flow_file_a }).asString() == "2001/09/09 01:46:40.000Z"); + CHECK(expression::compile("${trillion_milliseconds:format('%Y/%m/%d %H:%M:%SZ', 'Europe/Paris')}")(expression::Parameters{ flow_file_a }).asString() == "2001/09/09 03:46:40.000Z"); + CHECK(expression::compile("${trillion_milliseconds:format('%Y/%m/%d %H:%M:%SZ', 'America/Los_Angeles')}")(expression::Parameters{ flow_file_a }).asString() == "2001/09/08 18:46:40.000Z"); } TEST_CASE("IP", "[expressionIP]") { diff --git a/libminifi/include/utils/TimeUtil.h b/libminifi/include/utils/TimeUtil.h index 2aab23ba4..2080c1ebe 100644 --- a/libminifi/include/utils/TimeUtil.h +++ b/libminifi/include/utils/TimeUtil.h @@ -115,6 +115,8 @@ inline std::optional<std::chrono::sys_seconds> parseDateTimeStr(const std::strin return tp; } +std::optional<std::chrono::system_clock::time_point> parseRfc3339(const std::string& str); + inline std::string getDateTimeStr(std::chrono::sys_seconds tp) { return date::format("%Y-%m-%dT%H:%M:%SZ", tp); } diff --git a/libminifi/src/utils/TimeUtil.cpp b/libminifi/src/utils/TimeUtil.cpp index 04df8d6fb..58aaec3a2 100644 --- 
a/libminifi/src/utils/TimeUtil.cpp +++ b/libminifi/src/utils/TimeUtil.cpp @@ -16,9 +16,11 @@ */ #include "utils/TimeUtil.h" +#include "range/v3/algorithm/contains.hpp" namespace org::apache::nifi::minifi::utils::timeutils { +using namespace std::literals::chrono_literals; static std::mutex global_clock_mtx; static std::shared_ptr<SteadyClock> global_clock{std::make_shared<SteadyClock>()}; @@ -33,4 +35,36 @@ void setClock(std::shared_ptr<SteadyClock> clock) { global_clock = std::move(clock); } +std::optional<std::chrono::system_clock::time_point> parseRfc3339(const std::string& str) { + std::istringstream stream(str); + date::year_month_day date_part; + date::from_stream(stream, "%F", date_part); + + if (stream.fail()) + return std::nullopt; + + constexpr std::string_view accepted_delimiters = "tT_ "; + char delimiter_char; + stream.get(delimiter_char); + + if (stream.fail() || !ranges::contains(accepted_delimiters, delimiter_char)) + return std::nullopt; + + std::chrono::system_clock::duration time_part; + std::chrono::minutes offset = 0min; + if (str.ends_with('Z') || str.ends_with('z')) { + date::from_stream(stream, "%T", time_part); + if (stream.fail()) + return std::nullopt; + stream.get(); + } else { + date::from_stream(stream, "%T%Ez", time_part, {}, &offset); + } + + if (stream.fail() || (stream.peek() && !stream.eof())) + return std::nullopt; + + return date::sys_days(date_part) + time_part - offset; +} + } // namespace org::apache::nifi::minifi::utils::timeutils diff --git a/libminifi/test/unit/TimeUtilTests.cpp b/libminifi/test/unit/TimeUtilTests.cpp index 883574df0..ef9d9ca37 100644 --- a/libminifi/test/unit/TimeUtilTests.cpp +++ b/libminifi/test/unit/TimeUtilTests.cpp @@ -254,3 +254,60 @@ TEST_CASE("Test roundToNextSecond", "[roundingTests]") { CHECK(parseLocalTimePoint("2022-06-21 11:00:01") == roundToNextSecond(parseLocalTimePoint("2022-06-21 11:00:00"))); CHECK(parseLocalTimePoint("2022-07-21 12:12:01") == 
roundToNextSecond(parseLocalTimePoint("2022-07-21 12:12:00"))); } + +TEST_CASE("Parse RFC3339", "[parseRfc3339]") { + using date::sys_days; + using org::apache::nifi::minifi::utils::timeutils::parseRfc3339; + using namespace date::literals; + using namespace std::literals::chrono_literals; + + auto expected_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s; + auto expected_tenth_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s + 100ms; + auto expected_milli_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s + 190ms; + auto expected_micro_second = sys_days(2023_y / 03 / 01) + 19h + 04min + 55s + 190999us; + + CHECK(parseRfc3339("2023-03-01T19:04:55Z") == expected_second); + CHECK(parseRfc3339("2023-03-01T19:04:55.1Z") == expected_tenth_second); + CHECK(parseRfc3339("2023-03-01T19:04:55.19Z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01T19:04:55.190Z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01T19:04:55.190999Z") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01t19:04:55z") == expected_second); + CHECK(parseRfc3339("2023-03-01t19:04:55.190z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01T20:04:55+01:00") == expected_second); + CHECK(parseRfc3339("2023-03-01T20:04:55.190+01:00") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01T20:04:55.190999+01:00") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01 20:04:55+01:00") == expected_second); + CHECK(parseRfc3339("2023-03-01 20:04:55.1+01:00") == expected_tenth_second); + CHECK(parseRfc3339("2023-03-01 20:04:55.19+01:00") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01 20:04:55.190+01:00") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01 20:04:55.190999+01:00") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01 19:04:55Z") == expected_second); + CHECK(parseRfc3339("2023-03-01_19:04:55Z") == expected_second); + CHECK(parseRfc3339("2023-03-01 19:04:55z") == expected_second); + 
CHECK(parseRfc3339("2023-03-01_19:04:55z") == expected_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.1Z") == expected_tenth_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.19Z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.190Z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01_19:04:55.190Z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.190999Z") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01_19:04:55.190999Z") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.190z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01_19:04:55.190z") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.190999z") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01_19:04:55.190999z") == expected_micro_second); + CHECK(parseRfc3339("2023-03-01 19:04:55-00:00") == expected_second); + CHECK(parseRfc3339("2023-03-01 19:04:55.190-00:00") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01T19:04:55-00:00") == expected_second); + CHECK(parseRfc3339("2023-03-01T19:04:55.190-00:00") == expected_milli_second); + CHECK(parseRfc3339("2023-03-02T03:49:55+08:45") == expected_second); + CHECK(parseRfc3339("2023-03-01T19:04:55+00:00") == expected_second); + CHECK(parseRfc3339("2023-03-01T19:04:55.190+00:00") == expected_milli_second); + CHECK(parseRfc3339("2023-03-01T18:04:55-01:00") == expected_second); + + CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55Zbanana")); + CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55")); + CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55T")); + CHECK_FALSE(parseRfc3339("2023-03-01T19:04:55Z ")); + CHECK_FALSE(parseRfc3339(" 2023-03-01T19:04:55Z")); + CHECK_FALSE(parseRfc3339("2023-03-01")); +}
