This is an automated email from the ASF dual-hosted git repository. fgerlits pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit bbfa1e01b8d373700369d6cf356393b9999712e0 Author: Marton Szasz <[email protected]> AuthorDate: Wed Jun 7 13:32:40 2023 +0200 MINIFICPP-2132 improve error messages, refactor EL Value Signed-off-by: Ferenc Gerlits <[email protected]> This closes #1585 --- extensions/expression-language/common/Value.h | 342 +++++++-------------- .../tests/ExpressionLanguageTests.cpp | 38 +++ .../processors/RouteOnAttribute.cpp | 14 +- libminifi/include/utils/GeneralUtils.h | 8 + 4 files changed, 167 insertions(+), 235 deletions(-) diff --git a/extensions/expression-language/common/Value.h b/extensions/expression-language/common/Value.h index 65d3cf586..ea524b77c 100644 --- a/extensions/expression-language/common/Value.h +++ b/extensions/expression-language/common/Value.h @@ -15,247 +15,141 @@ * limitations under the License. */ -#include <string> -#include <sstream> #include <iomanip> #include <limits> -#include <algorithm> +#include <string> +#include <sstream> #include <utility> +#include <variant> +#include "utils/GeneralUtils.h" +#include "utils/StringUtils.h" #pragma once -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace expression { - +namespace org::apache::nifi::minifi::expression { /** - * Represents an expression value, which can be one of multiple types or NULL + * Represents an expression value, which can be one of multiple types or NULL (represented as variant with monostate as null) */ class Value { public: - /** - * Construct a default (NULL) value - */ + /// Construct a default (NULL) value Value() = default; - /** - * Construct a string value - */ - explicit Value(std::string val) { - setString(std::move(val)); - } - - /** - * Construct a boolean value - */ - explicit Value(bool val) { - setBoolean(val); - } - - /** - * Construct an unsigned long value - */ - explicit Value(uint64_t val) { - setUnsignedLong(val); - } - - /** - * Construct a signed long value - */ - explicit Value(int64_t val) { - setSignedLong(val); - } - - /** - * Construct a long double value - */ - explicit Value(long double val) { - setLongDouble(val); - } - - bool isNull() const { - return is_null_; - } - - bool isString() const { - return is_string_; - } - - bool isDecimal() const { - if (is_long_double_) { - return true; - } else if (is_string_ && (string_val_.find('.') != string_val_.npos || - string_val_.find('e') != string_val_.npos || - string_val_.find('E') != string_val_.npos)) { - return true; - } else { - return false; - } - } - - void setSignedLong(int64_t val) { - is_null_ = false; - is_bool_ = false; - is_signed_long_ = true; - is_unsigned_long_ = false; - is_long_double_ = false; - is_string_ = false; - signed_long_val_ = val; - } - - void setUnsignedLong(uint64_t val) { - is_null_ = false; - is_bool_ = false; - is_signed_long_ = false; - is_unsigned_long_ = true; - is_long_double_ = false; - is_string_ = false; - unsigned_long_val_ = val; + /// Construct with the specified value + explicit Value(std::string val) :value_{std::move(val)} {} + explicit Value(bool val) :value_{val} {} + explicit Value(uint64_t val) :value_{val} {} + explicit Value(int64_t val) :value_{val} {} + explicit Value(long double val) :value_{val} {} + + [[nodiscard]] bool isNull() const { return holds_alternative<std::monostate>(value_); } + + [[nodiscard]] bool isDecimal() const { + return std::visit(utils::overloaded{ + [](long double) { return true; }, + [](const std::string& string_val) { + return string_val.find('.') != string_val.npos || + string_val.find('e') != string_val.npos || + string_val.find('E') != string_val.npos; + }, + [](const auto&) { return false; } + }, value_); + } + + void setSignedLong(int64_t val) { value_ = val; } + void setUnsignedLong(uint64_t val) { value_ = val; } + void setLongDouble(long double val) { value_ = val; } + void setBoolean(bool val) { value_ = val; } + void setString(std::string val) { value_ = std::move(val); } + + [[nodiscard]] std::string asString() const { + return std::visit<std::string>(utils::overloaded{ + [](const std::string& str) { return str; }, + [](bool b) -> std::string { return b ? "true" : "false"; }, + [](int64_t i) { return std::to_string(i); }, + [](uint64_t i) { return std::to_string(i); }, + [](long double d) { + std::stringstream ss; + ss << std::fixed << std::setprecision(std::numeric_limits<double>::digits10) + << d; + auto result = ss.str(); + result.erase(result.find_last_not_of('0') + 1, std::string::npos); + + if (result.find('.') == result.length() - 1) { + result.erase(result.length() - 1, std::string::npos); + } + + return result; + }, + [](std::monostate) { return std::string{}; } + }, value_); + } + + [[nodiscard]] uint64_t asUnsignedLong() const { + return std::visit<uint64_t>(utils::overloaded{ + [](uint64_t i) { return i; }, + [](int64_t i) { return gsl::narrow_cast<uint64_t>(i); }, + [](long double d) { return static_cast<uint64_t>(d); }, + [](const std::string& str) -> uint64_t { + const auto stoull_ = [](const std::string& s) { return std::stoull(s); }; + return strParse<uint64_t>(stoull_, 0, "Value::asUnsignedLong", str); + }, + [](auto) { return uint64_t{0}; } + }, value_); + } + + [[nodiscard]] int64_t asSignedLong() const { + return std::visit<int64_t>(utils::overloaded{ + [](int64_t i) { return i; }, + [](uint64_t i) { return gsl::narrow_cast<int64_t>(i); }, + [](long double d) { return static_cast<int64_t>(d); }, + [](const std::string& str) -> int64_t { + const auto stoll_ = [](const std::string& s) { return std::stoll(s); }; + return strParse<int64_t>(stoll_, 0, "Value::asSignedLong", str); + }, + [](auto) { return int64_t{0}; } + }, value_); + } + + [[nodiscard]] long double asLongDouble() const { + return std::visit<long double>(utils::overloaded{ + [](long double d) { return d; }, + [](int64_t i) { return static_cast<long double>(i); }, + [](uint64_t i) { return static_cast<long double>(i); }, + [](const std::string& str) -> long double { + const auto stold_ = [](const std::string& s) { return std::stold(s); }; + return strParse<long double>(stold_, 0.0, "Value::asLongDouble", str); + }, + [](auto) -> long double { return 0.0; } + }, value_); + } + + [[nodiscard]] bool asBoolean() const { + return std::visit(utils::overloaded{ + [](bool b) { return b; }, + [](int64_t i) { return i != 0; }, + [](uint64_t i) { return i != 0; }, + [](long double d) { return d != 0.0; }, + [](const std::string& str) { return utils::StringUtils::toBool(str).value_or(false); }, + [](auto) { return false; } + }, value_); } - void setLongDouble(long double val) { - is_null_ = false; - is_bool_ = false; - is_signed_long_ = false; - is_unsigned_long_ = false; - is_long_double_ = true; - is_string_ = false; - long_double_val_ = val; - } - - void setBoolean(bool val) { - is_null_ = false; - is_bool_ = true; - is_signed_long_ = false; - is_unsigned_long_ = false; - is_long_double_ = false; - is_string_ = false; - bool_val_ = val; - } - - void setString(std::string val) { - is_null_ = false; - is_bool_ = false; - is_signed_long_ = false; - is_unsigned_long_ = false; - is_long_double_ = false; - is_string_ = true; - string_val_ = std::move(val); - } - - std::string asString() const { - if (is_string_) { - return string_val_; - } else if (is_bool_) { - if (bool_val_) { - return "true"; - } else { - return "false"; - } - } else if (is_signed_long_) { - return std::to_string(signed_long_val_); - } else if (is_unsigned_long_) { - return std::to_string(unsigned_long_val_); - } else if (is_long_double_) { - std::stringstream ss; - ss << std::fixed << std::setprecision(std::numeric_limits<double>::digits10) - << long_double_val_; - auto result = ss.str(); - result.erase(result.find_last_not_of('0') + 1, std::string::npos); - - if (result.find('.') == result.length() - 1) { - result.erase(result.length() - 1, std::string::npos); - } - - return result; - } else { - return ""; - } - } - - uint64_t asUnsignedLong() const { - if (is_unsigned_long_) { - return unsigned_long_val_; - } else if (is_string_) { - return string_val_.empty() ? 0 : std::stoull(string_val_); - } else if (is_signed_long_) { - return signed_long_val_; - } else if (is_long_double_) { - return static_cast<uint64_t >(long_double_val_); - } else { - return 0; - } - } - - int64_t asSignedLong() const { - if (is_signed_long_) { - return signed_long_val_; - } else if (is_unsigned_long_) { - return unsigned_long_val_; - } else if (is_string_) { - return string_val_.empty() ? 0 : std::stoll(string_val_); - } else if (is_long_double_) { - return static_cast<int64_t >(long_double_val_); - } else { - return 0; - } - } - - long double asLongDouble() const { - if (is_signed_long_) { - return static_cast<long double>(signed_long_val_); - } else if (is_unsigned_long_) { - return static_cast<long double>(unsigned_long_val_); - } else if (is_long_double_) { - return long_double_val_; - } else if (is_string_) { - return string_val_.empty() ? 0 : std::stold(string_val_); - } else { - return 0.0; - } - } - - bool asBoolean() const { - if (is_bool_) { - return bool_val_; - } - if (is_signed_long_) { - return signed_long_val_ != 0; - } else if (is_unsigned_long_) { - return unsigned_long_val_ != 0; - } else if (is_long_double_) { - return long_double_val_ != 0.0; - } else if (is_string_) { - std::string bool_str = string_val_; - std::transform(bool_str.begin(), bool_str.end(), bool_str.begin(), ::tolower); - std:: istringstream bools(bool_str); - bool bool_val; - bools >> std::boolalpha >> bool_val; - return bool_val; - } else { - return false; + private: + template<typename T> + static T strParse(std::regular_invocable<std::string> auto const& conversion_function, T default_value, std::string_view context, const std::string& value) { + if (value.empty()) return default_value; + try { + return std::invoke(conversion_function, value); + } catch (const std::invalid_argument& ex) { + throw std::invalid_argument{utils::StringUtils::join_pack(context, " failed to parse \"", value, "\": invalid argument")}; + } catch (const std::out_of_range& ex) { + throw std::out_of_range{utils::StringUtils::join_pack(context, " failed to parse \"", value, "\": out of range")}; } } - private: - bool is_null_ = true; - bool is_string_ = false; - bool is_bool_ = false; - bool is_unsigned_long_ = false; - bool is_signed_long_ = false; - bool is_long_double_ = false; - bool bool_val_ = false; - uint64_t unsigned_long_val_ = 0; - int64_t signed_long_val_ = 0; - long double long_double_val_ = 0.0; - std::string string_val_ = ""; + std::variant<std::monostate, bool, uint64_t, int64_t, long double, std::string> value_; }; -} /* namespace expression */ -} /* namespace minifi */ -} /* namespace nifi */ -} /* namespace apache */ -} /* namespace org */ +} // namespace org::apache::nifi::minifi::expression diff --git a/extensions/expression-language/tests/ExpressionLanguageTests.cpp b/extensions/expression-language/tests/ExpressionLanguageTests.cpp index 655f6b699..3329d6b9d 100644 --- a/extensions/expression-language/tests/ExpressionLanguageTests.cpp +++ b/extensions/expression-language/tests/ExpressionLanguageTests.cpp @@ -908,6 +908,44 @@ TEST_CASE("GT3", "[expressionGT3]") { REQUIRE("false" == expr(expression::Parameters{ flow_file_a }).asString()); } +// using :gt() to test string to integer parsing code +TEST_CASE("GT4 Value parsing errors", "[expressionGT4][outofrange]") { + const char* test_str; + const char* expected_substr; + SECTION("integer out of range") { + // 2 ^ 64, the smallest positive integer that's not representable even in uint64_t + test_str = "18446744073709551616"; + expected_substr = "out of range"; + } + SECTION("integer invalid") { + test_str = "banana1337"; + expected_substr = "invalid argument"; + } + SECTION("floating point out of range") { + // largest IEEE754 quad precision float normal number times 10 (bumped the exponent by one) + test_str = "1.1897314953572317650857593266280070162e+4933"; + expected_substr = "out of range"; + } + SECTION("floating point invalid") { + test_str = "app.le+1337"; + expected_substr = "invalid argument"; + } + REQUIRE(test_str); + REQUIRE(expected_substr); + const auto expr = expression::compile("${attr1:gt(13.37)}"); + auto flow_file = std::make_shared<core::FlowFile>(); + flow_file->addAttribute("attr1", test_str); + try { + const auto result = expr(expression::Parameters{flow_file}).asString(); + REQUIRE(false); + } catch (const std::exception& ex) { + const std::string message = ex.what(); + // The exception message should be helpful enough to contain the problem description, and the problematic value + CHECK(message.find(expected_substr) != std::string::npos); + CHECK(message.find(test_str) != std::string::npos); + } +} + TEST_CASE("GE", "[expressionGE]") { auto expr = expression::compile("${attr:plus(5):ge(6)}"); diff --git a/extensions/standard-processors/processors/RouteOnAttribute.cpp b/extensions/standard-processors/processors/RouteOnAttribute.cpp index d7ff2ef34..f6b7654c9 100644 --- a/extensions/standard-processors/processors/RouteOnAttribute.cpp +++ b/extensions/standard-processors/processors/RouteOnAttribute.cpp @@ -26,11 +26,7 @@ #include "core/Resource.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace processors { +namespace org::apache::nifi::minifi::processors { const core::Relationship RouteOnAttribute::Unmatched("unmatched", "Files which do not match any expression are routed here"); const core::Relationship RouteOnAttribute::Failure("failure", "Failed files are transferred to failure"); @@ -86,7 +82,7 @@ void RouteOnAttribute::onTrigger(core::ProcessContext *context, core::ProcessSes session->remove(flow_file); } } catch (const std::exception &e) { - logger_->log_error("Caught exception while updating attributes: %s", e.what()); + logger_->log_error("Caught exception while updating attributes: type: %s, what: %s", typeid(e).name(), e.what()); session->transfer(flow_file, Failure); yield(); } @@ -94,8 +90,4 @@ void RouteOnAttribute::onTrigger(core::ProcessContext *context, core::ProcessSes REGISTER_RESOURCE(RouteOnAttribute, Processor); -} /* namespace processors */ -} /* namespace minifi */ -} /* namespace nifi */ -} /* namespace apache */ -} /* namespace org */ +} // namespace org::apache::nifi::minifi::processors diff --git a/libminifi/include/utils/GeneralUtils.h b/libminifi/include/utils/GeneralUtils.h index bc641f1d2..98656afae 100644 --- a/libminifi/include/utils/GeneralUtils.h +++ b/libminifi/include/utils/GeneralUtils.h @@ -118,4 +118,12 @@ using remove_cvref_t = typename std::remove_cv<typename std::remove_reference<T> inline constexpr bool implies(bool a, bool b) noexcept { return !a || b; } +template<typename... Funcs> +struct overloaded : Funcs... { + using Funcs::operator()...; +}; +// deduction guide. Shouldn't be necessary since C++20, but Clang doesn't implement "Class template argument deduction for aggregates" yet +template<typename... Funcs> +overloaded(Funcs...) -> overloaded<Funcs...>; + } // namespace org::apache::nifi::minifi::utils
