Repository: nifi-minifi-cpp Updated Branches: refs/heads/master 2e4a3521e -> 253a1b74f
MINIFICPP-445 Added escape/unescape CSV expression language functions This closes #293. Signed-off-by: Aldrin Piri <ald...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/253a1b74 Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/253a1b74 Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/253a1b74 Branch: refs/heads/master Commit: 253a1b74f842479b7fa25fdca76b8e83a5e7a059 Parents: 2e4a352 Author: Andrew I. Christianson <a...@andyic.org> Authored: Fri Mar 23 12:49:51 2018 -0400 Committer: Aldrin Piri <ald...@apache.org> Committed: Mon Apr 9 16:09:17 2018 -0400 ---------------------------------------------------------------------- EXPRESSIONS.md | 80 +++++++++++++++++++- extensions/expression-language/Expression.cpp | 55 ++++++++++++++ libminifi/include/utils/StringUtils.h | 19 ++++- .../ExpressionLanguageTests.cpp | 36 ++++++++- 4 files changed, 181 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/EXPRESSIONS.md ---------------------------------------------------------------------- diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md index 371ddb4..4a72dae 100644 --- a/EXPRESSIONS.md +++ b/EXPRESSIONS.md @@ -204,6 +204,10 @@ token, filename. - [`escapeJson`](#escapejson) - [`unescapeJson`](#unescapejson) +- [`escapeXml`](#escapexml) +- [`escapeCsv`](#escapecsv) +- [`unescapeXml`](#unescapexml) +- [`unescapeCsv`](#unescapecsv) ## Planned Features @@ -221,12 +225,8 @@ token, filename. 
### Encode/Decode Functions -- `escapeXml` -- `escapeCsv` - `escapeHtml3` - `escapeHtml4` -- `unescapeXml` -- `unescapeCsv` - `unescapeHtml3` - `unescapeHtml4` - `urlEncode` @@ -1266,3 +1266,75 @@ If the "message" attribute is 'This is a "test!"', then the Expression If the "message" attribute is 'This is a \"test!\"', then the Expression `${message:unescapeJson()}` will return 'This is a "test!"' + +### escapeXml + +**Description**: This function prepares the Subject to be inserted into an XML +document by escaping the characters in a String using XML entities. The +function correctly escapes quotes, apostrophe, ampersand, `<`, `>` and +control-chars. + +**Subject Type**: String + +**Arguments**: No arguments + +**Return Type**: String + +**Examples**: + +If the "message" attribute is `Zero > One < \"two!\" & 'true'`, then the +Expression `${message:escapeXml()}` will return `Zero &gt; One &lt; +&quot;two!&quot; &amp; &apos;true&apos;` + +### unescapeXml + +**Description**: This function unescapes a string containing XML entity escapes +to a string containing the actual Unicode characters corresponding to the +escapes. Supports only the five basic XML entities (gt, lt, quot, amp, apos). + +**Subject Type**: String + +**Arguments**: No arguments + +**Return Type**: String + +**Examples**: + +If the "message" attribute is `Zero &gt; One &lt; &quot;two!&quot; &amp; +&apos;true&apos;`, then the Expression `${message:unescapeXml()}` will return +`Zero > One < \"two!\" & 'true'` + +### escapeCsv + +**Description**: This function prepares the Subject to be inserted into a CSV +document by escaping the characters in a String using the rules in RFC 4180. +The function correctly escapes quotes and surrounds the string in quotes if +needed. + +**Subject Type**: String + +**Arguments**: No arguments + +**Return Type**: String + +**Examples**: + +If the "message" attribute is `Zero > One < "two!" 
& 'true'`, then the +Expression `${message:escapeCsv()}` will return `"Zero > One < ""two!"" & +'true'"` + +### unescapeCsv + +**Description**: This function unescapes a String from a CSV document according +to the rules of RFC 4180. + +**Subject Type**: String + +**Arguments**: No arguments + +**Return Type**: String + +**Examples**: + +If the "message" attribute is `"Zero > One < ""two!"" & 'true'"`, then the +Expression `${message:unescapeCsv()}` will return `Zero > One < "two!" & 'true'` http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/extensions/expression-language/Expression.cpp ---------------------------------------------------------------------- diff --git a/extensions/expression-language/Expression.cpp b/extensions/expression-language/Expression.cpp index a4ee41b..cd5199b 100644 --- a/extensions/expression-language/Expression.cpp +++ b/extensions/expression-language/Expression.cpp @@ -222,6 +222,57 @@ Value expr_unescapeXml(const std::vector<Value> &args) { })); } +Value expr_escapeCsv(const std::vector<Value> &args) { + auto result = args[0].asString(); + const char quote_req_chars[] = {'"', '\r', '\n', ','}; + bool quote_required = false; + + for (const auto &c : quote_req_chars) { + if (result.find(c) != std::string::npos) { + quote_required = true; + break; + } + } + + if (quote_required) { + std::string quoted_result = "\""; + quoted_result.append(utils::StringUtils::replaceMap(result, {{"\"", "\"\""}})); + quoted_result.append("\""); + return Value(quoted_result); + } + + return Value(result); +} + +Value expr_unescapeCsv(const std::vector<Value> &args) { + auto result = args[0].asString(); + + if (result[0] == '"' && result[result.size() - 1] == '"') { + bool quote_required = false; + + size_t quote_pos = result.find('"', 1); + + if (quote_pos != result.length() - 1) { + quote_required = true; + } else { + const char quote_req_chars[] = {'\r', '\n', ','}; + + for (const auto &c : quote_req_chars) { + if (result.find(c) != 
std::string::npos) { + quote_required = true; + break; + } + } + } + + if (quote_required) { + return Value(utils::StringUtils::replaceMap(result.substr(1, result.size() - 2), {{"\"\"", "\""}})); + } + } + + return Value(result); +} + #ifdef EXPRESSION_LANGUAGE_USE_REGEX Value expr_replace(const std::vector<Value> &args) { @@ -555,6 +606,10 @@ Expression make_dynamic_function(const std::string &function_name, return make_dynamic_function_incomplete<expr_escapeXml>(function_name, args, 0); } else if (function_name == "unescapeXml") { return make_dynamic_function_incomplete<expr_unescapeXml>(function_name, args, 0); + } else if (function_name == "escapeCsv") { + return make_dynamic_function_incomplete<expr_escapeCsv>(function_name, args, 0); + } else if (function_name == "unescapeCsv") { + return make_dynamic_function_incomplete<expr_unescapeCsv>(function_name, args, 0); #ifdef EXPRESSION_LANGUAGE_USE_REGEX } else if (function_name == "replace") { return make_dynamic_function_incomplete<expr_replace>(function_name, args, 2); http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/libminifi/include/utils/StringUtils.h ---------------------------------------------------------------------- diff --git a/libminifi/include/utils/StringUtils.h b/libminifi/include/utils/StringUtils.h index 4bedce7..7f33260 100644 --- a/libminifi/include/utils/StringUtils.h +++ b/libminifi/include/utils/StringUtils.h @@ -210,15 +210,28 @@ class StringUtils { } static std::string replaceMap(std::string source_string, const std::map<std::string, std::string> &replace_map) { + auto result_string = source_string; + + std::vector<std::pair<size_t, std::pair<size_t, std::string>>> replacements; for (const auto &replace_pair : replace_map) { size_t replace_pos = 0; while ((replace_pos = source_string.find(replace_pair.first, replace_pos)) != std::string::npos) { - source_string.replace(replace_pos, replace_pair.first.length(), replace_pair.second); - replace_pos += 
replace_pair.second.length(); + replacements.emplace_back(std::make_pair(replace_pos, + std::make_pair(replace_pair.first.length(), replace_pair.second))); + replace_pos += replace_pair.first.length(); } } - return source_string; + std::sort(replacements.begin(), replacements.end(), [](const std::pair<size_t, std::pair<size_t, std::string>> a, + const std::pair<size_t, std::pair<size_t, std::string>> &b) { + return a.first > b.first; + }); + + for (const auto &replacement : replacements) { + result_string = source_string.replace(replacement.first, replacement.second.first, replacement.second.second); + } + + return result_string; } }; http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp ---------------------------------------------------------------------- diff --git a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp index a198335..12c60ca 100644 --- a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp +++ b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp @@ -1046,14 +1046,14 @@ TEST_CASE("Encode XML", "[expressionEncodeXML]") { // NOLINT auto flow_file_a = std::make_shared<MockFlowFile>(); flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'"); - REQUIRE("Zero &gt; One &lt; &amp;quot;two!&amp;quot; &amp; &apos;true&apos;" == expr({flow_file_a}).asString()); + REQUIRE("Zero &gt; One &lt; &quot;two!&quot; &amp; &apos;true&apos;" == expr({flow_file_a}).asString()); } TEST_CASE("Decode XML", "[expressionDecodeXML]") { // NOLINT auto expr = expression::compile("${message:unescapeXml()}"); auto flow_file_a = std::make_shared<MockFlowFile>(); - flow_file_a->addAttribute("message", "Zero &gt; One &lt; &amp;quot;two!&amp;quot; &amp; &apos;true&apos;"); + flow_file_a->addAttribute("message", "Zero &gt; One &lt; &quot;two!&quot; 
&amp; &apos;true&apos;"); REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString()); } @@ -1064,3 +1064,35 @@ TEST_CASE("Encode Decode XML", "[expressionEncodeDecodeXML]") { // NOLINT flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'"); REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString()); } + +TEST_CASE("Encode CSV", "[expressionEncodeCSV]") { // NOLINT + auto expr = expression::compile("${message:escapeCsv()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'"); + REQUIRE("\"Zero > One < \"\"two!\"\" & 'true'\"" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Decode CSV", "[expressionDecodeCSV]") { // NOLINT + auto expr = expression::compile("${message:unescapeCsv()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", R"("Zero > One < ""two!"" & 'true'")"); + REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Decode CSV 2", "[expressionDecodeCSV2]") { // NOLINT + auto expr = expression::compile("${message:unescapeCsv()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", R"("quoted")"); + REQUIRE("\"quoted\"" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Encode Decode CSV", "[expressionEncodeDecodeCSV]") { // NOLINT + auto expr = expression::compile("${message:escapeCsv():unescapeCsv()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'"); + REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString()); +}