Repository: nifi-minifi-cpp Updated Branches: refs/heads/master 8ea78747e -> d14db2cbb
MINIFICPP-455 Added HTML4 escape/unescape This closes #300. Signed-off-by: Marc Parisi <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/d14db2cb Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/d14db2cb Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/d14db2cb Branch: refs/heads/master Commit: d14db2cbb826ff03fd83876dbba4e4f209d442db Parents: 8ea7874 Author: Andrew I. Christianson <[email protected]> Authored: Tue Apr 17 13:20:48 2018 -0400 Committer: Marc Parisi <[email protected]> Committed: Wed Apr 18 06:38:02 2018 -0400 ---------------------------------------------------------------------- extensions/expression-language/Expression.cpp | 574 +++++++++++++++++++ .../ExpressionLanguageTests.cpp | 24 + 2 files changed, 598 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/d14db2cb/extensions/expression-language/Expression.cpp ---------------------------------------------------------------------- diff --git a/extensions/expression-language/Expression.cpp b/extensions/expression-language/Expression.cpp index 2590465..c6f6d87 100644 --- a/extensions/expression-language/Expression.cpp +++ b/extensions/expression-language/Expression.cpp @@ -331,6 +331,291 @@ Value expr_escapeHtml3(const std::vector<Value> &args) { })); } +Value expr_escapeHtml4(const std::vector<Value> &args) { + return Value(utils::StringUtils::replaceMap( + args[0].asString(), + { + {"!", "!"}, + {"\"", """}, + {"#", "#"}, + {"$", "$"}, + {"%", "%"}, + {"&", "&"}, + {"'", "'"}, + {"(", "("}, + {")", ")"}, + {"*", "*"}, + {"+", "+"}, + {",", ","}, + {"-", "−"}, + {".", "."}, + {"/", "/"}, + {":", ":"}, + {";", ";"}, + {"<", "<"}, + {"=", "="}, + {">", ">"}, + {"?", "?"}, + {"@", "@"}, + {"[", "["}, + {"\\", "\"}, + {"]", "]"}, + {"^", "ˆ"}, + {"_", "_"}, + {"`", "`"}, + {"{", "{"}, + {"|", "|"}, + {"}", "}"}, + {"~", "˜"}, + {"¡", "¡"}, + {"¢", "¢"}, + {"£", "£"}, + {"¤", "¤"}, + {"Â¥", "¥"}, + {"¦", "&brkbar;"}, + {"§", "§"}, + {"¨", "¨"}, + {"©", "©"}, + {"ª", "ª"}, + {"«", "«"}, + {"¬", "¬"}, + {"®", "®"}, + {"¯", "¯"}, + {"°", "°"}, + {"±", "±"}, + {"²", "²"}, + {"³", "³"}, + {"´", "´"}, + {"µ", "µ"}, + {"¶", "¶"}, + {"·", "·"}, + {"¸", "¸"}, + {"¹", "¹"}, + {"º", "º"}, + {"»", "»;"}, + {"¼", "¼"}, + {"½", "½"}, + {"¾", "¾"}, + {"¿", "¿"}, + {"Ã", "À"}, + {"Ã", "Á"}, + {"Ã", "Â"}, + {"Ã", "Ã"}, + {"Ã", "Ä"}, + {"à ", "Å"}, + {"Ã", "Æ"}, + {"Ã", "Ç"}, + {"Ã", "È"}, + {"Ã", "É"}, + {"Ã", "Ê"}, + {"Ã", "Ë"}, + {"Ã", "Ì"}, + {"Ã", "Í"}, + {"Ã", "Î"}, + {"Ã", "Ï"}, + {"Ã", "Ð"}, + {"Ã", "Ñ"}, + {"Ã", "Ò"}, + {"Ã", "Ó"}, + {"Ã", "Ô"}, + {"Ã", "Õ"}, + {"Ã", "Ö"}, + {"Ã", "×"}, + {"Ã", "Ø"}, + {"Ã", "Ù;"}, + {"Ã", "Ú"}, + {"Ã", "Û"}, + {"Ã", "Ü"}, + {"Ã", "Ý"}, + {"Ã", "Þ"}, + {"Ã", "ß"}, + {"à ", "à"}, + {"á", "á"}, + {"â", "â"}, + {"ã", "ã"}, + {"ä", "ä"}, + {"Ã¥", "å"}, + {"æ", "æ"}, + {"ç", "ç"}, + {"è", "è"}, + {"é", "é"}, + {"ê", "ê"}, + {"ë", "ë"}, + {"ì", "ì"}, + {"Ã", "í"}, + {"î", "î"}, + {"ï", "ï"}, + {"ð", "ð"}, + {"ñ", "ñ"}, + {"ò", "ò"}, + {"ó", "ó"}, + {"ô", "ô"}, + {"õ", "õ"}, + {"ö", "ö"}, + {"÷", "÷"}, + {"ø", "ø"}, + {"ù", "ù"}, + {"ú", "ú"}, + {"û", "û"}, + {"ü", "ü"}, + {"ý", "ý"}, + {"þ", "þ"}, + {"ÿ", "ÿ"}, + {"\u0192", "ƒ"}, + {"\u0391", "Α"}, + {"\u0392", "Β"}, + {"\u0393", "Γ"}, + {"\u0394", "Δ"}, + {"\u0395", "Ε"}, + {"\u0396", "Ζ"}, + {"\u0397", "Η"}, + {"\u0398", "Θ"}, + {"\u0399", "Ι"}, + {"\u039A", "Κ"}, + {"\u039B", "Λ"}, + {"\u039C", "Μ"}, + {"\u039D", "Ν"}, + {"\u039E", "Ξ"}, + {"\u039F", "Ο"}, + {"\u03A0", "Π"}, + {"\u03A1", "Ρ"}, + {"\u03A3", "Σ"}, + {"\u03A4", "Τ"}, + {"\u03A5", "Υ"}, + {"\u03A6", "Φ"}, + {"\u03A7", "Χ"}, + {"\u03A8", "Ψ"}, + {"\u03A9", "Ω"}, + {"\u03B1", "α"}, + {"\u03B2", "β"}, + {"\u03B3", "γ"}, + {"\u03B4", "δ"}, + {"\u03B5", "ε"}, + {"\u03B6", "ζ"}, + {"\u03B7", "η"}, + {"\u03B8", "θ"}, + {"\u03B9", "ι"}, + {"\u03BA", "κ"}, + {"\u03BB", "λ"}, + {"\u03BC", "μ"}, + {"\u03BD", "ν"}, + {"\u03BE", "ξ"}, + {"\u03BF", "ο"}, + {"\u03C0", "π"}, + {"\u03C1", "ρ"}, + {"\u03C2", "ς"}, + {"\u03C3", "σ"}, + {"\u03C4", "τ"}, + {"\u03C5", "υ"}, + {"\u03C6", "φ"}, + {"\u03C7", "χ"}, + {"\u03C8", "ψ"}, + {"\u03C9", "ω"}, + {"\u03D1", "ϑ"}, + {"\u03D2", "ϒ"}, + {"\u03D6", "ϖ"}, + {"\u2022", "•"}, + {"\u2026", "…"}, + {"\u2032", "′"}, + {"\u2033", "″"}, + {"\u203E", "‾"}, + {"\u2044", "⁄"}, + {"\u2118", "℘"}, + {"\u2111", "ℑ"}, + {"\u211C", "ℜ"}, + {"\u2122", "™"}, + {"\u2135", "ℵ"}, + {"\u2190", "←"}, + {"\u2191", "↑"}, + {"\u2192", "→"}, + {"\u2193", "↓"}, + {"\u2194", "↔"}, + {"\u21B5", "↵"}, + {"\u21D0", "⇐"}, + {"\u21D1", "⇑"}, + {"\u21D2", "⇒"}, + {"\u21D3", "⇓"}, + {"\u21D4", "⇔"}, + {"\u2200", "∀"}, + {"\u2202", "∂"}, + {"\u2203", "∃"}, + {"\u2205", "∅"}, + {"\u2207", "∇"}, + {"\u2208", "∈"}, + {"\u2209", "∉"}, + {"\u220B", "∋"}, + {"\u220F", "∏"}, + {"\u2211", "∑"}, + {"\u2212", "−"}, + {"\u2217", "∗"}, + {"\u221A", "√"}, + {"\u221D", "∝"}, + {"\u221E", "∞"}, + {"\u2220", "∠"}, + {"\u2227", "∧"}, + {"\u2228", "∨"}, + {"\u2229", "∩"}, + {"\u222A", "∪"}, + {"\u222B", "∫"}, + {"\u2234", "∴"}, + {"\u223C", "∼"}, + {"\u2245", "≅"}, + {"\u2248", "≈"}, + {"\u2260", "≠"}, + {"\u2261", "≡"}, + {"\u2264", "≤"}, + {"\u2265", "≥"}, + {"\u2282", "⊂"}, + {"\u2283", "⊃"}, + {"\u2284", "⊄"}, + {"\u2286", "⊆"}, + {"\u2287", "⊇"}, + {"\u2295", "⊕"}, + {"\u2297", "⊗"}, + {"\u22A5", "⊥"}, + {"\u22C5", "⋅"}, + {"\u2308", "⌈"}, + {"\u2309", "⌉"}, + {"\u230A", "⌊"}, + {"\u230B", "⌋"}, + {"\u2329", "⟨"}, + {"\u232A", "⟩"}, + {"\u25CA", "◊"}, + {"\u2660", "♠"}, + {"\u2663", "♣"}, + {"\u2665", "♥"}, + {"\u2666", "♦"}, + {"\u0152", "Œ"}, + {"\u0153", "œ"}, + {"\u0160", "Š"}, + {"\u0161", "š"}, + {"\u0178", "Ÿ"}, + {"\u02C6", "ˆ"}, + {"\u02DC", "˜"}, + {"\u2002", " "}, + {"\u2003", " "}, + {"\u2009", " "}, + {"\u200C", "‌"}, + {"\u200D", "‍"}, + {"\u200E", "‎"}, + {"\u200F", "‏"}, + {"\u2013", "–"}, + {"\u2014", "—"}, + {"\u2018", "‘"}, + {"\u2019", "’"}, + {"\u201A", "‚"}, + {"\u201C", "“"}, + {"\u201D", "”"}, + {"\u201E", "„"}, + {"\u2020", "†"}, + {"\u2021", "‡"}, + {"\u2030", "‰"}, + {"\u2039", "‹"}, + {"\u203A", "›"}, + {"\u20AC", "€"} + })); +} + Value expr_unescapeHtml3(const std::vector<Value> &args) { return Value(utils::StringUtils::replaceMap( args[0].asString(), @@ -464,6 +749,291 @@ Value expr_unescapeHtml3(const std::vector<Value> &args) { })); } +Value expr_unescapeHtml4(const std::vector<Value> &args) { + return Value(utils::StringUtils::replaceMap( + args[0].asString(), + { + {"!", "!"}, + {""", "\""}, + {"#", "#"}, + {"$", "$"}, + {"%", "%"}, + {"&", "&"}, + {"'", "'"}, + {"(", "("}, + {")", ")"}, + {"*", "*"}, + {"+", "+"}, + {",", ","}, + {"−", "-"}, + {".", "."}, + {"/", "/"}, + {":", ":"}, + {";", ";"}, + {"<", "<"}, + {"=", "="}, + {">", ">"}, + {"?", "?"}, + {"@", "@"}, + {"[", "["}, + {"\", "\\"}, + {"]", "]"}, + {"ˆ", "^"}, + {"_", "_"}, + {"`", "`"}, + {"{", "{"}, + {"|", "|"}, + {"}", "}"}, + {"˜", "~"}, + {"¡", "¡"}, + {"¢", "¢"}, + {"£", "£"}, + {"¤", "¤"}, + {"¥", "Â¥"}, + {"&brkbar;", "¦"}, + {"§", "§"}, + {"¨", "¨"}, + {"©", "©"}, + {"ª", "ª"}, + {"«", "«"}, + {"¬", "¬"}, + {"®", "®"}, + {"¯", "¯"}, + {"°", "°"}, + {"±", "±"}, + {"²", "²"}, + {"³", "³"}, + {"´", "´"}, + {"µ", "µ"}, + {"¶", "¶"}, + {"·", "·"}, + {"¸", "¸"}, + {"¹", "¹"}, + {"º", "º"}, + {"»;", "»"}, + {"¼", "¼"}, + {"½", "½"}, + {"¾", "¾"}, + {"¿", "¿"}, + {"À", "Ã"}, + {"Á", "Ã"}, + {"Â", "Ã"}, + {"Ã", "Ã"}, + {"Ä", "Ã"}, + {"Å", "à "}, + {"Æ", "Ã"}, + {"Ç", "Ã"}, + {"È", "Ã"}, + {"É", "Ã"}, + {"Ê", "Ã"}, + {"Ë", "Ã"}, + {"Ì", "Ã"}, + {"Í", "Ã"}, + {"Î", "Ã"}, + {"Ï", "Ã"}, + {"Ð", "Ã"}, + {"Ñ", "Ã"}, + {"Ò", "Ã"}, + {"Ó", "Ã"}, + {"Ô", "Ã"}, + {"Õ", "Ã"}, + {"Ö", "Ã"}, + {"×", "Ã"}, + {"Ø", "Ã"}, + {"Ù;", "Ã"}, + {"Ú", "Ã"}, + {"Û", "Ã"}, + {"Ü", "Ã"}, + {"Ý", "Ã"}, + {"Þ", "Ã"}, + {"ß", "Ã"}, + {"à", "à "}, + {"á", "á"}, + {"â", "â"}, + {"ã", "ã"}, + {"ä", "ä"}, + {"å", "Ã¥"}, + {"æ", "æ"}, + {"ç", "ç"}, + {"è", "è"}, + {"é", "é"}, + {"ê", "ê"}, + {"ë", "ë"}, + {"ì", "ì"}, + {"í", "Ã"}, + {"î", "î"}, + {"ï", "ï"}, + {"ð", "ð"}, + {"ñ", "ñ"}, + {"ò", "ò"}, + {"ó", "ó"}, + {"ô", "ô"}, + {"õ", "õ"}, + {"ö", "ö"}, + {"÷", "÷"}, + {"ø", "ø"}, + {"ù", "ù"}, + {"ú", "ú"}, + {"û", "û"}, + {"ü", "ü"}, + {"ý", "ý"}, + {"þ", "þ"}, + {"ÿ", "ÿ"}, + {"ƒ", "\u0192"}, + {"Α", "\u0391"}, + {"Β", "\u0392"}, + {"Γ", "\u0393"}, + {"Δ", "\u0394"}, + {"Ε", "\u0395"}, + {"Ζ", "\u0396"}, + {"Η", "\u0397"}, + {"Θ", "\u0398"}, + {"Ι", "\u0399"}, + {"Κ", "\u039A"}, + {"Λ", "\u039B"}, + {"Μ", "\u039C"}, + {"Ν", "\u039D"}, + {"Ξ", "\u039E"}, + {"Ο", "\u039F"}, + {"Π", "\u03A0"}, + {"Ρ", "\u03A1"}, + {"Σ", "\u03A3"}, + {"Τ", "\u03A4"}, + {"Υ", "\u03A5"}, + {"Φ", "\u03A6"}, + {"Χ", "\u03A7"}, + {"Ψ", "\u03A8"}, + {"Ω", "\u03A9"}, + {"α", "\u03B1"}, + {"β", "\u03B2"}, + {"γ", "\u03B3"}, + {"δ", "\u03B4"}, + {"ε", "\u03B5"}, + {"ζ", "\u03B6"}, + {"η", "\u03B7"}, + {"θ", "\u03B8"}, + {"ι", "\u03B9"}, + {"κ", "\u03BA"}, + {"λ", "\u03BB"}, + {"μ", "\u03BC"}, + {"ν", "\u03BD"}, + {"ξ", "\u03BE"}, + {"ο", "\u03BF"}, + {"π", "\u03C0"}, + {"ρ", "\u03C1"}, + {"ς", "\u03C2"}, + {"σ", "\u03C3"}, + {"τ", "\u03C4"}, + {"υ", "\u03C5"}, + {"φ", "\u03C6"}, + {"χ", "\u03C7"}, + {"ψ", "\u03C8"}, + {"ω", "\u03C9"}, + {"ϑ", "\u03D1"}, + {"ϒ", "\u03D2"}, + {"ϖ", "\u03D6"}, + {"•", "\u2022"}, + {"…", "\u2026"}, + {"′", "\u2032"}, + {"″", "\u2033"}, + {"‾", "\u203E"}, + {"⁄", "\u2044"}, + {"℘", "\u2118"}, + {"ℑ", "\u2111"}, + {"ℜ", "\u211C"}, + {"™", "\u2122"}, + {"ℵ", "\u2135"}, + {"←", "\u2190"}, + {"↑", "\u2191"}, + {"→", "\u2192"}, + {"↓", "\u2193"}, + {"↔", "\u2194"}, + {"↵", "\u21B5"}, + {"⇐", "\u21D0"}, + {"⇑", "\u21D1"}, + {"⇒", "\u21D2"}, + {"⇓", "\u21D3"}, + {"⇔", "\u21D4"}, + {"∀", "\u2200"}, + {"∂", "\u2202"}, + {"∃", "\u2203"}, + {"∅", "\u2205"}, + {"∇", "\u2207"}, + {"∈", "\u2208"}, + {"∉", "\u2209"}, + {"∋", "\u220B"}, + {"∏", "\u220F"}, + {"∑", "\u2211"}, + {"−", "\u2212"}, + {"∗", "\u2217"}, + {"√", "\u221A"}, + {"∝", "\u221D"}, + {"∞", "\u221E"}, + {"∠", "\u2220"}, + {"∧", "\u2227"}, + {"∨", "\u2228"}, + {"∩", "\u2229"}, + {"∪", "\u222A"}, + {"∫", "\u222B"}, + {"∴", "\u2234"}, + {"∼", "\u223C"}, + {"≅", "\u2245"}, + {"≈", "\u2248"}, + {"≠", "\u2260"}, + {"≡", "\u2261"}, + {"≤", "\u2264"}, + {"≥", "\u2265"}, + {"⊂", "\u2282"}, + {"⊃", "\u2283"}, + {"⊄", "\u2284"}, + {"⊆", "\u2286"}, + {"⊇", "\u2287"}, + {"⊕", "\u2295"}, + {"⊗", "\u2297"}, + {"⊥", "\u22A5"}, + {"⋅", "\u22C5"}, + {"⌈", "\u2308"}, + {"⌉", "\u2309"}, + {"⌊", "\u230A"}, + {"⌋", "\u230B"}, + {"⟨", "\u2329"}, + {"⟩", "\u232A"}, + {"◊", "\u25CA"}, + {"♠", "\u2660"}, + {"♣", "\u2663"}, + {"♥", "\u2665"}, + {"♦", "\u2666"}, + {"Œ", "\u0152"}, + {"œ", "\u0153"}, + {"Š", "\u0160"}, + {"š", "\u0161"}, + {"Ÿ", "\u0178"}, + {"ˆ", "\u02C6"}, + {"˜", "\u02DC"}, + {" ", "\u2002"}, + {" ", "\u2003"}, + {" ", "\u2009"}, + {"‌", "\u200C"}, + {"‍", "\u200D"}, + {"‎", "\u200E"}, + {"‏", "\u200F"}, + {"–", "\u2013"}, + {"—", "\u2014"}, + {"‘", "\u2018"}, + {"’", "\u2019"}, + {"‚", "\u201A"}, + {"“", "\u201C"}, + {"”", "\u201D"}, + {"„", "\u201E"}, + {"†", "\u2020"}, + {"‡", "\u2021"}, + {"‰", "\u2030"}, + {"‹", "\u2039"}, + {"›", "\u203A"}, + {"€", "\u20AC"} + })); +} + Value expr_escapeXml(const std::vector<Value> &args) { return Value(utils::StringUtils::replaceMap( args[0].asString(), @@ -876,6 +1446,10 @@ Expression make_dynamic_function(const std::string &function_name, return make_dynamic_function_incomplete<expr_escapeHtml3>(function_name, args, 0); } else if (function_name == "unescapeHtml3") { return make_dynamic_function_incomplete<expr_unescapeHtml3>(function_name, args, 0); + } else if (function_name == "escapeHtml4") { + return make_dynamic_function_incomplete<expr_escapeHtml4>(function_name, args, 0); + } else if (function_name == "unescapeHtml4") { + return make_dynamic_function_incomplete<expr_unescapeHtml4>(function_name, args, 0); } else if (function_name == "escapeCsv") { return make_dynamic_function_incomplete<expr_escapeCsv>(function_name, args, 0); } else if (function_name == "unescapeCsv") { http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/d14db2cb/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp ---------------------------------------------------------------------- diff --git a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp index a2fc972..421d263 100644 --- a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp +++ b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp @@ -1089,6 +1089,30 @@ TEST_CASE("Encode Decode HTML3", "[expressionEncodeDecodeHTML3]") { // NOLINT REQUIRE("¥ & < «" == expr({flow_file_a}).asString()); } +TEST_CASE("Encode HTML4", "[expressionEncodeHTML4]") { // NOLINT + auto expr = expression::compile("${message:escapeHtml4()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", "Â¥ & Φ < «"); + REQUIRE("¥ & Φ < «" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Decode HTML4", "[expressionDecodeHTML4]") { // NOLINT + auto expr = expression::compile("${message:unescapeHtml4()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", "¥ ι & < «"); + REQUIRE("Â¥ ι & < «" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Encode Decode HTML4", "[expressionEncodeDecodeHTML4]") { // NOLINT + auto expr = expression::compile("${message:escapeHtml4():unescapeHtml4()}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("message", "¥ & < Π «"); + REQUIRE("¥ & < Π «" == expr({flow_file_a}).asString()); +} + TEST_CASE("Encode CSV", "[expressionEncodeCSV]") { // NOLINT auto expr = expression::compile("${message:escapeCsv()}");
