Repository: nifi-minifi-cpp Updated Branches: refs/heads/master ead988683 -> 15d9a6911
MINIFICPP-474 Added getDelimitedField EL function This closes #328. Signed-off-by: Marc Parisi <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/15d9a691 Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/15d9a691 Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/15d9a691 Branch: refs/heads/master Commit: 15d9a6911b47e9515045af5b9e34ccff40dec3f1 Parents: ead9886 Author: Andrew I. Christianson <[email protected]> Authored: Fri May 11 10:46:55 2018 -0400 Committer: Marc Parisi <[email protected]> Committed: Mon May 14 12:51:33 2018 -0400 ---------------------------------------------------------------------- EXPRESSIONS.md | 36 +++++- extensions/expression-language/Expression.cpp | 126 ++++++++++++++++++- extensions/expression-language/Parser.yy | 5 +- .../ExpressionLanguageTests.cpp | 30 ++++- 4 files changed, 187 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/EXPRESSIONS.md ---------------------------------------------------------------------- diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md index 95864d7..f2eebb2 100644 --- a/EXPRESSIONS.md +++ b/EXPRESSIONS.md @@ -172,6 +172,7 @@ token, filename. - [`substringBeforeLast`](#substringbeforelast) - [`substringAfter`](#substringafter) - [`substringAfterLast`](#substringafterlast) +- [`getDelimitedField`](#getdelimitedfield) - [`replace`](#replace) - [`replaceFirst`](#replacefirst) - [`replaceAll`](#replaceall) @@ -224,10 +225,6 @@ token, filename. 
## Planned Features -### String Manipulation - -- `getDelimitedField` - ### Searching - `jsonPath` @@ -737,6 +734,37 @@ values: | `${filename:substringAfterLast(' n')}` | ew filename.txt | | `${filename:substringAfterLast('missing')}` | a brand new filename.txt | +### getDelimitedField + +**Description**: Parses the Subject as a delimited line of text and returns +just a single field from that delimited text. + +**Subject Type**: String + +**Arguments**: + +| Argument | Description | +| - | - | +| index | The index of the field to return. A value of 1 will return the first field, a value of 2 will return the second field, and so on. | +| delimiter | Optional argument that provides the character to use as a field separator. If not specified, a comma will be used. This value must be exactly 1 character. | +| quoteChar | Optional argument that provides the character that can be used to quote values so that the delimiter can be used within a single field. If not specified, a double-quote (") will be used. This value must be exactly 1 character. | +| escapeChar | Optional argument that provides the character that can be used to escape the Quote Character or the Delimiter within a field. If not specified, a backslash (\) is used. This value must be exactly 1 character. | +| stripChars | Optional argument that specifies whether or not quote characters and escape characters should be stripped. For example, if we have a field value `"1, 2, 3"` and this value is `true`, we will get the value `1, 2, 3`, but if this value is false, we will get the value `"1, 2, 3"` with the quotes. The default value is `false`. This value must be either `true` or `false`. | + +**Return Type**: String + +**Examples**: If the "line" attribute contains the value "Jacobson, John", 32, +Mr. and the "altLine" attribute contains the value Jacobson, John|32|Mr. 
then +the following Expressions will result in the following values: + +| Expression | Value | +| - | - | +| `${line:getDelimitedField(2)}` | ` 32` | +| `${line:getDelimitedField(2):trim()}` | `32` | +| `${line:getDelimitedField(1)}` | `"Jacobson, John"` | +| `${line:getDelimitedField(1, ',', '"', '\\', true)}` | `Jacobson, John` | +| `${altLine:getDelimitedField(1, '|')}` | `Jacobson, John` | + ### replace **Description**: Replaces all occurrences of one literal String within the Subject http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/extensions/expression-language/Expression.cpp ---------------------------------------------------------------------- diff --git a/extensions/expression-language/Expression.cpp b/extensions/expression-language/Expression.cpp index 2b64580..b26a515 100644 --- a/extensions/expression-language/Expression.cpp +++ b/extensions/expression-language/Expression.cpp @@ -193,6 +193,126 @@ Value expr_substringAfterLast(const std::vector<Value> &args) { return Value(arg_0.substr(last_pos + arg_1.length())); } +Value expr_getDelimitedField(const std::vector<Value> &args) { + const auto &subject = args[0].asString(); + const auto &index = args[1].asUnsignedLong() - 1; + char delimiter_ch = ','; + + if (args.size() > 2) { + delimiter_ch = args[2].asString()[0]; + } + + char quote_ch = '"'; + + if (args.size() > 3) { + quote_ch = args[3].asString()[0]; + } + + char escape_ch = '\\'; + + if (args.size() > 4) { + escape_ch = args[4].asString()[0]; + } + + bool strip_chars = false; + + if (args.size() > 5) { + strip_chars = args[5].asBoolean(); + } + + enum parse_states { + value, + quote + }; + + parse_states parse_state = value; + uint64_t field_idx = 0; + size_t field_size = 0; + std::string result; + result.resize(1024); + + for (uint64_t parse_pos = 0; parse_pos < subject.length(); parse_pos++) { + char cur_ch = subject[parse_pos]; + + if (cur_ch == escape_ch) { + if (!strip_chars && field_idx == index) { + field_size++; + + if 
(field_size >= result.size()) { + result.resize(result.size() + 1024); + } + + result[field_size - 1] = escape_ch; + } + parse_pos++; + if (parse_pos < subject.length()) { + cur_ch = subject[parse_pos]; + } else { + break; + } + } + + switch (parse_state) { + case value: + if (cur_ch == delimiter_ch) { + field_idx++; + if (field_idx > index) { + break; + } + continue; + } else if (cur_ch == quote_ch) { + if (!strip_chars && field_idx == index) { + field_size++; + + if (field_size >= result.size()) { + result.resize(result.size() + 1024); + } + + result[field_size - 1] = quote_ch; + } + parse_state = quote; + continue; + } else if (field_idx == index) { + field_size++; + + if (field_size >= result.size()) { + result.resize(result.size() + 1024); + } + + result[field_size - 1] = cur_ch; + } + break; + case quote: + if (cur_ch == quote_ch) { + if (!strip_chars && field_idx == index) { + field_size++; + + if (field_size >= result.size()) { + result.resize(result.size() + 1024); + } + + result[field_size - 1] = quote_ch; + } + parse_state = value; + continue; + } else if (field_idx == index) { + field_size++; + + if (field_size >= result.size()) { + result.resize(result.size() + 1024); + } + + result[field_size - 1] = cur_ch; + } + break; + } + } + + result.resize(field_size); + + return Value(result); +} + Value expr_startsWith(const std::vector<Value> &args) { const std::string &arg_0 = args[0].asString(); const std::string &arg_1 = args[1].asString(); @@ -1183,7 +1303,7 @@ Value expr_unescapeCsv(const std::vector<Value> &args) { Value expr_urlEncode(const std::vector<Value> &args) { auto arg_0 = args[0].asString(); - CURL * curl = curl_easy_init(); + CURL *curl = curl_easy_init(); if (curl != nullptr) { char *output = curl_easy_escape(curl, arg_0.c_str(), @@ -1204,7 +1324,7 @@ Value expr_urlEncode(const std::vector<Value> &args) { Value expr_urlDecode(const std::vector<Value> &args) { auto arg_0 = args[0].asString(); - CURL * curl = curl_easy_init(); + CURL *curl = 
curl_easy_init(); if (curl != nullptr) { int out_len; char *output = curl_easy_unescape(curl, @@ -1564,6 +1684,8 @@ Expression make_dynamic_function(const std::string &function_name, return make_dynamic_function_incomplete<expr_substringAfter>(function_name, args, 2); } else if (function_name == "substringAfterLast") { return make_dynamic_function_incomplete<expr_substringAfterLast>(function_name, args, 2); + } else if (function_name == "getDelimitedField") { + return make_dynamic_function_incomplete<expr_getDelimitedField>(function_name, args, 2); } else if (function_name == "startsWith") { return make_dynamic_function_incomplete<expr_startsWith>(function_name, args, 1); } else if (function_name == "endsWith") { http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/extensions/expression-language/Parser.yy ---------------------------------------------------------------------- diff --git a/extensions/expression-language/Parser.yy b/extensions/expression-language/Parser.yy index 2e053f9..9e4cab2 100644 --- a/extensions/expression-language/Parser.yy +++ b/extensions/expression-language/Parser.yy @@ -130,7 +130,6 @@ text_no_quote_no_dollar: IDENTIFIER { std::swap($$, $1); } | COLON { $$ = ":"; } | SEMI { $$ = ";"; } | FSLASH { $$ = "/"; } - | BSLASH { $$ = "\\"; } | STAR { $$ = "*"; } | HASH { $$ = "#"; } | NUMBER { std::swap($$, $1); } @@ -139,11 +138,15 @@ text_no_quote_no_dollar: IDENTIFIER { std::swap($$, $1); } text_inc_quote_escaped_dollar: text_no_quote_no_dollar { std::swap($$, $1); } | SQUOTE { $$ = "'"; } | DQUOTE { $$ = "\""; } + | BSLASH { $$ = "\\"; } | DOLLAR DOLLAR { $$ = "$"; } ; text_inc_dollar: text_no_quote_no_dollar { std::swap($$, $1); } | DOLLAR { $$ = "$"; } + | BSLASH SQUOTE { $$ = "'"; } + | BSLASH DQUOTE { $$ = "\""; } + | BSLASH BSLASH { $$ = "\\"; } ; quoted_text_content: %empty {} http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp 
---------------------------------------------------------------------- diff --git a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp index 17fb903..5c331be 100644 --- a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp +++ b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp @@ -105,7 +105,7 @@ TEST_CASE("UTF-8 characters attribute", "[expressionLanguageTestUTF8Attribute]") } TEST_CASE("Single quoted attribute expression", "[expressionLanguageTestSingleQuotedAttributeExpression]") { // NOLINT - auto expr = expression::compile("text_before${'|{}()[],:;\\/*# \t\r\n$'}text_after"); + auto expr = expression::compile("text_before${'|{}()[],:;\\\\/*# \t\r\n$'}text_after"); auto flow_file_a = std::make_shared<MockFlowFile>(); flow_file_a->addAttribute("|{}()[],:;\\/*# \t\r\n$", "__flow_a_attr_value_a__"); @@ -113,7 +113,7 @@ TEST_CASE("Single quoted attribute expression", "[expressionLanguageTestSingleQu } TEST_CASE("Double quoted attribute expression", "[expressionLanguageTestDoubleQuotedAttributeExpression]") { // NOLINT - auto expr = expression::compile("text_before${\"|{}()[],:;\\/*# \t\r\n$\"}text_after"); + auto expr = expression::compile("text_before${\"|{}()[],:;\\\\/*# \t\r\n$\"}text_after"); auto flow_file_a = std::make_shared<MockFlowFile>(); flow_file_a->addAttribute("|{}()[],:;\\/*# \t\r\n$", "__flow_a_attr_value_a__"); @@ -300,6 +300,30 @@ TEST_CASE("Substring After Last", "[expressionLanguageSubstringAfterLast]") { / REQUIRE("__" == expr({flow_file_a}).asString()); } +TEST_CASE("Get Delimited", "[expressionLanguageGetDelimited]") { // NOLINT + auto expr = expression::compile("${attr:getDelimitedField(2)}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("attr", "\"Jacobson, John\", 32, Mr."); + REQUIRE(" 32" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Get Delimited 2", 
"[expressionLanguageGetDelimited2]") { // NOLINT + auto expr = expression::compile("${attr:getDelimitedField(1)}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("attr", "\"Jacobson, John\", 32, Mr."); + REQUIRE("\"Jacobson, John\"" == expr({flow_file_a}).asString()); +} + +TEST_CASE("Get Delimited 3", "[expressionLanguageGetDelimited3]") { // NOLINT + auto expr = expression::compile("${attr:getDelimitedField(1, ',', '\\\"', '\\\\', 'true')}"); + + auto flow_file_a = std::make_shared<MockFlowFile>(); + flow_file_a->addAttribute("attr", "\"Jacobson, John\", 32, Mr."); + REQUIRE("Jacobson, John" == expr({flow_file_a}).asString()); +} + TEST_CASE("Starts With", "[expressionLanguageStartsWith]") { // NOLINT auto expr = expression::compile("${attr:startsWith('a brand')}"); @@ -409,7 +433,7 @@ TEST_CASE("Replace First Regex", "[expressionLanguageReplaceFirstRegex]") { // } TEST_CASE("Replace All", "[expressionLanguageReplaceAll]") { // NOLINT - auto expr = expression::compile("${attr:replaceAll('\\..*', '')}"); + auto expr = expression::compile("${attr:replaceAll('\\\\..*', '')}"); auto flow_file_a = std::make_shared<MockFlowFile>(); flow_file_a->addAttribute("attr", "a brand new filename.txt");
