Repository: nifi-minifi-cpp
Updated Branches:
  refs/heads/master ead988683 -> 15d9a6911


MINIFICPP-474 Added getDelimitedField EL function

This closes #328.

Signed-off-by: Marc Parisi <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/15d9a691
Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/15d9a691
Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/15d9a691

Branch: refs/heads/master
Commit: 15d9a6911b47e9515045af5b9e34ccff40dec3f1
Parents: ead9886
Author: Andrew I. Christianson <[email protected]>
Authored: Fri May 11 10:46:55 2018 -0400
Committer: Marc Parisi <[email protected]>
Committed: Mon May 14 12:51:33 2018 -0400

----------------------------------------------------------------------
 EXPRESSIONS.md                                  |  36 +++++-
 extensions/expression-language/Expression.cpp   | 126 ++++++++++++++++++-
 extensions/expression-language/Parser.yy        |   5 +-
 .../ExpressionLanguageTests.cpp                 |  30 ++++-
 4 files changed, 187 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/EXPRESSIONS.md
----------------------------------------------------------------------
diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md
index 95864d7..f2eebb2 100644
--- a/EXPRESSIONS.md
+++ b/EXPRESSIONS.md
@@ -172,6 +172,7 @@ token, filename.
 - [`substringBeforeLast`](#substringbeforelast)
 - [`substringAfter`](#substringafter)
 - [`substringAfterLast`](#substringafterlast)
+- [`getDelimitedField`](#getdelimitedfield)
 - [`replace`](#replace)
 - [`replaceFirst`](#replacefirst)
 - [`replaceAll`](#replaceall)
@@ -224,10 +225,6 @@ token, filename.
 
 ## Planned Features
 
-### String Manipulation
-
-- `getDelimitedField`
-
 ### Searching
 
 - `jsonPath`
@@ -737,6 +734,37 @@ values:
 | `${filename:substringAfterLast(' n')}` | ew filename.txt |
 | `${filename:substringAfterLast('missing')}` | a brand new filename.txt |
 
+### getDelimitedField
+
+**Description**: Parses the Subject as a delimited line of text and returns
+just a single field from that delimited text.
+
+**Subject Type**: String
+
+**Arguments**:
+
+| Argument | Description |
+| - | - |
+| index | The index of the field to return. A value of 1 will return the first 
field, a value of 2 will return the second field, and so on. |
+| delimiter | Optional argument that provides the character to use as a field 
separator. If not specified, a comma will be used. This value must be exactly 1 
character. |
+| quoteChar | Optional argument that provides the character that can be used 
to quote values so that the delimiter can be used within a single field. If not 
specified, a double-quote (") will be used. This value must be exactly 1 
character. |
+| escapeChar | Optional argument that provides the character that can be used 
to escape the Quote Character or the Delimiter within a field. If not 
specified, a backslash (\) is used. This value must be exactly 1 character. |
+| stripChars | Optional argument that specifies whether or not quote 
characters and escape characters should be stripped. For example, if we have a 
field value `"1, 2, 3"` and this value is `true`, we will get the value `1, 2, 
3`, but if this value is false, we will get the value `"1, 2, 3"` with the 
quotes. The default value is `false`. This value must be either `true` or 
`false`. |
+
+**Return Type**: String
+
+**Examples**: If the "line" attribute contains the value `"Jacobson, John", 32, Mr.`
+and the "altLine" attribute contains the value `Jacobson, John|32|Mr.`, then
+the following Expressions will result in the following values:
+
+| Expression | Value |
+| - | - |
+| `${line:getDelimitedField(2)}` | ` 32` |
+| `${line:getDelimitedField(2):trim()}` | `32` |
+| `${line:getDelimitedField(1)}` | `"Jacobson, John"` |
+| `${line:getDelimitedField(1, ',', '"', '\\', true)}` | `Jacobson, John` |
+| `${altLine:getDelimitedField(1, '\|')}` | `Jacobson, John` |
+
 ### replace
 
 **Description**: Replaces all occurrences of one literal String within the 
Subject

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/extensions/expression-language/Expression.cpp
----------------------------------------------------------------------
diff --git a/extensions/expression-language/Expression.cpp 
b/extensions/expression-language/Expression.cpp
index 2b64580..b26a515 100644
--- a/extensions/expression-language/Expression.cpp
+++ b/extensions/expression-language/Expression.cpp
@@ -193,6 +193,126 @@ Value expr_substringAfterLast(const std::vector<Value> 
&args) {
   return Value(arg_0.substr(last_pos + arg_1.length()));
 }
 
+Value expr_getDelimitedField(const std::vector<Value> &args) {
+  const auto &subject = args[0].asString();
+  const auto &index = args[1].asUnsignedLong() - 1;
+  char delimiter_ch = ',';
+
+  if (args.size() > 2) {
+    delimiter_ch = args[2].asString()[0];
+  }
+
+  char quote_ch = '"';
+
+  if (args.size() > 3) {
+    quote_ch = args[3].asString()[0];
+  }
+
+  char escape_ch = '\\';
+
+  if (args.size() > 4) {
+    escape_ch = args[4].asString()[0];
+  }
+
+  bool strip_chars = false;
+
+  if (args.size() > 5) {
+    strip_chars = args[5].asBoolean();
+  }
+
+  enum parse_states {
+    value,
+    quote
+  };
+
+  parse_states parse_state = value;
+  uint64_t field_idx = 0;
+  size_t field_size = 0;
+  std::string result;
+  result.resize(1024);
+
+  for (uint64_t parse_pos = 0; parse_pos < subject.length(); parse_pos++) {
+    char cur_ch = subject[parse_pos];
+
+    if (cur_ch == escape_ch) {
+      if (!strip_chars && field_idx == index) {
+        field_size++;
+
+        if (field_size >= result.size()) {
+          result.resize(result.size() + 1024);
+        }
+
+        result[field_size - 1] = escape_ch;
+      }
+      parse_pos++;
+      if (parse_pos < subject.length()) {
+        cur_ch = subject[parse_pos];
+      } else {
+        break;
+      }
+    }
+
+    switch (parse_state) {
+      case value:
+        if (cur_ch == delimiter_ch) {
+          field_idx++;
+          if (field_idx > index) {
+            break;
+          }
+          continue;
+        } else if (cur_ch == quote_ch) {
+          if (!strip_chars && field_idx == index) {
+            field_size++;
+
+            if (field_size >= result.size()) {
+              result.resize(result.size() + 1024);
+            }
+
+            result[field_size - 1] = quote_ch;
+          }
+          parse_state = quote;
+          continue;
+        } else if (field_idx == index) {
+          field_size++;
+
+          if (field_size >= result.size()) {
+            result.resize(result.size() + 1024);
+          }
+
+          result[field_size - 1] = cur_ch;
+        }
+        break;
+      case quote:
+        if (cur_ch == quote_ch) {
+          if (!strip_chars && field_idx == index) {
+            field_size++;
+
+            if (field_size >= result.size()) {
+              result.resize(result.size() + 1024);
+            }
+
+            result[field_size - 1] = quote_ch;
+          }
+          parse_state = value;
+          continue;
+        } else if (field_idx == index) {
+          field_size++;
+
+          if (field_size >= result.size()) {
+            result.resize(result.size() + 1024);
+          }
+
+          result[field_size - 1] = cur_ch;
+        }
+        break;
+    }
+  }
+
+  result.resize(field_size);
+
+  return Value(result);
+}
+
 Value expr_startsWith(const std::vector<Value> &args) {
   const std::string &arg_0 = args[0].asString();
   const std::string &arg_1 = args[1].asString();
@@ -1183,7 +1303,7 @@ Value expr_unescapeCsv(const std::vector<Value> &args) {
 
 Value expr_urlEncode(const std::vector<Value> &args) {
   auto arg_0 = args[0].asString();
-  CURL * curl = curl_easy_init();
+  CURL *curl = curl_easy_init();
   if (curl != nullptr) {
     char *output = curl_easy_escape(curl,
                                     arg_0.c_str(),
@@ -1204,7 +1324,7 @@ Value expr_urlEncode(const std::vector<Value> &args) {
 
 Value expr_urlDecode(const std::vector<Value> &args) {
   auto arg_0 = args[0].asString();
-  CURL * curl = curl_easy_init();
+  CURL *curl = curl_easy_init();
   if (curl != nullptr) {
     int out_len;
     char *output = curl_easy_unescape(curl,
@@ -1564,6 +1684,8 @@ Expression make_dynamic_function(const std::string 
&function_name,
     return 
make_dynamic_function_incomplete<expr_substringAfter>(function_name, args, 2);
   } else if (function_name == "substringAfterLast") {
     return 
make_dynamic_function_incomplete<expr_substringAfterLast>(function_name, args, 
2);
+  } else if (function_name == "getDelimitedField") {
+    return 
make_dynamic_function_incomplete<expr_getDelimitedField>(function_name, args, 
2);
   } else if (function_name == "startsWith") {
     return make_dynamic_function_incomplete<expr_startsWith>(function_name, 
args, 1);
   } else if (function_name == "endsWith") {

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/extensions/expression-language/Parser.yy
----------------------------------------------------------------------
diff --git a/extensions/expression-language/Parser.yy 
b/extensions/expression-language/Parser.yy
index 2e053f9..9e4cab2 100644
--- a/extensions/expression-language/Parser.yy
+++ b/extensions/expression-language/Parser.yy
@@ -130,7 +130,6 @@ text_no_quote_no_dollar: IDENTIFIER { std::swap($$, $1); }
                        | COLON { $$ = ":"; }
                        | SEMI { $$ = ";"; }
                        | FSLASH { $$ = "/"; }
-                       | BSLASH { $$ = "\\"; }
                        | STAR { $$ = "*"; }
                        | HASH { $$ = "#"; }
                        | NUMBER { std::swap($$, $1); }
@@ -139,11 +138,15 @@ text_no_quote_no_dollar: IDENTIFIER { std::swap($$, $1); }
 text_inc_quote_escaped_dollar: text_no_quote_no_dollar { std::swap($$, $1); }
                              | SQUOTE { $$ = "'"; }
                              | DQUOTE { $$ = "\""; }
+                             | BSLASH { $$ = "\\"; }
                              | DOLLAR DOLLAR { $$ = "$"; }
                              ;
 
 text_inc_dollar: text_no_quote_no_dollar { std::swap($$, $1); }
                | DOLLAR { $$ = "$"; }
+               | BSLASH SQUOTE { $$ = "'"; }
+               | BSLASH DQUOTE { $$ = "\""; }
+               | BSLASH BSLASH { $$ = "\\"; }
                ;
 
 quoted_text_content: %empty {}

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/15d9a691/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
----------------------------------------------------------------------
diff --git 
a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp 
b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
index 17fb903..5c331be 100644
--- a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
+++ b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
@@ -105,7 +105,7 @@ TEST_CASE("UTF-8 characters attribute", 
"[expressionLanguageTestUTF8Attribute]")
 }
 
 TEST_CASE("Single quoted attribute expression", 
"[expressionLanguageTestSingleQuotedAttributeExpression]") {  // NOLINT
-  auto expr = expression::compile("text_before${'|{}()[],:;\\/*# 
\t\r\n$'}text_after");
+  auto expr = expression::compile("text_before${'|{}()[],:;\\\\/*# 
\t\r\n$'}text_after");
 
   auto flow_file_a = std::make_shared<MockFlowFile>();
   flow_file_a->addAttribute("|{}()[],:;\\/*# \t\r\n$", 
"__flow_a_attr_value_a__");
@@ -113,7 +113,7 @@ TEST_CASE("Single quoted attribute expression", 
"[expressionLanguageTestSingleQu
 }
 
 TEST_CASE("Double quoted attribute expression", 
"[expressionLanguageTestDoubleQuotedAttributeExpression]") {  // NOLINT
-  auto expr = expression::compile("text_before${\"|{}()[],:;\\/*# 
\t\r\n$\"}text_after");
+  auto expr = expression::compile("text_before${\"|{}()[],:;\\\\/*# 
\t\r\n$\"}text_after");
 
   auto flow_file_a = std::make_shared<MockFlowFile>();
   flow_file_a->addAttribute("|{}()[],:;\\/*# \t\r\n$", 
"__flow_a_attr_value_a__");
@@ -300,6 +300,30 @@ TEST_CASE("Substring After Last", 
"[expressionLanguageSubstringAfterLast]") {  /
   REQUIRE("__" == expr({flow_file_a}).asString());
 }
 
+TEST_CASE("Get Delimited", "[expressionLanguageGetDelimited]") {  // NOLINT
+  auto expr = expression::compile("${attr:getDelimitedField(2)}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("attr", "\"Jacobson, John\", 32, Mr.");
+  REQUIRE(" 32" == expr({flow_file_a}).asString());
+}
+
+TEST_CASE("Get Delimited 2", "[expressionLanguageGetDelimited2]") {  // NOLINT
+  auto expr = expression::compile("${attr:getDelimitedField(1)}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("attr", "\"Jacobson, John\", 32, Mr.");
+  REQUIRE("\"Jacobson, John\"" == expr({flow_file_a}).asString());
+}
+
+TEST_CASE("Get Delimited 3", "[expressionLanguageGetDelimited3]") {  // NOLINT
+  auto expr = expression::compile("${attr:getDelimitedField(1, ',', '\\\"', 
'\\\\', 'true')}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("attr", "\"Jacobson, John\", 32, Mr.");
+  REQUIRE("Jacobson, John" == expr({flow_file_a}).asString());
+}
+
 TEST_CASE("Starts With", "[expressionLanguageStartsWith]") {  // NOLINT
   auto expr = expression::compile("${attr:startsWith('a brand')}");
 
@@ -409,7 +433,7 @@ TEST_CASE("Replace First Regex", 
"[expressionLanguageReplaceFirstRegex]") {  //
 }
 
 TEST_CASE("Replace All", "[expressionLanguageReplaceAll]") {  // NOLINT
-  auto expr = expression::compile("${attr:replaceAll('\\..*', '')}");
+  auto expr = expression::compile("${attr:replaceAll('\\\\..*', '')}");
 
   auto flow_file_a = std::make_shared<MockFlowFile>();
   flow_file_a->addAttribute("attr", "a brand new filename.txt");

Reply via email to