Repository: nifi
Updated Branches:
  refs/heads/master f171756a8 -> f044ba5d4


NIFI-808 Providing a property that allows capture group 0 of any regular 
expression to be excluded from the attributes written to the FlowFile.


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/f044ba5d
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/f044ba5d
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/f044ba5d

Branch: refs/heads/master
Commit: f044ba5d45150a0d17990b4c1a304fbbdf35a010
Parents: f171756
Author: Aldrin Piri <[email protected]>
Authored: Mon Aug 17 21:44:59 2015 -0400
Committer: Aldrin Piri <[email protected]>
Committed: Wed Aug 19 12:00:25 2015 -0400

----------------------------------------------------------------------
 .../nifi/processors/standard/ExtractText.java   | 17 ++++++++-
 .../processors/standard/TestExtractText.java    | 39 ++++++++++++++++++++
 2 files changed, 54 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/f044ba5d/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
index 2862c34..29b9c20 100644
--- 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
+++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
@@ -170,13 +170,22 @@ public class ExtractText extends AbstractProcessor {
 
     public static final PropertyDescriptor UNIX_LINES = new 
PropertyDescriptor.Builder()
             .name("Enable Unix Lines Mode")
-            .description("Indicates that only the '\n' line terminator is 
recognized int the behavior of '.', '^', and '$'.  Can also be specified "
+            .description("Indicates that only the '\n' line terminator is 
recognized in the behavior of '.', '^', and '$'.  Can also be specified "
                     + "via the embeded flag (?d).")
             .required(true)
             .allowableValues("true", "false")
             .defaultValue("false")
             .build();
 
+    public static final PropertyDescriptor INCLUDE_CAPTURE_GROUP_ZERO = new 
PropertyDescriptor.Builder()
+            .name("Include Capture Group 0")
+            .description("Indicates that Capture Group 0 should be included as 
an attribute. Capture Group 0 represents the entirety of the regular expression 
match, is typically not used, and "
+                    + "could have considerable length.")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("true")
+            .build();
+
     public static final Relationship REL_MATCH = new Relationship.Builder()
             .name("matched")
             .description("FlowFiles are routed to this relationship when the 
Regular Expression is successfully evaluated and the FlowFile is modified as a 
result")
@@ -212,6 +221,7 @@ public class ExtractText extends AbstractProcessor {
         props.add(UNICODE_CASE);
         props.add(UNICODE_CHARACTER_CLASS);
         props.add(UNIX_LINES);
+        props.add(INCLUDE_CAPTURE_GROUP_ZERO);
         this.properties = Collections.unmodifiableList(props);
     }
 
@@ -297,13 +307,16 @@ public class ExtractText extends AbstractProcessor {
         final Map<String, String> regexResults = new HashMap<>();
 
         final Map<String, Pattern> patternMap = compiledPattersMapRef.get();
+
+        final int startGroupIdx = 
context.getProperty(INCLUDE_CAPTURE_GROUP_ZERO).asBoolean() ? 0 : 1;
+
         for (final Map.Entry<String, Pattern> entry : patternMap.entrySet()) {
 
             final Matcher matcher = entry.getValue().matcher(contentString);
 
             if (matcher.find()) {
                 final String baseKey = entry.getKey();
-                for (int i = 0; i <= matcher.groupCount(); i++) {
+                for (int i = startGroupIdx; i <= matcher.groupCount(); i++) {
                     final String key = new 
StringBuilder(baseKey).append(".").append(i).toString();
                     String value = matcher.group(i);
                     if (value.length() > maxCaptureGroupLength) {

http://git-wip-us.apache.org/repos/asf/nifi/blob/f044ba5d/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
index fd47cf7..4b7c53c 100644
--- 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
+++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
@@ -310,4 +310,43 @@ public class TestExtractText {
         assertEquals(2, relationships.size());
     }
 
+    @Test
+    public void testIncludeZeroCaptureGroupProperty() throws Exception {
+        final TestRunner testRunner = TestRunners.newTestRunner(new 
ExtractText());
+
+        final String attributeKey = "regex.result";
+
+        testRunner.setProperty(attributeKey, "(?s)(.*)");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = 
testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+
+        // Ensure the zero capture group is in the resultant attributes
+        out.assertAttributeExists(attributeKey + ".0");
+        out.assertAttributeEquals(attributeKey, SAMPLE_STRING);
+    }
+
+    @Test
+    public void testIgnoreZeroCaptureGroupProperty() throws Exception {
+        final TestRunner testRunner = TestRunners.newTestRunner(new 
ExtractText());
+
+        testRunner.setProperty(ExtractText.INCLUDE_CAPTURE_GROUP_ZERO, 
"false");
+
+        final String attributeKey = "regex.result";
+
+        testRunner.setProperty(attributeKey, "(?s)(.*)");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = 
testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+
+        // Ensure the zero capture group is not in the resultant attributes
+        out.assertAttributeNotExists(attributeKey + ".0");
+        out.assertAttributeEquals(attributeKey, SAMPLE_STRING);
+    }
 }

Reply via email to