Repository: nifi Updated Branches: refs/heads/master eedf1237a -> f7f809c3d
NIFI-4272 support multiple captures when EL is present in replacement value This closes #2748 Signed-off-by: Mike Moser <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/f7f809c3 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/f7f809c3 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/f7f809c3 Branch: refs/heads/master Commit: f7f809c3d3632eea5234b31740984b73de322464 Parents: eedf123 Author: Otto Fowler <[email protected]> Authored: Wed May 30 16:53:55 2018 -0400 Committer: Mike Moser <[email protected]> Committed: Wed Jun 6 17:23:20 2018 +0000 ---------------------------------------------------------------------- .../nifi/processors/standard/ReplaceText.java | 110 +++++++++++++++---- .../processors/standard/TestReplaceText.java | 62 ++++++++++- 2 files changed, 150 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/f7f809c3/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java index de17213..f303796 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java @@ -18,16 +18,17 @@ package org.apache.nifi.processors.standard; import org.apache.commons.io.IOUtils; import org.apache.nifi.annotation.behavior.EventDriven; -import org.apache.nifi.annotation.behavior.SystemResourceConsideration; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.behavior.SystemResource; +import org.apache.nifi.annotation.behavior.SystemResourceConsideration; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.PropertyValue; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.components.Validator; @@ -79,7 +80,9 @@ import java.util.regex.Pattern; @SystemResourceConsideration(resource = SystemResource.MEMORY) public class ReplaceText extends AbstractProcessor { - private static Pattern REPLACEMENT_NORMALIZATION_PATTERN = Pattern.compile("(\\$\\D)"); + private static Pattern QUOTED_GROUP_REF_PATTERN = Pattern.compile("\\$\\{\\s*?'\\$\\d+?'.+?\\}"); + private static Pattern DOUBLE_QUOTED_GROUP_REF_PATTERN = Pattern.compile("\\$\\{\\s*?\"\\$\\d+?\".+?\\}"); + private static Pattern LITERAL_QUOTED_PATTERN = Pattern.compile("literal\\(('.*?')\\)",Pattern.DOTALL); // Constants public static final String LINE_BY_LINE = "Line-by-Line"; @@ -301,12 +304,8 @@ public class ReplaceText extends AbstractProcessor { // If we find a back reference that is not valid, then we will treat it as a literal string. For example, if we have 3 capturing // groups and the Replacement Value has the value is "I owe $8 to him", then we want to treat the $8 as a literal "$8", rather - // than attempting to use it as a back reference. + // than attempting to use it as a back reference. We do this even if there are no capture groups. private static String escapeLiteralBackReferences(final String unescaped, final int numCapturingGroups) { - if (numCapturingGroups == 0) { - return unescaped; - } - String value = unescaped; final Matcher backRefMatcher = unescapedBackReferencePattern.matcher(value); // consider unescaped back references while (backRefMatcher.find()) { @@ -542,12 +541,18 @@ public class ReplaceText extends AbstractProcessor { additionalAttrs.put("$" + i, groupValue); } - String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, additionalAttrs, escapeBackRefDecorator).getValue(); + // prepare the string and do the regex replace first + // then evaluate the EL on the result + String replacement = context.getProperty(REPLACEMENT_VALUE).getValue(); replacement = escapeLiteralBackReferences(replacement, numCapturingGroups); + replacement = escapeExpressionDollarSigns(replacement); + replacement = wrapLiterals(replacement); + replacement = contentString.replaceAll(searchRegex, replacement); + replacement = escapeForEvaluation(replacement); - String replacementFinal = normalizeReplacementString(replacement); + PropertyValue tempValue = context.newPropertyValue(replacement); + final String updatedValue = tempValue.evaluateAttributeExpressions(flowFile, additionalAttrs, null).getValue(); - final String updatedValue = contentString.replaceAll(searchRegex, replacementFinal); updatedFlowFile = session.write(flowFile, new OutputStreamCallback() { @Override public void process(final OutputStream out) throws IOException { @@ -574,12 +579,17 @@ public class ReplaceText extends AbstractProcessor { additionalAttrs.put("$" + i, groupValue); } - String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, additionalAttrs, escapeBackRefDecorator).getValue(); + // prepare the string and do the regex replace first + // then evaluate the EL on the result + String replacement = context.getProperty(REPLACEMENT_VALUE).getValue(); replacement = escapeLiteralBackReferences(replacement, numCapturingGroups); + replacement = escapeExpressionDollarSigns(replacement); + replacement = wrapLiterals(replacement); + replacement = oneLine.replaceAll(searchRegex, replacement); + replacement = escapeForEvaluation(replacement); - String replacementFinal = normalizeReplacementString(replacement); - - final String updatedValue = oneLine.replaceAll(searchRegex, replacementFinal); + PropertyValue tempValue = context.newPropertyValue(replacement); + final String updatedValue = tempValue.evaluateAttributeExpressions(flowFile, additionalAttrs, null).getValue(); bw.write(updatedValue); } else { // No match. Just write out the line as it was. @@ -659,16 +669,76 @@ public class ReplaceText extends AbstractProcessor { } /** + * Wraps '$1' with the {@code literal} function for EL evaluation. + * @param possibleLiteral the {@code String} to evaluate. + * @return {@code String} with literals wrapped. If no literals or Expression Lanaguage present the passed string + * is returned. + */ + private static String wrapLiterals(String possibleLiteral) { + String replacementFinal = possibleLiteral; + if (!possibleLiteral.contains("${")) { + return possibleLiteral; + } + + if (QUOTED_GROUP_REF_PATTERN.matcher(replacementFinal).find()) { + replacementFinal = replacementFinal.replaceAll("(\\$\\{\\s*?)('\\$\\d+?')(.*\\})", "$1literal($2)$3"); + } + + if (DOUBLE_QUOTED_GROUP_REF_PATTERN.matcher(replacementFinal).find()) { + replacementFinal = replacementFinal.replaceAll("(\\$\\{\\s*?)(\"\\$\\d+?\")(.*\\})", "$1literal($2)$3"); + } + + return replacementFinal; + } + + /** * If we have a '$' followed by anything other than a number, then escape - * it. E.g., '$d' becomes '\$d' so that it can be used as a literal in a + * it if it is not already escaped. E.g., '$d' becomes '\$d' so that it can be used as a literal in a * regex. */ - private static String normalizeReplacementString(String replacement) { - String replacementFinal = replacement; - if (REPLACEMENT_NORMALIZATION_PATTERN.matcher(replacement).find()) { - replacementFinal = Matcher.quoteReplacement(replacement); + private static String escapeExpressionDollarSigns(String replacement) { + + // are there expressions or group references + if (replacement.indexOf('$') == -1) { + return replacement; } - return replacementFinal; + StringBuilder sb = new StringBuilder(); + boolean lastWasEscape = false; + for (int i=0; i<replacement.length(); i++) { + char c = replacement.charAt(i); + if (c == '\\' ) { + lastWasEscape = true; + } else { + if ( c == '$') { + if (!lastWasEscape && !Character.isDigit(replacement.charAt(i+1))) { + sb.append('\\'); + } + } + lastWasEscape = false; + } + sb.append(c); + } + return sb.toString(); + } + + /** + * Escapes a {@code String} containing literal('') EL values. + * @param contentString the {@code String} + * @return the escaped {@code String}. If no literal() is present, then the input {@code String} will be returned + */ + private static String escapeForEvaluation(String contentString) { + final Matcher matcher = LITERAL_QUOTED_PATTERN.matcher(contentString); + String returnString = contentString; + while(matcher.find()) { + for (int i = 1; i <= matcher.groupCount(); i ++) { + String replacement = matcher.group(i) + .replaceAll("\\n","\\\\n") + .replaceAll("\\r","\\\\r") + .replaceAll("\\t","\\\\t"); + returnString = new StringBuilder(returnString).replace(matcher.start(i),matcher.end(i),replacement).toString(); + } + } + return returnString; } private interface ReplacementStrategyExecutor { http://git-wip-us.apache.org/repos/asf/nifi/blob/f7f809c3/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java index 3755883..7505233 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java @@ -79,9 +79,23 @@ public class TestReplaceText { } @Test + public void testEscapedEnough$InReplacementCanReturnEscaped$() throws IOException { + final TestRunner runner = getRunner(); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "a\\\\\\$b"); + + runner.enqueue("a$a,b,c,d"); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("a\\$b".getBytes("UTF-8")); + } + + @Test public void testWithEscaped$InReplacement() throws IOException { final TestRunner runner = getRunner(); - runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s:^.*$)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "a\\$b"); runner.enqueue("a$a,b,c,d"); @@ -89,7 +103,7 @@ public class TestReplaceText { runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); - out.assertContentEquals("a\\$b".getBytes("UTF-8")); + out.assertContentEquals("a$b".getBytes("UTF-8")); } @Test @@ -107,6 +121,34 @@ public class TestReplaceText { } @Test + public void testWithSingleQuotedELInReplacement() throws IOException { + final TestRunner runner = getRunner(); + runner.setProperty(ReplaceText.SEARCH_VALUE, "\"([a-z]+)\":\"(\\w+)\""); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\"${'$1':toUpper()}\":\"$2\""); + runner.enqueue("{\"name\":\"Smith\",\"middle\":\"nifi\",\"firstname\":\"John\"}"); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("{\"NAME\":\"Smith\",\"MIDDLE\":\"nifi\",\"FIRSTNAME\":\"John\"}"); + + } + + @Test + public void testWithDoubleQuotedELInReplacement() throws IOException { + final TestRunner runner = getRunner(); + runner.setProperty(ReplaceText.SEARCH_VALUE, "\"([a-z]+)\":\"(\\w+)\""); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\"${\"$1\":toUpper()}\":\"$2\""); + runner.enqueue("{\"name\":\"Smith\",\"middle\":\"nifi\",\"firstname\":\"John\"}"); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("{\"NAME\":\"Smith\",\"MIDDLE\":\"nifi\",\"FIRSTNAME\":\"John\"}"); + + } + + @Test public void testPrependSimple() throws IOException { final TestRunner runner = getRunner(); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST"); @@ -1100,6 +1142,22 @@ public class TestReplaceText { } @Test + public void testRegexWithELAndELSpecialChars() throws Exception { + final TestRunner runner = getRunner(); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${'$1':toUpper()}"); // will uppercase group with good Java regex + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.REGEX_REPLACE); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.ENTIRE_TEXT); + + runner.enqueue("testing\n\t\r123".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("TESTING\n\t\r123"); + } + + @Test public void testRegexNoCaptureDefaultReplacement() throws IOException { // Test the old Default Regex and new Default Regex with the default replacement. This should fail // because the regex does not create a capture group.
