pvillard31 commented on code in PR #10923:
URL: https://github.com/apache/nifi/pull/10923#discussion_r2848256834
##########
nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java:
##########
@@ -2689,4 +2692,79 @@ public long getVersion() {
return 0;
}
}
+
+ @Test
+ public void testUnique() {
+ final Map<String, String> attributes = new HashMap<>();
+
+ // Test basic comma-separated list
+ attributes.put("list", "apple,banana,apple,orange,banana,grape");
+ verifyEquals("${list:unique(',')}", attributes,
"apple,banana,orange,grape");
+
+ // Test pipe-separated list
+ attributes.put("pipe_list", "red|blue|red|green|blue|yellow");
+ verifyEquals("${pipe_list:unique('|')}", attributes,
"red|blue|green|yellow");
+
+ // Test with spaces
+ attributes.put("space_list", "one two one three two four");
+ verifyEquals("${space_list:unique(' ')}", attributes, "one two three
four");
+
+ // Test with empty values in list
+ attributes.put("empty_list", "a,b,,c,,d,b");
+ verifyEquals("${empty_list:unique(',')}", attributes, "a,b,,c,d");
+
+ // Test with single value
+ attributes.put("single", "only_one");
+ verifyEquals("${single:unique(',')}", attributes, "only_one");
+
+ // Test with no duplicates
+ attributes.put("no_dups", "x,y,z");
+ verifyEquals("${no_dups:unique(',')}", attributes, "x,y,z");
+
+ // Test with all duplicates
+ attributes.put("all_dups", "same,same,same,same");
+ verifyEquals("${all_dups:unique(',')}", attributes, "same");
+
+ // Test with multi-character separator
+ attributes.put("multi_sep", "one::two::one::three::two");
+ verifyEquals("${multi_sep:unique('::')}", attributes,
"one::two::three");
+
+ // Test with special characters in separator
+ attributes.put("special_sep", "a|b|a|c|b");
+ verifyEquals("${special_sep:unique('|')}", attributes, "a|b|c");
+
+ // Test with empty string separator (should return original)
+ attributes.put("test_list", "abc");
+ verifyEquals("${test_list:unique('')}", attributes, "abc");
+
+ // Test with null subject
+ attributes.put("null_attr", "");
+ verifyEquals("${null_attr:unique(',')}", attributes, "");
Review Comment:
This does not test null subject, it is testing empty string. To test null
subject, I'd recommend testing the EL function call on a non-existent attribute:
```java
verifyEquals("${missing_attr:unique(',')}", attributes, "");
```
##########
nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java:
##########
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.attribute.expression.language.evaluation.functions;
+
+import org.apache.nifi.attribute.expression.language.EvaluationContext;
+import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
+import org.apache.nifi.attribute.expression.language.evaluation.QueryResult;
+import
org.apache.nifi.attribute.expression.language.evaluation.StringEvaluator;
+import
org.apache.nifi.attribute.expression.language.evaluation.StringQueryResult;
+
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * UniqueEvaluator removes duplicate values from a delimited string while
preserving
+ * the order of first occurrence.
+ *
+ * <p>This evaluator takes a separator as an argument and returns the unique
values
+ * from the subject string. The order of values is preserved based on their
first
+ * appearance in the original string.</p>
+ *
+ * <p>Examples:</p>
+ * <ul>
+ * <li>"red,blue,red,green" with separator "," returns "red,blue,green"</li>
+ * <li>"a::b::a::c" with separator "::" returns "a::b::c"</li>
+ * <li>"test" with separator "," returns "test" (no duplicates)</li>
+ * </ul>
+ *
+ * <p>Special cases:</p>
+ * <ul>
+ * <li>If the subject is null, returns null</li>
+ * <li>If the separator is null or empty, returns the original subject
unchanged</li>
+ * <li>Empty strings are treated as distinct values (e.g., "a,,b,," with
separator "," returns "a,,b,")</li>
Review Comment:
```suggestion
* <li>Empty strings are treated as distinct values (e.g., "a,,b,," with
separator "," returns "a,,b")</li>
```
##########
nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java:
##########
@@ -2689,4 +2692,79 @@ public long getVersion() {
return 0;
}
}
+
+ @Test
+ public void testUnique() {
+ final Map<String, String> attributes = new HashMap<>();
+
+ // Test basic comma-separated list
+ attributes.put("list", "apple,banana,apple,orange,banana,grape");
+ verifyEquals("${list:unique(',')}", attributes,
"apple,banana,orange,grape");
+
+ // Test pipe-separated list
+ attributes.put("pipe_list", "red|blue|red|green|blue|yellow");
+ verifyEquals("${pipe_list:unique('|')}", attributes,
"red|blue|green|yellow");
+
+ // Test with spaces
+ attributes.put("space_list", "one two one three two four");
+ verifyEquals("${space_list:unique(' ')}", attributes, "one two three
four");
+
+ // Test with empty values in list
+ attributes.put("empty_list", "a,b,,c,,d,b");
+ verifyEquals("${empty_list:unique(',')}", attributes, "a,b,,c,d");
+
+ // Test with single value
+ attributes.put("single", "only_one");
+ verifyEquals("${single:unique(',')}", attributes, "only_one");
+
+ // Test with no duplicates
+ attributes.put("no_dups", "x,y,z");
+ verifyEquals("${no_dups:unique(',')}", attributes, "x,y,z");
+
+ // Test with all duplicates
+ attributes.put("all_dups", "same,same,same,same");
+ verifyEquals("${all_dups:unique(',')}", attributes, "same");
+
+ // Test with multi-character separator
+ attributes.put("multi_sep", "one::two::one::three::two");
+ verifyEquals("${multi_sep:unique('::')}", attributes,
"one::two::three");
+
+ // Test with special characters in separator
+ attributes.put("special_sep", "a|b|a|c|b");
+ verifyEquals("${special_sep:unique('|')}", attributes, "a|b|c");
+
+ // Test with empty string separator (should return original)
+ attributes.put("test_list", "abc");
+ verifyEquals("${test_list:unique('')}", attributes, "abc");
+
+ // Test with null subject
+ attributes.put("null_attr", "");
+ verifyEquals("${null_attr:unique(',')}", attributes, "");
+ }
+
+ @Test
+ public void testUniquePreservesOrder() {
+ final Map<String, String> attributes = new HashMap<>();
+
+ // Verify that order of first occurrence is preserved
+ attributes.put("ordered", "3,1,4,1,5,9,2,6,5,3,5");
+ verifyEquals("${ordered:unique(',')}", attributes, "3,1,4,5,9,2,6");
+ }
+
+ @Test
+ public void testUniqueWithComplexData() {
+ final Map<String, String> attributes = new HashMap<>();
+
+ // Test with URLs
+ attributes.put("urls",
"http://example.com,http://test.com,http://example.com");
+ verifyEquals("${urls:unique(',')}", attributes,
"http://example.com,http://test.com");
+
+ // Test with file paths (Windows-style)
+ attributes.put("paths",
"C:\\Users\\test;D:\\Data;C:\\Users\\test;E:\\Backup");
+ verifyEquals("${paths:unique(';')}", attributes,
"C:\\Users\\test;D:\\Data;E:\\Backup");
+
+ // Test with dash separator (replaced tab test)
+ attributes.put("dash_separated", "field1-field2-field1-field3");
+ verifyEquals("${dash_separated:unique('-')}", attributes,
"field1-field2-field3");
+ }
Review Comment:
I think I would put everything in a single test method as we don't really
test different code paths.
##########
nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java:
##########
@@ -2689,4 +2692,79 @@ public long getVersion() {
return 0;
}
}
+
+ @Test
+ public void testUnique() {
+ final Map<String, String> attributes = new HashMap<>();
+
+ // Test basic comma-separated list
+ attributes.put("list", "apple,banana,apple,orange,banana,grape");
+ verifyEquals("${list:unique(',')}", attributes,
"apple,banana,orange,grape");
+
+ // Test pipe-separated list
+ attributes.put("pipe_list", "red|blue|red|green|blue|yellow");
+ verifyEquals("${pipe_list:unique('|')}", attributes,
"red|blue|green|yellow");
+
+ // Test with spaces
+ attributes.put("space_list", "one two one three two four");
+ verifyEquals("${space_list:unique(' ')}", attributes, "one two three
four");
+
+ // Test with empty values in list
+ attributes.put("empty_list", "a,b,,c,,d,b");
+ verifyEquals("${empty_list:unique(',')}", attributes, "a,b,,c,d");
+
+ // Test with single value
+ attributes.put("single", "only_one");
+ verifyEquals("${single:unique(',')}", attributes, "only_one");
+
+ // Test with no duplicates
+ attributes.put("no_dups", "x,y,z");
+ verifyEquals("${no_dups:unique(',')}", attributes, "x,y,z");
+
+ // Test with all duplicates
+ attributes.put("all_dups", "same,same,same,same");
+ verifyEquals("${all_dups:unique(',')}", attributes, "same");
+
+ // Test with multi-character separator
+ attributes.put("multi_sep", "one::two::one::three::two");
+ verifyEquals("${multi_sep:unique('::')}", attributes,
"one::two::three");
+
+ // Test with special characters in separator
+ attributes.put("special_sep", "a|b|a|c|b");
+ verifyEquals("${special_sep:unique('|')}", attributes, "a|b|c");
+
+ // Test with empty string separator (should return original)
+ attributes.put("test_list", "abc");
+ verifyEquals("${test_list:unique('')}", attributes, "abc");
+
+ // Test with null subject
+ attributes.put("null_attr", "");
+ verifyEquals("${null_attr:unique(',')}", attributes, "");
Review Comment:
I would suggest adjusting the comment and name of the attribute to reflect
that we are testing empty string. And add a test for null subject by
referencing an attribute that does not exist.
##########
nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java:
##########
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.attribute.expression.language.evaluation.functions;
+
+import org.apache.nifi.attribute.expression.language.EvaluationContext;
+import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
+import org.apache.nifi.attribute.expression.language.evaluation.QueryResult;
+import
org.apache.nifi.attribute.expression.language.evaluation.StringEvaluator;
+import
org.apache.nifi.attribute.expression.language.evaluation.StringQueryResult;
+
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * UniqueEvaluator removes duplicate values from a delimited string while
preserving
+ * the order of first occurrence.
+ *
+ * <p>This evaluator takes a separator as an argument and returns the unique
values
+ * from the subject string. The order of values is preserved based on their
first
+ * appearance in the original string.</p>
+ *
+ * <p>Examples:</p>
+ * <ul>
+ * <li>"red,blue,red,green" with separator "," returns "red,blue,green"</li>
+ * <li>"a::b::a::c" with separator "::" returns "a::b::c"</li>
+ * <li>"test" with separator "," returns "test" (no duplicates)</li>
+ * </ul>
+ *
+ * <p>Special cases:</p>
+ * <ul>
+ * <li>If the subject is null, returns null</li>
+ * <li>If the separator is null or empty, returns the original subject
unchanged</li>
+ * <li>Empty strings are treated as distinct values (e.g., "a,,b,," with
separator "," returns "a,,b,")</li>
+ * </ul>
+ */
+public class UniqueEvaluator extends StringEvaluator {
+
+ private final Evaluator<String> subject;
+ private final Evaluator<String> separator;
+
+ /**
+ * Constructs a new UniqueEvaluator.
+ *
+ * @param subject the evaluator that provides the delimited string to
process
+ * @param separator the evaluator that provides the delimiter to use for
splitting and joining
+ */
+ public UniqueEvaluator(final Evaluator<String> subject, final
Evaluator<String> separator) {
+ this.subject = subject;
+ this.separator = separator;
+ }
+
+ @Override
+ public QueryResult<String> evaluate(final EvaluationContext
evaluationContext) {
+ final String subjectValue =
subject.evaluate(evaluationContext).getValue();
+ if (subjectValue == null) {
+ return new StringQueryResult(null);
Review Comment:
I think we should return `new StringQueryResult("")` to be consistent with
similar evaluators, no?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]