Repository: metron Updated Branches: refs/heads/master 309d8097a -> 50abfa441
METRON-1039: Add ZIP function to Stellar closes apache/incubator-metron#652 Project: http://git-wip-us.apache.org/repos/asf/metron/repo Commit: http://git-wip-us.apache.org/repos/asf/metron/commit/50abfa44 Tree: http://git-wip-us.apache.org/repos/asf/metron/tree/50abfa44 Diff: http://git-wip-us.apache.org/repos/asf/metron/diff/50abfa44 Branch: refs/heads/master Commit: 50abfa44101642fc5413014f5cdc144e8bd618a5 Parents: 309d809 Author: cstella <[email protected]> Authored: Tue Jul 25 11:41:20 2017 +0100 Committer: cstella <[email protected]> Committed: Tue Jul 25 11:41:20 2017 +0100 ---------------------------------------------------------------------- metron-stellar/stellar-common/README.md | 18 +++ .../dsl/functions/FunctionalFunctions.java | 78 ++++++++++ .../dsl/functions/FunctionalFunctionsTest.java | 141 +++++++++++++++++++ 3 files changed, 237 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metron/blob/50abfa44/metron-stellar/stellar-common/README.md ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/README.md b/metron-stellar/stellar-common/README.md index 7a4343a..0ffb096 100644 --- a/metron-stellar/stellar-common/README.md +++ b/metron-stellar/stellar-common/README.md @@ -202,6 +202,8 @@ In the core language functions, we support basic functional programming primitiv | [ `WEEK_OF_MONTH`](#week_of_month) | | [ `WEEK_OF_YEAR`](#week_of_year) | | [ `YEAR`](#year) | +| [ `ZIP`](#zip) | +| [ `ZIP_LONGEST`](#zip_longest) | ### `APPEND_IF_MISSING` * Description: Appends the suffix to the end of the string if the string does not already end with any of the suffixes. 
@@ -711,6 +713,22 @@ In the core language functions, we support basic functional programming primitiv * dateTime - The datetime as a long representing the milliseconds since unix epoch * Returns: The current year +### `ZIP` + * Description: Zips lists into a single list where the ith element is a list containing the ith items from the constituent lists. + See [python](https://docs.python.org/3/library/functions.html#zip) + and [wikipedia](https://en.wikipedia.org/wiki/Convolution_(computer_science)) for more context. + * Input: + * list* - Lists to zip. + * Returns: The zip of the lists. The returned list has the length of the shortest list, e.g. `ZIP( [ 1, 2 ], [ 3, 4, 5] ) == [ [1, 3], [2, 4] ]` + +### `ZIP_LONGEST` + * Description: Zips lists into a single list where the ith element is a list containing the ith items from the constituent lists. + See [python](https://docs.python.org/3/library/itertools.html#itertools.zip_longest) + and [wikipedia](https://en.wikipedia.org/wiki/Convolution_(computer_science)) for more context. + * Input: + * list* - Lists to zip. + * Returns: The zip of the lists. The returned list has the length of the longest list. Missing elements are null, e.g. `ZIP_LONGEST( [ 1, 2 ], [ 3, 4, 5] ) == [ [1, 3], [2, 4], [null, 5] ]` + The following is an example query (i.e. 
a function which returns a boolean) which would be seen possibly in threat triage: http://git-wip-us.apache.org/repos/asf/metron/blob/50abfa44/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctions.java ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctions.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctions.java index a1871d1..8eb2498 100644 --- a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctions.java +++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctions.java @@ -24,6 +24,8 @@ import org.apache.metron.stellar.common.LambdaExpression; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; public class FunctionalFunctions { @@ -111,6 +113,82 @@ public class FunctionalFunctions { } } + @Stellar(name="ZIP_LONGEST" + , description="Zips lists into a single list where the ith element is an list " + + "containing the ith items from the constituent lists. " + + "See [python](https://docs.python.org/3/library/itertools.html#itertools.zip_longest) " + + "and [wikipedia](https://en.wikipedia.org/wiki/Convolution_(computer_science)) for more context." + , params = { + "list* - Lists to zip." + } + , returns = "The zip of the lists. The returned list is the max size of all the lists. " + + "Empty elements are null " + + "e.g. 
ZIP_LONGEST( [ 1, 2 ], [ 3, 4, 5] ) == [ [1, 3], [2, 4], [null, 5] ]" + ) + public static class LongestZip extends BaseStellarFunction { + + @Override + public Object apply(List<Object> args) { + if(args == null || args.size() == 0) { + return new ArrayList<>(); + } + return zip(args, true); + } + } + + @Stellar(name="ZIP" + , description="Zips lists into a single list where the ith element is an list containing the ith items from the constituent lists. " + + "See [python](https://docs.python.org/3/library/functions.html#zip) and [wikipedia](https://en.wikipedia.org/wiki/Convolution_(computer_science)) for more context." + , params = { + "list* - Lists to zip." + } + ,returns = "The zip of the lists. The returned list is the min size of all the lists. " + + "e.g. ZIP( [ 1, 2 ], [ 3, 4, 5] ) == [ [1, 3], [2, 4] ]" + ) + public static class Zip extends BaseStellarFunction { + + @Override + public Object apply(List<Object> args) { + if(args == null || args.size() == 0) { + return new ArrayList<>(); + } + return zip(args, false); + } + } + + private static List<List<Object>> zip(List<Object> args, boolean jagged) { + List<List<Object>> lists = new ArrayList<>(); + Integer resultSize = null; + for(Object o : args) { + if(o instanceof List) { + List<Object> l = (List<Object>)o; + if( resultSize == null) { + resultSize = l.size(); + } + else if(jagged) { + resultSize = Math.max(l.size(), resultSize); + } + else { + resultSize = Math.min(l.size(), resultSize); + } + lists.add(l); + } + } + if(resultSize == null) { + return new ArrayList<>(); + } + + return IntStream.range(0, resultSize) + .mapToObj(i -> { + List<Object> o = new ArrayList<>(); + for(List<Object> list : lists) { + o.add( i < list.size() ? list.get(i): null); + } + return o; + }) + .collect(Collectors.toList()); + } + private static List<Object> listOf(Object... 
vals) { List<Object> ret = new ArrayList<>(vals.length); for(int i = 0;i < vals.length;++i) { http://git-wip-us.apache.org/repos/asf/metron/blob/50abfa44/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctionsTest.java ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctionsTest.java b/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctionsTest.java index 3eec2b0..32e863b 100644 --- a/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctionsTest.java +++ b/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/FunctionalFunctionsTest.java @@ -24,13 +24,154 @@ import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Test; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.apache.metron.stellar.common.utils.StellarProcessorUtils.run; public class FunctionalFunctionsTest { @Test + public void testZipLongest_boundary() { + for (String expr : ImmutableList.of( "ZIP_LONGEST()" + , "ZIP_LONGEST( null, null )" + , "ZIP_LONGEST( [], null )" + , "ZIP_LONGEST( [], [] )" + , "ZIP_LONGEST( null, [] )" + ) + ) + { + List<List<Object>> o = (List<List<Object>>) run(expr, new HashMap<>()); + Assert.assertEquals(0, o.size()); + } + } + + @Test + public void testZip_longest() { + Map<String, Object> variables = ImmutableMap.of( + "list1" , ImmutableList.of(1, 2, 3) + ,"list2", ImmutableList.of(4, 5, 6, 7) + ); + for (String expr : ImmutableList.of( "ZIP_LONGEST(list1)" + , "ZIP_LONGEST( [1, 2, 3])" + ) + ) + { + List<List<Object>> o = (List<List<Object>>) run(expr, variables); + Assert.assertEquals(3, o.size()); + for (int i = 0; i < 3; ++i) { + List l = o.get(i); + Assert.assertEquals(1, l.size()); + 
Assert.assertEquals(i+1, l.get(0)); + } + + } + + for (String expr : ImmutableList.of( "ZIP_LONGEST(list1, list2)" + , "ZIP_LONGEST( [1, 2, 3], [4, 5, 6, 7] )" + ) + ) + { + List<List<Object>> o = (List<List<Object>>) run(expr, variables); + Assert.assertEquals(4, o.size()); + for (int i = 0; i < 3; ++i) { + List l = o.get(i); + Assert.assertEquals(2, l.size()); + Assert.assertEquals(i+1, l.get(0)); + Assert.assertEquals(i+4, l.get(1)); + } + { + int i = 3; + List l = o.get(i); + Assert.assertEquals(2, l.size()); + Assert.assertNull(l.get(0)); + Assert.assertEquals(i+4, l.get(1)); + } + } + + + for (String expr : ImmutableList.of( + "REDUCE(ZIP_LONGEST(list2, list1), (s, x) -> s + GET_FIRST(x) * GET_LAST(x), 0)" + , "REDUCE(ZIP_LONGEST( [1, 2, 3], [4, 5, 6, 7] ), (s, x) -> s + GET_FIRST(x) * GET_LAST(x), 0)" + , "REDUCE(ZIP_LONGEST(list1, list2), (s, x) -> s + GET_FIRST(x) * GET_LAST(x), 0)" //this works because stellar treats nulls as 0 in arithmetic operations. + , "REDUCE(ZIP_LONGEST(list1, list2), (s, x) -> s + (GET_FIRST(x) == null?0:GET_FIRST(x)) * (GET_LAST(x) == null?0:GET_LAST(x)), 0)" //with proper guarding NOT assuming stellar peculiarities + ) + ) + { + int o = (int) run(expr, variables); + Assert.assertEquals(1*4 + 2*5 + 3*6, o, 1e-7); + } + + } + + @Test + public void testZip_boundary() { + for (String expr : ImmutableList.of( "ZIP()" + , "ZIP( null, null )" + , "ZIP( [], null )" + , "ZIP( [], [] )" + , "ZIP( null, [] )" + ) + ) + { + List<List<Object>> o = (List<List<Object>>) run(expr, new HashMap<>()); + Assert.assertEquals(0, o.size()); + } + } + + @Test + public void testZip() { + Map<String, Object> variables = ImmutableMap.of( + "list1" , ImmutableList.of(1, 2, 3) + ,"list2", ImmutableList.of(4, 5, 6) + ); + + for (String expr : ImmutableList.of( "ZIP(list1)" + , "ZIP( [1, 2, 3])" + ) + ) + { + List<List<Object>> o = (List<List<Object>>) run(expr, variables); + Assert.assertEquals(3, o.size()); + for (int i = 0; i < 3; ++i) { + List l = 
o.get(i); + Assert.assertEquals(1, l.size()); + Assert.assertEquals(i+1, l.get(0)); + } + + } + for (String expr : ImmutableList.of( "ZIP(list1, list2)" + , "ZIP( [1, 2, 3], [4, 5, 6] )" + , "ZIP( [1, 2, 3], [4, 5, 6, 7] )" + ) + ) + { + List<List<Object>> o = (List<List<Object>>) run(expr, variables); + Assert.assertEquals(3, o.size()); + for (int i = 0; i < 3; ++i) { + List l = o.get(i); + Assert.assertEquals(2, l.size()); + Assert.assertEquals(i+1, l.get(0)); + Assert.assertEquals(i+4, l.get(1)); + } + } + + for (String expr : ImmutableList.of( + "REDUCE(ZIP(list1, list2), (s, x) -> s + GET_FIRST(x) * GET_LAST(x), 0)" + , "REDUCE(ZIP( [1, 2, 3], [4, 5, 6] ), (s, x) -> s + GET_FIRST(x) * GET_LAST(x), 0)" + , "REDUCE(ZIP( [1, 2, 3], [4, 5, 6, 7] ), (s, x) -> s + GET_FIRST(x) * GET_LAST(x), 0)" + ) + ) + { + int o = (int) run(expr, variables); + Assert.assertEquals(1*4 + 2*5 + 3*6, o, 1e-7); + } + + } + + @Test public void testRecursive() { for (String expr : ImmutableList.of( "MAP(list, inner_list -> REDUCE(inner_list, (x, y) -> x + y, 0) )" , "MAP(list, (inner_list) -> REDUCE(inner_list, (x, y) -> x + y, 0) )"
