[
https://issues.apache.org/jira/browse/METRON-640?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15815228#comment-15815228
]
ASF GitHub Bot commented on METRON-640:
---------------------------------------
Github user cestella commented on a diff in the pull request:
https://github.com/apache/incubator-metron/pull/403#discussion_r95382684
--- Diff:
metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java
---
@@ -32,124 +33,143 @@
public class StringFunctionsTest {
- @Test
- public void testStringFunctions() throws Exception {
- final Map<String, String> variableMap = new HashMap<String,
String>() {{
- put("foo", "casey");
- put("ip", "192.168.0.1");
- put("empty", "");
- put("spaced", "metron is great");
- }};
- Assert.assertTrue(runPredicate("true and TO_UPPER(foo) ==
'CASEY'", v -> variableMap.get(v)));
- Assert.assertTrue(runPredicate("foo in [ TO_LOWER('CASEY'),
'david' ]", v -> variableMap.get(v)));
- Assert.assertTrue(runPredicate("TO_UPPER(foo) in [
TO_UPPER('casey'), 'david' ] and IN_SUBNET(ip, '192.168.0.0/24')", v ->
variableMap.get(v)));
- Assert.assertFalse(runPredicate("TO_LOWER(foo) in [
TO_UPPER('casey'), 'david' ]", v -> variableMap.get(v)));
+ @Test
+ public void testStringFunctions() throws Exception {
+ final Map<String, String> variableMap = new HashMap<String, String>()
{{
+ put("foo", "casey");
+ put("ip", "192.168.0.1");
+ put("empty", "");
+ put("spaced", "metron is great");
+ }};
+ Assert.assertTrue(runPredicate("true and TO_UPPER(foo) == 'CASEY'", v
-> variableMap.get(v)));
+ Assert.assertTrue(runPredicate("foo in [ TO_LOWER('CASEY'), 'david'
]", v -> variableMap.get(v)));
+ Assert.assertTrue(runPredicate("TO_UPPER(foo) in [ TO_UPPER('casey'),
'david' ] and IN_SUBNET(ip, '192.168.0.0/24')", v -> variableMap.get(v)));
+ Assert.assertFalse(runPredicate("TO_LOWER(foo) in [ TO_UPPER('casey'),
'david' ]", v -> variableMap.get(v)));
+ }
+
+ @Test
+ public void testStringFunctions_advanced() throws Exception {
+ final Map<String, Object> variableMap = new HashMap<String, Object>()
{{
+ put("foo", "casey");
+ put("bar", "bar.casey.grok");
+ put("ip", "192.168.0.1");
+ put("empty", "");
+ put("spaced", "metron is great");
+ put("myList", ImmutableList.of("casey", "apple", "orange"));
+ }};
+ Assert.assertTrue(runPredicate("foo in SPLIT(bar, '.')", v ->
variableMap.get(v)));
+ Assert.assertFalse(runPredicate("foo in SPLIT(ip, '.')", v ->
variableMap.get(v)));
+ Assert.assertTrue(runPredicate("foo in myList", v ->
variableMap.get(v)));
+ Assert.assertFalse(runPredicate("foo not in myList", v ->
variableMap.get(v)));
+ }
+
+ @Test
+ public void testLeftRightFills() throws Exception{
+ final Map<String, Object> variableMap = new HashMap<String, Object>()
{{
+ put("foo", null);
+ put("bar", null);
+ put("notInt","oh my");
+ }};
+
+ //LEFT
+ Object left = run("FILL_LEFT('123','X', 10)",new HashedMap());
+ Assert.assertNotNull(left);
+ Assert.assertEquals(10,((String)left).length());
+ Assert.assertEquals("XXXXXXX123",(String)left);
+
+ //RIGHT
+ Object right = run("FILL_RIGHT('123','X', 10)", new HashedMap());
+ Assert.assertNotNull(right);
+ Assert.assertEquals(10,((String)right).length());
+ Assert.assertEquals("123XXXXXXX",(String)right);
+
+ //INPUT ALREADY LENGTH
+ Object same = run("FILL_RIGHT('123','X', 3)", new HashedMap());
+ Assert.assertEquals(3,((String)same).length());
+ Assert.assertEquals("123",(String)same);
+
+ //INPUT BIGGER THAN LENGTH
+ Object tooBig = run("FILL_RIGHT('1234567890','X', 3)", new
HashedMap());
+ Assert.assertEquals(10,((String)tooBig).length());
+ Assert.assertEquals("1234567890",(String)tooBig);
+
+ //NULL VARIABLES
+ boolean thrown = false;
+ try{
+ run("FILL_RIGHT('123',foo,bar)", variableMap);
+ }catch(ParseException pe) {
+ thrown = true;
+ Assert.assertTrue(pe.getMessage().contains("are both required"));
}
-
- @Test
- public void testStringFunctions_advanced() throws Exception {
- final Map<String, Object> variableMap = new HashMap<String,
Object>() {{
- put("foo", "casey");
- put("bar", "bar.casey.grok");
- put("ip", "192.168.0.1");
- put("empty", "");
- put("spaced", "metron is great");
- put("myList", ImmutableList.of("casey", "apple", "orange"));
- }};
- Assert.assertTrue(runPredicate("foo in SPLIT(bar, '.')", v ->
variableMap.get(v)));
- Assert.assertFalse(runPredicate("foo in SPLIT(ip, '.')", v ->
variableMap.get(v)));
- Assert.assertTrue(runPredicate("foo in myList", v ->
variableMap.get(v)));
- Assert.assertFalse(runPredicate("foo not in myList", v ->
variableMap.get(v)));
+ Assert.assertTrue(thrown);
+ thrown = false;
+
+ // NULL LENGTH
+ try{
+ run("FILL_RIGHT('123','X',bar)", variableMap);
+ }catch(ParseException pe) {
+ thrown = true;
+ Assert.assertTrue(pe.getMessage().contains("are both required"));
}
-
- @Test
- public void testLeftRightFills() throws Exception{
- final Map<String, Object> variableMap = new HashMap<String,
Object>() {{
- put("foo", null);
- put("bar", null);
- put("notInt","oh my");
- }};
-
- //LEFT
- Object left = run("FILL_LEFT('123','X', 10)",new HashedMap());
- Assert.assertNotNull(left);
- Assert.assertEquals(10,((String)left).length());
- Assert.assertEquals("XXXXXXX123",(String)left);
-
- //RIGHT
- Object right = run("FILL_RIGHT('123','X', 10)", new HashedMap());
- Assert.assertNotNull(right);
- Assert.assertEquals(10,((String)right).length());
- Assert.assertEquals("123XXXXXXX",(String)right);
-
- //INPUT ALREADY LENGTH
- Object same = run("FILL_RIGHT('123','X', 3)", new HashedMap());
- Assert.assertEquals(3,((String)same).length());
- Assert.assertEquals("123",(String)same);
-
- //INPUT BIGGER THAN LENGTH
- Object tooBig = run("FILL_RIGHT('1234567890','X', 3)", new
HashedMap());
- Assert.assertEquals(10,((String)tooBig).length());
- Assert.assertEquals("1234567890",(String)tooBig);
-
- //NULL VARIABLES
- boolean thrown = false;
- try{
- run("FILL_RIGHT('123',foo,bar)", variableMap);
- }catch(ParseException pe) {
- thrown = true;
- Assert.assertTrue(pe.getMessage().contains("are both
required"));
- }
- Assert.assertTrue(thrown);
- thrown = false;
-
- // NULL LENGTH
- try{
- run("FILL_RIGHT('123','X',bar)", variableMap);
- }catch(ParseException pe) {
- thrown = true;
- Assert.assertTrue(pe.getMessage().contains("are both
required"));
- }
- Assert.assertTrue(thrown);
- thrown = false;
-
- // NULL FILL
- try{
- run("FILL_RIGHT('123',foo, 7)", variableMap);
- }catch(ParseException pe) {
- thrown = true;
- Assert.assertTrue(pe.getMessage().contains("are both
required"));
- }
- Assert.assertTrue(thrown);
- thrown = false;
-
- // NON INTEGER LENGTH
- try {
- run("FILL_RIGHT('123','X', 'z' )", new HashedMap());
- }catch(ParseException pe){
- thrown = true;
- Assert.assertTrue(pe.getMessage().contains("not a valid
Integer"));
- }
- Assert.assertTrue(thrown);
- thrown = false;
-
- // EMPTY STRING PAD
- try {
- Object returnValue = run("FILL_RIGHT('123','', 10 )", new
HashedMap());
- }catch(ParseException pe) {
- thrown = true;
- Assert.assertTrue(pe.getMessage().contains("cannot be an
empty"));
- }
- Assert.assertTrue(thrown);
- thrown = false;
-
- //MISSING LENGTH PARAMETER
- try {
- run("FILL_RIGHT('123',foo)", variableMap);
- }catch(ParseException pe){
- thrown = true;
- Assert.assertTrue(pe.getMessage().contains("expects three"));
- }
- Assert.assertTrue(thrown);
+ Assert.assertTrue(thrown);
+ thrown = false;
+
+ // NULL FILL
+ try{
+ run("FILL_RIGHT('123',foo, 7)", variableMap);
+ }catch(ParseException pe) {
+ thrown = true;
+ Assert.assertTrue(pe.getMessage().contains("are both required"));
+ }
+ Assert.assertTrue(thrown);
+ thrown = false;
+
+ // NON INTEGER LENGTH
+ try {
+ run("FILL_RIGHT('123','X', 'z' )", new HashedMap());
+ }catch(ParseException pe){
+ thrown = true;
+ Assert.assertTrue(pe.getMessage().contains("not a valid Integer"));
+ }
+ Assert.assertTrue(thrown);
+ thrown = false;
+
+ // EMPTY STRING PAD
+ try {
+ Object returnValue = run("FILL_RIGHT('123','', 10 )", new
HashedMap());
+ }catch(ParseException pe) {
+ thrown = true;
+ Assert.assertTrue(pe.getMessage().contains("cannot be an empty"));
+ }
+ Assert.assertTrue(thrown);
+ thrown = false;
+
+ //MISSING LENGTH PARAMETER
+ try {
+ run("FILL_RIGHT('123',foo)", variableMap);
+ }catch(ParseException pe){
+ thrown = true;
+ Assert.assertTrue(pe.getMessage().contains("expects three"));
}
+ Assert.assertTrue(thrown);
+ }
+
+ @Test
+ public void shannonEntropyTest() throws Exception {
+ //test empty string
+ Assert.assertEquals(0.0, (Double)run("STRING_ENTROPY('')", new
HashMap<>()), 1e-6);
+ Assert.assertEquals(0.0, (Double)run("STRING_ENTROPY(foo)",
ImmutableMap.of("foo", "")), 1e-6);
+
+ /*
+ Now consider the string aaaaaaaaaabbbbbccccc or 10 a's followed by 5
b's and 5 c's.
+ The probabilities of each character is as follows:
+ p(a) = 1/2
+ p(b) = 1/4
+ p(c) = 1/4
+ so the shannon entropy should be
+ -p(a)*log_2(p(a)) - p(b)*log_2(p(b)) - p(c)*log_2(p(c)) =
+ -0.5*-1 - 0.25*-2 - 0.25*-2 = 1.5
+ */
+ Assert.assertEquals(1.5, (Double)run("STRING_ENTROPY(foo)",
ImmutableMap.of("foo", "aaaaaaaaaabbbbbccccc")), 1e-1);
--- End diff --
I could also use `0.0` as the epsilon in these cases if you think that's
clearer.
> Add a Stellar function to compute shannon entropy for strings
> -------------------------------------------------------------
>
> Key: METRON-640
> URL: https://issues.apache.org/jira/browse/METRON-640
> Project: Metron
> Issue Type: Improvement
> Reporter: Casey Stella
>
> A common feature used for models (especially DGA models) is the Shannon
> entropy of strings. We should have the ability to compute it in Stellar.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)