[ 
https://issues.apache.org/jira/browse/METRON-640?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15815824#comment-15815824
 ] 

ASF GitHub Bot commented on METRON-640:
---------------------------------------

Github user cestella commented on a diff in the pull request:

    https://github.com/apache/incubator-metron/pull/403#discussion_r95430321
  
    --- Diff: 
metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java
 ---
    @@ -32,124 +33,143 @@
     
     public class StringFunctionsTest {
     
    -    @Test
    -    public void testStringFunctions() throws Exception {
    -        final Map<String, String> variableMap = new HashMap<String, 
String>() {{
    -            put("foo", "casey");
    -            put("ip", "192.168.0.1");
    -            put("empty", "");
    -            put("spaced", "metron is great");
    -        }};
    -        Assert.assertTrue(runPredicate("true and TO_UPPER(foo) == 
'CASEY'", v -> variableMap.get(v)));
    -        Assert.assertTrue(runPredicate("foo in [ TO_LOWER('CASEY'), 
'david' ]", v -> variableMap.get(v)));
    -        Assert.assertTrue(runPredicate("TO_UPPER(foo) in [ 
TO_UPPER('casey'), 'david' ] and IN_SUBNET(ip, '192.168.0.0/24')", v -> 
variableMap.get(v)));
    -        Assert.assertFalse(runPredicate("TO_LOWER(foo) in [ 
TO_UPPER('casey'), 'david' ]", v -> variableMap.get(v)));
    +  @Test
    +  public void testStringFunctions() throws Exception {
    +    final Map<String, String> variableMap = new HashMap<String, String>() 
{{
    +      put("foo", "casey");
    +      put("ip", "192.168.0.1");
    +      put("empty", "");
    +      put("spaced", "metron is great");
    +    }};
    +    Assert.assertTrue(runPredicate("true and TO_UPPER(foo) == 'CASEY'", v 
-> variableMap.get(v)));
    +    Assert.assertTrue(runPredicate("foo in [ TO_LOWER('CASEY'), 'david' 
]", v -> variableMap.get(v)));
    +    Assert.assertTrue(runPredicate("TO_UPPER(foo) in [ TO_UPPER('casey'), 
'david' ] and IN_SUBNET(ip, '192.168.0.0/24')", v -> variableMap.get(v)));
    +    Assert.assertFalse(runPredicate("TO_LOWER(foo) in [ TO_UPPER('casey'), 
'david' ]", v -> variableMap.get(v)));
    +  }
    +
    +  @Test
    +  public void testStringFunctions_advanced() throws Exception {
    +    final Map<String, Object> variableMap = new HashMap<String, Object>() 
{{
    +      put("foo", "casey");
    +      put("bar", "bar.casey.grok");
    +      put("ip", "192.168.0.1");
    +      put("empty", "");
    +      put("spaced", "metron is great");
    +      put("myList", ImmutableList.of("casey", "apple", "orange"));
    +    }};
    +    Assert.assertTrue(runPredicate("foo in SPLIT(bar, '.')", v -> 
variableMap.get(v)));
    +    Assert.assertFalse(runPredicate("foo in SPLIT(ip, '.')", v -> 
variableMap.get(v)));
    +    Assert.assertTrue(runPredicate("foo in myList", v -> 
variableMap.get(v)));
    +    Assert.assertFalse(runPredicate("foo not in myList", v -> 
variableMap.get(v)));
    +  }
    +
    +  @Test
    +  public void testLeftRightFills() throws Exception{
    +    final Map<String, Object> variableMap = new HashMap<String, Object>() 
{{
    +      put("foo", null);
    +      put("bar", null);
    +      put("notInt","oh my");
    +    }};
    +
    +    //LEFT
    +    Object left = run("FILL_LEFT('123','X', 10)",new HashedMap());
    +    Assert.assertNotNull(left);
    +    Assert.assertEquals(10,((String)left).length());
    +    Assert.assertEquals("XXXXXXX123",(String)left);
    +
    +    //RIGHT
    +    Object right = run("FILL_RIGHT('123','X', 10)", new HashedMap());
    +    Assert.assertNotNull(right);
    +    Assert.assertEquals(10,((String)right).length());
    +    Assert.assertEquals("123XXXXXXX",(String)right);
    +
    +    //INPUT ALREADY LENGTH
    +    Object same = run("FILL_RIGHT('123','X', 3)", new HashedMap());
    +    Assert.assertEquals(3,((String)same).length());
    +    Assert.assertEquals("123",(String)same);
    +
    +    //INPUT BIGGER THAN LENGTH
    +    Object tooBig = run("FILL_RIGHT('1234567890','X', 3)", new 
HashedMap());
    +    Assert.assertEquals(10,((String)tooBig).length());
    +    Assert.assertEquals("1234567890",(String)tooBig);
    +
    +    //NULL VARIABLES
    +    boolean thrown = false;
    +    try{
    +      run("FILL_RIGHT('123',foo,bar)", variableMap);
    +    }catch(ParseException pe) {
    +      thrown = true;
    +      Assert.assertTrue(pe.getMessage().contains("are both required"));
         }
    -
    -    @Test
    -    public void testStringFunctions_advanced() throws Exception {
    -        final Map<String, Object> variableMap = new HashMap<String, 
Object>() {{
    -            put("foo", "casey");
    -            put("bar", "bar.casey.grok");
    -            put("ip", "192.168.0.1");
    -            put("empty", "");
    -            put("spaced", "metron is great");
    -            put("myList", ImmutableList.of("casey", "apple", "orange"));
    -        }};
    -        Assert.assertTrue(runPredicate("foo in SPLIT(bar, '.')", v -> 
variableMap.get(v)));
    -        Assert.assertFalse(runPredicate("foo in SPLIT(ip, '.')", v -> 
variableMap.get(v)));
    -        Assert.assertTrue(runPredicate("foo in myList", v -> 
variableMap.get(v)));
    -        Assert.assertFalse(runPredicate("foo not in myList", v -> 
variableMap.get(v)));
    +    Assert.assertTrue(thrown);
    +    thrown = false;
    +
    +    // NULL LENGTH
    +    try{
    +      run("FILL_RIGHT('123','X',bar)", variableMap);
    +    }catch(ParseException pe) {
    +      thrown = true;
    +      Assert.assertTrue(pe.getMessage().contains("are both required"));
         }
    -
    -    @Test
    -    public void testLeftRightFills() throws Exception{
    -        final Map<String, Object> variableMap = new HashMap<String, 
Object>() {{
    -            put("foo", null);
    -            put("bar", null);
    -            put("notInt","oh my");
    -        }};
    -
    -        //LEFT
    -        Object left = run("FILL_LEFT('123','X', 10)",new HashedMap());
    -        Assert.assertNotNull(left);
    -        Assert.assertEquals(10,((String)left).length());
    -        Assert.assertEquals("XXXXXXX123",(String)left);
    -
    -        //RIGHT
    -        Object right = run("FILL_RIGHT('123','X', 10)", new HashedMap());
    -        Assert.assertNotNull(right);
    -        Assert.assertEquals(10,((String)right).length());
    -        Assert.assertEquals("123XXXXXXX",(String)right);
    -
    -        //INPUT ALREADY LENGTH
    -        Object same = run("FILL_RIGHT('123','X', 3)", new HashedMap());
    -        Assert.assertEquals(3,((String)same).length());
    -        Assert.assertEquals("123",(String)same);
    -
    -        //INPUT BIGGER THAN LENGTH
    -        Object tooBig = run("FILL_RIGHT('1234567890','X', 3)", new 
HashedMap());
    -        Assert.assertEquals(10,((String)tooBig).length());
    -        Assert.assertEquals("1234567890",(String)tooBig);
    -
    -        //NULL VARIABLES
    -        boolean thrown = false;
    -        try{
    -            run("FILL_RIGHT('123',foo,bar)", variableMap);
    -        }catch(ParseException pe) {
    -            thrown = true;
    -            Assert.assertTrue(pe.getMessage().contains("are both 
required"));
    -        }
    -        Assert.assertTrue(thrown);
    -        thrown = false;
    -
    -        // NULL LENGTH
    -        try{
    -            run("FILL_RIGHT('123','X',bar)", variableMap);
    -        }catch(ParseException pe) {
    -            thrown = true;
    -            Assert.assertTrue(pe.getMessage().contains("are both 
required"));
    -        }
    -        Assert.assertTrue(thrown);
    -        thrown = false;
    -
    -        // NULL FILL
    -        try{
    -            run("FILL_RIGHT('123',foo, 7)", variableMap);
    -        }catch(ParseException pe) {
    -            thrown = true;
    -            Assert.assertTrue(pe.getMessage().contains("are both 
required"));
    -        }
    -        Assert.assertTrue(thrown);
    -        thrown = false;
    -
    -        // NON INTEGER LENGTH
    -        try {
    -            run("FILL_RIGHT('123','X', 'z' )", new HashedMap());
    -        }catch(ParseException pe){
    -            thrown = true;
    -            Assert.assertTrue(pe.getMessage().contains("not a valid 
Integer"));
    -        }
    -        Assert.assertTrue(thrown);
    -        thrown = false;
    -
    -        // EMPTY STRING PAD
    -        try {
    -            Object returnValue = run("FILL_RIGHT('123','', 10 )", new 
HashedMap());
    -        }catch(ParseException pe) {
    -            thrown = true;
    -            Assert.assertTrue(pe.getMessage().contains("cannot be an 
empty"));
    -        }
    -        Assert.assertTrue(thrown);
    -        thrown = false;
    -
    -        //MISSING LENGTH PARAMETER
    -        try {
    -            run("FILL_RIGHT('123',foo)", variableMap);
    -        }catch(ParseException pe){
    -            thrown = true;
    -            Assert.assertTrue(pe.getMessage().contains("expects three"));
    -        }
    -        Assert.assertTrue(thrown);
    +    Assert.assertTrue(thrown);
    +    thrown = false;
    +
    +    // NULL FILL
    +    try{
    +      run("FILL_RIGHT('123',foo, 7)", variableMap);
    +    }catch(ParseException pe) {
    +      thrown = true;
    +      Assert.assertTrue(pe.getMessage().contains("are both required"));
    +    }
    +    Assert.assertTrue(thrown);
    +    thrown = false;
    +
    +    // NON INTEGER LENGTH
    +    try {
    +      run("FILL_RIGHT('123','X', 'z' )", new HashedMap());
    +    }catch(ParseException pe){
    +      thrown = true;
    +      Assert.assertTrue(pe.getMessage().contains("not a valid Integer"));
    +    }
    +    Assert.assertTrue(thrown);
    +    thrown = false;
    +
    +    // EMPTY STRING PAD
    +    try {
    +      Object returnValue = run("FILL_RIGHT('123','', 10 )", new 
HashedMap());
    +    }catch(ParseException pe) {
    +      thrown = true;
    +      Assert.assertTrue(pe.getMessage().contains("cannot be an empty"));
    +    }
    +    Assert.assertTrue(thrown);
    +    thrown = false;
    +
    +    //MISSING LENGTH PARAMETER
    +    try {
    +      run("FILL_RIGHT('123',foo)", variableMap);
    +    }catch(ParseException pe){
    +      thrown = true;
    +      Assert.assertTrue(pe.getMessage().contains("expects three"));
         }
    +    Assert.assertTrue(thrown);
    +  }
    +
    +  @Test
    +  public void shannonEntropyTest() throws Exception {
    +    //test empty string
    +    Assert.assertEquals(0.0, (Double)run("STRING_ENTROPY('')", new 
HashMap<>()), 0.0);
    +    Assert.assertEquals(0.0, (Double)run("STRING_ENTROPY(foo)", 
ImmutableMap.of("foo", "")), 0.0);
    +
    +    /*
    +    Now consider the string aaaaaaaaaabbbbbccccc or 10 a's followed by 5 
b's and 5 c's.
    +    The probabilities of each character is as follows:
    +    p(a) = 1/2
    +    p(b) = 1/4
    +    p(c) = 1/4
    +    so the shannon entropy should be
    +      -p(a)*log_2(p(a)) - p(b)*log_2(p(b)) - p(c)*log_2(p(c)) =
    +      -0.5*-1 - 0.25*-2 - 0.25*-2 = 1.5
    +     */
    +    Assert.assertEquals(1.5, (Double)run("STRING_ENTROPY(foo)", 
ImmutableMap.of("foo", "aaaaaaaaaabbbbbccccc")), 0.0);
    --- End diff --
    
    Yeah, I suspect the reason we can get away with an epsilon of 0.0 here is 
that the character frequencies are all of the form `(1/2)^n`, which are exactly 
representable in binary floating point.  I was a bit shocked it worked, frankly.


> Add a Stellar function to compute shannon entropy for strings
> -------------------------------------------------------------
>
>                 Key: METRON-640
>                 URL: https://issues.apache.org/jira/browse/METRON-640
>             Project: Metron
>          Issue Type: Improvement
>            Reporter: Casey Stella
>
> A common feature used for models (especially DGA models) is the Shannon 
> entropy of strings.  We should have the ability to compute it in Stellar.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to