Github user nickwallen commented on a diff in the pull request:

    https://github.com/apache/metron/pull/1245#discussion_r237574064
  
    --- Diff: 
metron-platform/metron-parsers/src/test/java/org/apache/metron/parsers/regex/RegularExpressionsParserTest.java
 ---
    @@ -0,0 +1,152 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license
    + * agreements. See the NOTICE file distributed with this work for 
additional information regarding
    + * copyright ownership. The ASF licenses this file to you under the Apache 
License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance with the 
License. You may obtain a
    + * copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software 
distributed under the License
    + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express
    + * or implied. See the License for the specific language governing 
permissions and limitations under
    + * the License.
    + */
    +package org.apache.metron.parsers.regex;
    +
    +import org.json.simple.JSONObject;
    +import org.json.simple.parser.JSONParser;
    +import org.junit.Before;
    +import org.junit.Test;
    +
    +import java.nio.file.Files;
    +import java.nio.file.Paths;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +
    +import static org.junit.Assert.assertTrue;
    +
    +public class RegularExpressionsParserTest {
    +
    +  private RegularExpressionsParser regularExpressionsParser;
    +  private JSONObject parserConfig;
    +
    +  @Before
    +  public void setUp() throws Exception {
    +    regularExpressionsParser = new RegularExpressionsParser();
    +  }
    +
    +  @Test
    +  public void testSSHDParse() throws Exception {
    +    String message =
    +        "<38>Jun 20 15:01:17 deviceName sshd[11672]: Accepted publickey 
for prod from 22.22.22.22 port 55555 ssh2";
    +
    +    parserConfig = getJsonConfig(
    +        
Paths.get("src/test/resources/config/RegularExpressionsParserConfig.json").toString());
    --- End diff --
    
    The configuration contained in 
`src/test/resources/config/RegularExpressionsParserConfig.json` is hard to grok 
because it covers so many record types. I would get rid of this JSON file 
completely.  Actually all of the JSONs that you added in 
`src/test/resources/config`.
    
    Instead use the @Multiline annotation along with a more focused 
configuration that precedes each test case.  You don't need 30 different record 
types defined to test SSHD parsing.  Each test case would be preceded with a 
@Multiline annotated field containing the configuration for that test case.  
    
    For example your SSHD test might look-like this.
    
    ```
      /**
       * {
       *        "convertCamelCaseToUnderScore": true,
       *        "messageHeaderRegex": 
"(?<syslogpriority>(?<=^<)\\d{1,4}(?=>)).*?(?<timestampDeviceOriginal>(?<=>)[A-Za-z]{3}\\s{1,2}\\d{1,2}\\s\\d{1,2}:\\d{1,2}:\\d{1,2}(?=\\s)).*?(?<deviceName>(?<=\\s).*?(?=\\s))",
       *        "recordTypeRegex": 
"(?<dstProcessName>(?<=\\s)\\b(tch-replicant|audispd|syslog|ntpd|sendmail|pure-ftpd|usermod|useradd|anacron|unix_chkpwd|sudo|dovecot|postfix\\/smtpd|postfix\\/smtp|postfix\\/qmgr|klnagent|systemd|(?i)crond(?-i)|clamd|kesl|sshd|run-parts|automount|suexec|freshclam|kernel|vsftpd|ftpd|su)\\b(?=\\[|:))",
       *        "fields": [
       *    {
       *                        "recordType": "sshd",
       *                        "regex": [
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=\\]:\\s).*?(?=\\sfor)).*?(?<dstUserId>(?<=\\sfor\\s).*?(?=\\sfrom)).*?(?<ipSrcAddr>(?<=\\sfrom\\s).*?(?=\\sport)).*?(?<ipSrcPort>(?<=\\sport\\s).*?(?=\\s)).*?(?<appProtocol>(?<=port\\s\\d{1,5}\\s).*(?=:\\s)).*?(?<encryptionAlgorithm>(?<=:\\s).+?(?=\\s)).*(?<correlationId>(?<=\\s).+?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=\\]:\\s).*?(?=\\sfor)).*?(?<dstUserId>(?<=\\sfor\\s).*?(?=\\sfrom)).*?(?<ipSrcAddr>(?<=\\sfrom\\s).*?(?=\\sport)).*?(?<ipSrcPort>(?<=\\sport\\s).*?(?=\\s)).*?(?<appProtocol>(?<=port\\s\\d{1,5}\\s).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<ipDstAddr>(?<=Remote:).*?(?=\\-)).*?(?<ipDstPort>(?<=\\-).*?(?=;)).*?(?<appProtocol>(?<=Protocol:).*?(?=;)).*?(?<sshClient>(?<=Client:).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<appProtocol>(?<=\\]:).*?(?=:)).*?(?<ipDstAddr>(?<=Remote:).*?(?=\\-)).*?(?<ipDstPort>(?<=\\-).*?(?=;)).*?(?<encryptionAlgorithm>(?<=Enc:\\s).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<ipDstAddr>(?<=Remote:).*?(?=\\-)).*?(?<ipDstPort>(?<=\\-).*?(?=;)).*?(?<encryptionAlgorithm>(?<=Enc:\\s).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=:).*?(?=for)).*?(?<dstUserId>(?<=for).*?(?=from)).*?(?<ipSrcAddr>(?<=from).*?(?=port)).*?(?<ipSrcPort>(?<=port).*?(?=\\s)).*?(?<appProtocol>(?<=\\s).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\]))]:\\s.*?(?<eventInfo>subsystem.*?(?=by\\suser)).*?(?<srcUserId>(?<=user).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<action>(?<=Received).*?(?=from)).*?(?<ipSrcAddr>(?<=from).*?(?=:)).*?(?<eventInfo>(?<=11:).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=\\]:\\s)Server\\slistening(?=\\s)).*?(?<ipSrcAddr>(?<=\\son\\s).*?(?=port)).*?(?<ipSrcPort>(?<=port\\s)\\d{1,6}(?=\\.)).*$",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=\\]:\\s)Invalid 
user(?=\\s)).*?(?<dstUserId>(?<=\\s).*?(?=from)).*?(?<ipSrcAddr>(?<=from\\s).*(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=]:\\s)).*(?<subProcess>(?<=]:\\s).*\\)(?=:)).*(?<eventInfo>(?<=:\\s).*(?=;)).*(?<logname>(?<=logname=).*?(?=\\s)).*(?<dstUserId>(?<=uid=).*?(?=\\s)).*(?<effectiveUserId>(?<=euid=).*?(?=\\s)).*(?<sessionName>(?<=tty=).*?(?=\\s)).*(?<srcUserId>(?<=ruser=).*?(?=\\s)).*(?<ipSrcAddr>(?<=rhost=).*?(?=\\s)).*(?<userId>(?<=user=).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=]:\\s)).*(?<eventInfo>(?<=:\\s).*(?=;)).*(?<logname>(?<=logname=).*?(?=\\s)).*(?<dstUserId>(?<=uid=).*?(?=\\s)).*(?<effectiveUserId>(?<=euid=).*?(?=\\s)).*(?<sessionName>(?<=tty=).*?(?=\\s)).*(?<srcUserId>(?<=ruser=).*?(?=\\s)).*(?<ipSrcAddr>(?<=rhost=).*?(?=\\s)).*(?<userId>(?<=user=).*?(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=\\]:\\s).*?(?=for)).*?(?<dstUserId>(?<=\\sfor).*?(?=\\[)).*?(?<subProcess>(?<=\\[).*?(?=\\])).*$",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=:\\s)Excess 
permission or bad ownership on 
file(?=\\s\\/)).*?(?<filePath>(?<=\\s).*(?=\\/)).*?(?<fileName>(?<=\\/).*(?=$))",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=:).*?(?=;)).*$",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=:).*?(?=\\d)).*$",
       *                                
".*(?<dstProcessId>(?<=\\[).*?(?=\\])).*?(?<eventInfo>(?<=:).*?(?=$))"
       *                        ]
       *    }
       *   ]
       * }
       */
      @Multiline
      private String testSSHDParse;
    
      @Test
      public void testSSHDParse() throws Exception {
        String message = "<38>Jun 20 15:01:17 deviceName sshd[11672]: Accepted 
publickey for prod from 22.22.22.22 port 55555 ssh2";
    
        regularExpressionsParser.configure(toJSON(testSSHDParse));
        JSONObject parsed = parse(message);
        // Expected
       ...
      }
    
    ```
    
    This will make it much easier to grok the test cases. We do this in other 
[parts of the code 
base](https://github.com/apache/metron/blob/89a2beda4f07911c8b3cd7dee8a2c3426838d161/metron-analytics/metron-profiler-storm/src/test/java/org/apache/metron/profiler/storm/integration/ProfilerIntegrationTest.java#L151).


---

Reply via email to