This is an automated email from the ASF dual-hosted git repository. sgoeschl pushed a commit to branch FREEMARKER-181 in repository https://gitbox.apache.org/repos/asf/freemarker-generator.git
commit 1dcbe56cd621a89e6b9657c7475a9bed198add0e Author: Siegfried Goeschl <[email protected]> AuthorDate: Wed May 19 23:09:58 2021 +0200 FREEMARKER-181 Support custom pattern definitions for Grok tool --- freemarker-generator-cli/CHANGELOG.md | 3 + .../src/app/examples/data/logs/server.01.log | 14 +++ .../src/app/examples/data/logs/server.02.log | 13 +++ .../templates/logs/csv/serverlog-to-csv.ftl | 50 +++++++++ .../src/app/scripts/run-examples.bat | 3 + .../src/app/scripts/run-examples.sh | 3 + .../site/markdown/cli/usage/parsing-with-grok.md | 118 +++++++++++++++++++++ .../site/markdown/cli/usage/running-examples.md | 65 +----------- .../src/site/markdown/index.md | 1 + .../freemarker/generator/tools/grok/GrokTool.java | 46 +++++++- .../generator/tools/grok/impl/GrokWrapper.java | 3 + .../generator/tools/grok/GrokToolTest.java | 29 ++++- 12 files changed, 283 insertions(+), 65 deletions(-) diff --git a/freemarker-generator-cli/CHANGELOG.md b/freemarker-generator-cli/CHANGELOG.md index 3cbf959..13055af 100644 --- a/freemarker-generator-cli/CHANGELOG.md +++ b/freemarker-generator-cli/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. We try to a ## 0.1.0-SNAPSHOT ### Added +* [FREEMARKER-181] Support custom pattern definitions for Grok tool * Parse a list of `DataSources` for the various tools * [FREEMARKER-161] Allow multiple transformations on the CLI * [FREEMARKER-163] Integrate Java Faker library for test data generation @@ -78,3 +79,5 @@ All notable changes to this project will be documented in this file. 
We try to a [FREEMARKER-174]: https://issues.apache.org/jira/browse/FREEMARKER-174 [FREEMARKER-175]: https://issues.apache.org/jira/browse/FREEMARKER-175 [FREEMARKER-176]: https://issues.apache.org/jira/browse/FREEMARKER-176 +[FREEMARKER-181]: https://issues.apache.org/jira/browse/FREEMARKER-181 + diff --git a/freemarker-generator-cli/src/app/examples/data/logs/server.01.log b/freemarker-generator-cli/src/app/examples/data/logs/server.01.log new file mode 100644 index 0000000..c1edb4b --- /dev/null +++ b/freemarker-generator-cli/src/app/examples/data/logs/server.01.log @@ -0,0 +1,14 @@ +2021-05-18 20:00:32,120 INFO [aa.bb.ccc] (Thread-98) message response handled in: 1 ms; message counter: 2047; total message counter: 7093 +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) FinancialAdviceServiceImpl.createPostingLines() : 123456789012345678 +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) FinancialAdviceDAO.getCountryCode() countryCode : 40 +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) FinancialAdviceServiceImpl.createPostingLines() : 123456789012345678 +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) AuthorizationDAO.insertAuthorization() Start ... +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) FinancialAdviceServiceImpl.processFinancialAdvice() successfully insert before AUTHORIZATIONS: true +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) AuthorizationDAO.insertInterface() Start ... 
+2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) FinancialAdviceServiceImpl.processFinancialAdvice() successfully inserted into DB: true +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) responding message 1230 data +2021-05-18 20:00:32,124 INFO [aa.bb.ccc] (Thread-99) no remote configuration available, will create local context +2021-05-18 20:00:32,140 INFO [aa.bb.ccc] (Thread-99) writing bytes of message 1230 +2021-05-18 20:00:32,140 INFO [aa.bb.ccc] (Thread-99) message response handled in: 62 ms; message counter: 2048; total message counter: 7094 +2021-05-18 20:00:32,187 INFO [aa.bb.ccc] (Thread-99) no remote configuration available, will create local context +2021-05-18 20:00:32,187 INFO [aa.bb.ccc] (Thread-99) no remote configuration available, will create local context \ No newline at end of file diff --git a/freemarker-generator-cli/src/app/examples/data/logs/server.02.log b/freemarker-generator-cli/src/app/examples/data/logs/server.02.log new file mode 100644 index 0000000..e603015 --- /dev/null +++ b/freemarker-generator-cli/src/app/examples/data/logs/server.02.log @@ -0,0 +1,13 @@ +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) FinancialAdviceServiceImpl.createPostingLines() : 123456789012345678 +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) FinancialAdviceDAO.getCountryCode() countryCode : 40 +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) FinancialAdviceServiceImpl.createPostingLines() : 123456789012345678 +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) AuthorizationDAO.insertAuthorization() Start ... +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) FinancialAdviceServiceImpl.processFinancialAdvice() successfully insert before AUTHORIZATIONS: true +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) AuthorizationDAO.insertInterface() Start ... 
+2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) FinancialAdviceServiceImpl.processFinancialAdvice() successfully inserted into DB: true +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) responding message 1230 data +2021-05-18 21:00:32,124 INFO [aa.bb.ccc] (Thread-97) no remote configuration available, will create local context +2021-05-18 21:00:32,140 INFO [aa.bb.ccc] (Thread-97) writing bytes of message 1230 +2021-05-18 21:00:32,140 INFO [aa.bb.ccc] (Thread-97) message response handled in: 162 ms; message counter: 2049; total message counter: 7095 +2021-05-18 21:00:32,187 INFO [aa.bb.ccc] (Thread-97) no remote configuration available, will create local context +2021-05-18 21:00:32,187 INFO [aa.bb.ccc] (Thread-97) no remote configuration available, will create local context \ No newline at end of file diff --git a/freemarker-generator-cli/src/app/examples/templates/logs/csv/serverlog-to-csv.ftl b/freemarker-generator-cli/src/app/examples/templates/logs/csv/serverlog-to-csv.ftl new file mode 100644 index 0000000..a8e9752 --- /dev/null +++ b/freemarker-generator-cli/src/app/examples/templates/logs/csv/serverlog-to-csv.ftl @@ -0,0 +1,50 @@ +<#ftl output_format="plainText" strip_whitespace=true> +<#-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +--> + +<#-- + Define custom grok pattern as map to match something like using "MY_SERVERLOG" + 2019-05-17 20:00:32,140 INFO [xx.yyy.zzzz] (Thread-99) message response handled in: 62 ms; message counter: 2048; total message counter: 7094 +--> +<#assign patternDefinitions = { +"MY_DATE": "%{YEAR}-%{MONTHNUM}-%{MONTHDAY}", +"MY_TIMESTAMP": "%{MY_DATE:date} %{TIME:time},%{INT:millis}", +"MY_MODULE": "\\[%{NOTSPACE}\\]", +"MY_THREAD": "\\(%{NOTSPACE}\\)", +"MY_SERVERLOG": "%{MY_TIMESTAMP} %{LOGLEVEL}%{SPACE:UNWANTED}%{MY_MODULE} %{MY_THREAD} message response handled in: %{INT:response_time} ms; %{GREEDYDATA:UNWANTED}" +}> + +<#-- Instantiate the grok tool --> +<#assign grok = tools.grok.compile("%{MY_SERVERLOG}", patternDefinitions)> + +<#-- Iterate over all data sources and convert matching lines to CSV output --> +<#compress> + TIMESTAMP;MILLIS + <#if dataSources?has_content> + <#list dataSources?values as dataSource> + <#list dataSource.getLineIterator() as line> + <#assign parts = grok.match(line)> + <#if parts?has_content> + <#-- Skip all response times less than 5 ms because these are boring pings --> + <#if parts.response_time?number gt 5> + ${parts.date}T${parts.time}.${parts.millis}+02:00;${parts.response_time} + </#if> + </#if> + </#list> + </#list> + </#if> +</#compress> \ No newline at end of file diff --git a/freemarker-generator-cli/src/app/scripts/run-examples.bat b/freemarker-generator-cli/src/app/scripts/run-examples.bat index 7ca035e..6512dca 100644 --- a/freemarker-generator-cli/src/app/scripts/run-examples.bat +++ b/freemarker-generator-cli/src/app/scripts/run-examples.bat @@ -105,6 +105,9 @@ REM ========================================================================= echo "examples\templates\accesslog\combined-access.ftl" %FREEMARKER_CMD% -t examples\templates\accesslog\combined-access.ftl examples\data\accesslog\combined-access.log > 
target\out\combined-access.log.txt +echo "examples\templates\logs\csv\serverlog-to-csv.ftl" +%FREEMARKER_CMD% -t examples\templates\logs\csv\serverlog-to-csv.ftl examples\data\logs > target\out\server.log.csv + REM ========================================================================= REM Excel REM ========================================================================= diff --git a/freemarker-generator-cli/src/app/scripts/run-examples.sh b/freemarker-generator-cli/src/app/scripts/run-examples.sh index a0d316c..09d09af 100755 --- a/freemarker-generator-cli/src/app/scripts/run-examples.sh +++ b/freemarker-generator-cli/src/app/scripts/run-examples.sh @@ -125,6 +125,9 @@ $FREEMARKER_CMD -PCSV_SOURCE_DELIMITER=SEMICOLON -PCSV_SOURCE_WITH_HEADER=true - echo "examples/templates/accesslog/combined-access.ftl" $FREEMARKER_CMD -t examples/templates/accesslog/combined-access.ftl examples/data/accesslog/combined-access.log > target/out/combined-access.log.txt || { echo >&2 "Test failed. Aborting."; exit 1; } +echo "examples/templates/logs/csv/serverlog-to-csv.ftl" +$FREEMARKER_CMD -t examples/templates/logs/csv/serverlog-to-csv.ftl examples/data/logs > target/out/server.log.csv || { echo >&2 "Test failed. 
Aborting."; exit 1; } + ############################################################################# # Excel ############################################################################# diff --git a/freemarker-generator-cli/src/site/markdown/cli/usage/parsing-with-grok.md b/freemarker-generator-cli/src/site/markdown/cli/usage/parsing-with-grok.md new file mode 100644 index 0000000..b758527 --- /dev/null +++ b/freemarker-generator-cli/src/site/markdown/cli/usage/parsing-with-grok.md @@ -0,0 +1,118 @@ +## Parsing With Grok + +### Unleashing The Power Of Grok + +Think of `Grok` as modular regular expressions with a pre-defined functionality to parse access logs or any other data where you can't comprehend the regular expression any longer, one very simple example is `QUOTEDSTRING` + +``` +QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) +``` + +And with `Grok` the `QUOTEDSTRING` is just a building block for an even more complex regular expression such as `COMBINEDAPACHELOG` + +> bin/freemarker-generator -t examples/templates/accesslog/combined-access.ftl examples/data/accesslog/combined-access.log + +which gives you the following output + +``` +TIMESTAMP;VERB;REQUEST;HTTPVERSION +19/Jun/2005:06:44:17 +0200;GET;/wximages/wxwidgets02-small.png;1.1 +19/Jun/2005:06:46:05 +0200;GET;/wximages/wxwidgets02-small.png;1.1 +19/Jun/2005:06:47:37 +0200;GET;/wximages/wxwidgets02-small.png;1.1 +19/Jun/2005:06:48:40 +0200;GET;/wiki.pl?WxWidgets_Bounties;1.1 +19/Jun/2005:06:50:49 +0200;GET;/wiki.pl?WxWidgets_Compared_To_Other_Toolkits;1.1 +19/Jun/2005:06:50:49 +0200;GET;/wxwiki.css;1.1 +19/Jun/2005:06:50:49 +0200;GET;/wximages/wxwidgets02-small.png;1.1 +19/Jun/2005:06:50:50 +0200;GET;/favicon.ico;1.1 +19/Jun/2005:06:52:36 +0200;GET;/wximages/wxwidgets02-small.png;1.1 +19/Jun/2005:06:53:14 +0200;GET;/;1.0 +``` + +using the following FreeMarker template + +```text +<#ftl output_format="plainText" strip_whitespace=true> +<#assign 
grok = tools.grok.compile("%{COMBINEDAPACHELOG}")> +<#assign dataSource = dataSources?values[0]> +<#assign lines = dataSource.getLineIterator()> + +<#compress> + TIMESTAMP;VERB;REQUEST;HTTPVERSION + <#list lines as line> + <#assign parts = grok.match(line)> + <#assign timestamp = parts["timestamp"]> + <#assign verb = parts["verb"]> + <#assign request = parts["request"]> + <#assign httpversion = parts["httpversion"]> + ${timestamp};${verb};${request};${httpversion} + </#list> +</#compress> +``` + +While this looks small and tidy there are some nifty features + +* `tools.grok.compile("%{COMBINEDAPACHELOG}")` builds the `Grok` instance to parse access logs in `Combined Format` +* The data source is streamed line by line and not loaded into memory in one piece +* This also works when reading from `stdin`, so you are able to parse gigabytes of access logs or other files + +### Parse Server Log File And Generate CSV + +A more practical example consists of parsing dozens of server log files to determine the response time of message processing, e.g. 
+ +``` +2021-05-18 20:00:32,140 INFO [aa.bb.ccc] (Thread-99) message response handled in: 62 ms; message counter: 2048; total message counter: 7094 +``` + +In technical terms the FTL + +* Defines custom Grok pattern definitions +* Compiles the Grok expression `MY_SERVERLOG` +* Reads through all data sources passed on the command line +* Uses Grok to match lines and extract the timestamp and response times +* Skips all execution times less than 5 ms because these are `pings` we are not interested in +* Creates a simple CSV file + +``` +<#ftl output_format="plainText" strip_whitespace=true> +<#-- + Define custom grok pattern as map to match something like using "MY_SERVERLOG" + 2019-05-17 20:00:32,140 INFO [xx.yyy.zzzz] (Thread-99) message response handled in: 62 ms; message counter: 2048; total message counter: 7094 +--> +<#assign patternDefinitions = { +"MY_DATE": "%{YEAR}-%{MONTHNUM}-%{MONTHDAY}", +"MY_TIMESTAMP": "%{MY_DATE:date} %{TIME:time},%{INT:millis}", +"MY_MODULE": "\\[%{NOTSPACE}\\]", +"MY_THREAD": "\\(%{NOTSPACE}\\)", +"MY_SERVERLOG": "%{MY_TIMESTAMP} %{LOGLEVEL}%{SPACE:UNWANTED}%{MY_MODULE} %{MY_THREAD} message response handled in: %{INT:response_time} ms; %{GREEDYDATA:UNWANTED}" +}> + +<#-- Instantiate the grok tool --> +<#assign grok = tools.grok.compile("%{MY_SERVERLOG}", patternDefinitions)> + +<#-- Iterate over all data sources and convert matching lines to CSV output --> +<#compress> + TIMESTAMP;MILLIS + <#if dataSources?has_content> + <#list dataSources?values as dataSource> + <#list dataSource.getLineIterator() as line> + <#assign parts = grok.match(line)> + <#if parts?has_content> + <#-- Skip all response times less than 5 ms because these are boring pings --> + <#if parts.response_time?number gt 5> + ${parts.date}T${parts.time}.${parts.millis}+02:00;${parts.response_time} + </#if> + </#if> + </#list> + </#list> + </#if> +</#compress> +``` +Executing the FTL yields the following output + +``` +> bin/freemarker-generator -t 
examples/templates/logs/csv/serverlog-to-csv.ftl examples/data/logs; echo +TIMESTAMP;MILLIS +2021-05-18T20:00:32.140+02:00;62 +2021-05-18T21:00:32.140+02:00;162 +``` + diff --git a/freemarker-generator-cli/src/site/markdown/cli/usage/running-examples.md b/freemarker-generator-cli/src/site/markdown/cli/usage/running-examples.md index d13b2f2..e4cc875 100644 --- a/freemarker-generator-cli/src/site/markdown/cli/usage/running-examples.md +++ b/freemarker-generator-cli/src/site/markdown/cli/usage/running-examples.md @@ -649,62 +649,7 @@ time,user,status,duration,size 2019-09-27T21:02:54,DDDDDDD,200,0.528268,206 ``` -### 10. Unleashing The Power Of Grok - -Think of `Grok` as modular regular expressions with a pre-defined functionality to parse access logs or any other data where you can't comprehend the regular expression any longer, one very simple example is `QUOTEDSTRING` - -``` -QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) -``` - -And with `Grok` the `QUOTEDSTRING` is just a building block for an even more complex regular expression such as `COMBINEDAPACHELOG` - -> bin/freemarker-generator -t examples/templates/accesslog/combined-access.ftl examples/data/accesslog/combined-access.log - -which gives you the following output - -``` -TIMESTAMP;VERB;REQUEST;HTTPVERSION -19/Jun/2005:06:44:17 +0200;GET;/wximages/wxwidgets02-small.png;1.1 -19/Jun/2005:06:46:05 +0200;GET;/wximages/wxwidgets02-small.png;1.1 -19/Jun/2005:06:47:37 +0200;GET;/wximages/wxwidgets02-small.png;1.1 -19/Jun/2005:06:48:40 +0200;GET;/wiki.pl?WxWidgets_Bounties;1.1 -19/Jun/2005:06:50:49 +0200;GET;/wiki.pl?WxWidgets_Compared_To_Other_Toolkits;1.1 -19/Jun/2005:06:50:49 +0200;GET;/wxwiki.css;1.1 -19/Jun/2005:06:50:49 +0200;GET;/wximages/wxwidgets02-small.png;1.1 -19/Jun/2005:06:50:50 +0200;GET;/favicon.ico;1.1 -19/Jun/2005:06:52:36 +0200;GET;/wximages/wxwidgets02-small.png;1.1 -19/Jun/2005:06:53:14 +0200;GET;/;1.0 -``` - -using the following FreeMarker template - 
-```text -<#ftl output_format="plainText" strip_whitespace=true> -<#assign grok = tools.grok.compile("%{COMBINEDAPACHELOG}")> -<#assign dataSource = dataSources?values[0]> -<#assign lines = dataSource.getLineIterator()> - -<#compress> - TIMESTAMP;VERB;REQUEST;HTTPVERSION - <#list lines as line> - <#assign parts = grok.match(line)> - <#assign timestamp = parts["timestamp"]> - <#assign verb = parts["verb"]> - <#assign request = parts["request"]> - <#assign httpversion = parts["httpversion"]> - ${timestamp};${verb};${request};${httpversion} - </#list> -</#compress> -``` - -While this looks small and tidy there are some nifty features - -* `tools.grok.compile("%{COMBINEDAPACHELOG}")` builds the `Grok` instance to parse access logs in `Combined Format` -* The data source is streamed line by line and not loaded into memory in one piece -* This also works for using `stdin` so are able to parse GB of access log or other files - -### 11. Executing Arbitrary Commands +### 10. Executing Arbitrary Commands Using Apache Commons Exec allows to execute arbitrary commands - nice but dangerous. It was recently quite useful to to invoke AWS CLI to generate a Confluence page about the overall setup of our AWS accounts. @@ -760,7 +705,7 @@ h3. AWS EC2 Instance </#macro> ``` -## 12. Interactive Templates +## 11. Interactive Templates Sometime you need to apply a CSS, JSON or XPath query in ad ad-hoc way without installing `xmllint`, `jq` or `pup` - in this case you can pass a FreeMarker template in an interactive fashion @@ -788,7 +733,7 @@ SHELL ==> /bin/bash EDITOR ==> vi ``` -## 13. Filtering & Transforming CSV +## 12. 
Filtering & Transforming CSV During an integration project we imported large transactions CSV files (500.000+ records) and in case of import failures the developers would be happy to get a nice outline of the transactions causing the problem (the CSV records have 60+ columns) - in essence it is filtering (based on some primary key) and and transforming into a human-readable output format (Markdown). @@ -884,7 +829,7 @@ yields | Total Profit | 498855.44 | ``` -### 14. Converting Between JSON And YAML +### 13. Converting Between JSON And YAML Sometimes we simply need to transform a JSON into an equivalent YAML or the other way around @@ -898,7 +843,7 @@ Sometimes we simply need to transform a JSON into an equivalent YAML or the othe > freemarker-generator -i '${tools.yaml.toYaml(json)}' -m > json=examples/data/json/swagger-spec.json ``` -### 15. Using Advanced FreeMarker Features +### 14. Using Advanced FreeMarker Features There is a `demo.ftl` which shows some advanced FreeMarker functionality diff --git a/freemarker-generator-cli/src/site/markdown/index.md b/freemarker-generator-cli/src/site/markdown/index.md index e6cde71..9370911 100644 --- a/freemarker-generator-cli/src/site/markdown/index.md +++ b/freemarker-generator-cli/src/site/markdown/index.md @@ -31,6 +31,7 @@ * [Using DataFrames](cli/usage/using-dataframes.html) * [Transforming CSV](cli/usage/transforming-csv.html) * [Generating Test Data](cli/usage/generating-testdata.html) +* [Parsing with Grok](cli/usage/parsing-with-grok.html) ### Advanced Topics diff --git a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/GrokTool.java b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/GrokTool.java index 0beab72..207c54d 100644 --- a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/GrokTool.java +++ b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/GrokTool.java @@ -20,14 
+20,40 @@ import io.krakens.grok.api.Grok; import io.krakens.grok.api.GrokCompiler; import org.apache.freemarker.generator.tools.grok.impl.GrokWrapper; +import java.util.Map; + public class GrokTool { - private static final String DEFAULT_PATTERN = "/patterns/patterns"; + private static final String DEFAULT_PATTERN_FILE = "/patterns/patterns"; + /** + * Compile the Grok pattern. + * + * @param pattern Grok pattern to compile + * @return Grok wrapper + */ public GrokWrapper compile(String pattern) { - return compile(DEFAULT_PATTERN, pattern); + return compile(DEFAULT_PATTERN_FILE, pattern); + } + + /** + * Compile the Grok pattern. + * + * @param pattern Grok pattern to compile + * @param patternDefinitions custom patterns to be registered + * @return Grok wrapper + */ + public GrokWrapper compile(String pattern, Map<String, String> patternDefinitions) { + return compile(DEFAULT_PATTERN_FILE, pattern, patternDefinitions); } + /** + * Compile the Grok pattern. + * + * @param path classpath file for default patterns to register + * @param pattern Grok pattern to compile + * @return Grok wrapper + */ public GrokWrapper compile(String path, String pattern) { final GrokCompiler grokCompiler = GrokCompiler.newInstance(); grokCompiler.registerPatternFromClasspath(path); @@ -35,6 +61,22 @@ public class GrokTool { return new GrokWrapper(grok); } + /** + * Compile the Grok pattern. 
+ * + * @param path classpath file for default patterns to register + * @param pattern Grok pattern to compile + * @param patternDefinitions custom patterns to be registered + * @return Grok wrapper + */ + public GrokWrapper compile(String path, String pattern, Map<String, String> patternDefinitions) { + final GrokCompiler grokCompiler = GrokCompiler.newInstance(); + grokCompiler.registerPatternFromClasspath(path); + grokCompiler.register(patternDefinitions); + final Grok grok = grokCompiler.compile(pattern); + return new GrokWrapper(grok); + } + @Override public String toString() { return "Process text files using Grok expressions (see https://github.com/thekrakken/java-grok)"; diff --git a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/impl/GrokWrapper.java b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/impl/GrokWrapper.java index 626a94f..0589469 100644 --- a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/impl/GrokWrapper.java +++ b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/grok/impl/GrokWrapper.java @@ -24,6 +24,9 @@ import java.util.Map; import static java.util.Objects.requireNonNull; +/** + * Convenience wrapper around Grok instance. 
+ */ public class GrokWrapper { private final Grok grok; diff --git a/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/grok/GrokToolTest.java b/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/grok/GrokToolTest.java index 30c0ed7..816654b 100644 --- a/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/grok/GrokToolTest.java +++ b/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/grok/GrokToolTest.java @@ -20,18 +20,19 @@ import org.apache.freemarker.generator.tools.grok.impl.GrokWrapper; import org.junit.Test; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; public class GrokToolTest { - private static final String LOG = "112.169.19.192 - - [06/Mar/2013:01:36:30 +0900] \"GET / HTTP/1.1\" 200 44346 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.152 Safari/537.22\""; - @Test public void shallParseCombinedAccessLog() { + final String line = "112.169.19.192 - - [06/Mar/2013:01:36:30 +0900] \"GET / HTTP/1.1\" 200 44346 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.152 Safari/537.22\""; final GrokWrapper grok = grokTool().compile("%{COMBINEDAPACHELOG}"); - final Map<String, Object> map = grok.match(LOG); + final Map<String, Object> map = grok.match(line); assertEquals("GET", map.get("verb")); assertEquals("06/Mar/2013:01:36:30 +0900", map.get("timestamp")); @@ -40,6 +41,28 @@ public class GrokToolTest { assertEquals("1.1", map.get("httpversion")); } + @Test + public void shallParseServerLogWithCustomPatternDefinitions() { + final String line = "2019-05-17 20:00:32,140 INFO [xx.yyy.zzzz] (Thread-99) message response handled in: 62 ms; message counter: 2048; total message counter: 7094"; + Map<String, String> patternDefinitions = Stream.of(new 
String[][] { + { "MY_DATE", "%{YEAR}-%{MONTHNUM}-%{MONTHDAY}" }, + { "MY_TIMESTAMP", "%{MY_DATE:date} %{TIME:time},%{INT:millis}" }, + { "MY_MODULE", "\\[%{NOTSPACE}\\]" }, + { "MY_THREAD", "\\(%{NOTSPACE}\\)" }, + { "MY_SERVERLOG", "%{MY_TIMESTAMP} %{LOGLEVEL}%{SPACE:UNWANTED}%{MY_MODULE} %{MY_THREAD} message response handled in: %{INT:response_time} ms; %{GREEDYDATA:UNWANTED}" }, + }).collect(Collectors.toMap(data -> data[0], data -> data[1])); + + final GrokWrapper grok = grokTool().compile("%{MY_SERVERLOG}", patternDefinitions); + + final Map<String, Object> map = grok.match(line); + + assertEquals(16, map.size()); + assertEquals("2019-05-17", map.get("date")); + assertEquals("20:00:32", map.get("time")); + assertEquals("140", map.get("millis")); + assertEquals("62", map.get("response_time")); + } + private GrokTool grokTool() { return new GrokTool(); }
