Gage has uploaded a new change for review.
https://gerrit.wikimedia.org/r/168941
Change subject: logstash: hadoop: syntax #3
......................................................................
logstash: hadoop: syntax #3
* rearrange to rename short_message to message first
* lower case field names
* split apart to isolate parse failure
Change-Id: Ica640a6bcec9f9bd59f7e15308b49c255fdd5f48
---
M files/logstash/filter-gelf.conf
1 file changed, 48 insertions(+), 25 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/41/168941/1
diff --git a/files/logstash/filter-gelf.conf b/files/logstash/filter-gelf.conf
index 1e46da1..19e0f71 100644
--- a/files/logstash/filter-gelf.conf
+++ b/files/logstash/filter-gelf.conf
@@ -1,5 +1,18 @@
filter {
if [type] == "gelf" {
+ mutate {
+ # tag gelf messages for storage in elasticsearch
+ add_tag => [ "es", "gelf" ]
+ }
+ mutate {
+ # move field names to what we're expecting
+ replace => [ "type", "%{facility}" ]
+ replace => [ "message", "%{short_message}" ]
+ }
+ mutate {
+ # get rid of fields we moved
+ remove_field => [ "facility", "short_message" ]
+ }
if [facility] == "Hadoop" {
mutate {
replace => [ "channel", "%{SourceSimpleClassName}" ]
@@ -15,36 +28,46 @@
match => [ "Thread", "(?<Thread>CacheReplicationMonitor)\(%{NUMBER:CacheReplicationMonitor_id}\)" ]
overwrite => [ "Thread" ]
}
- grok {
- # so that we can search by job id and find tasks and attempts:
- # extract attempt ID to field: attempt_1409078537822_52431_m_000009_1, attempt_1409078537822_55176_r_000000_0
- match => [ "message", "attempt_(?<Attempt_id>[0-9]+_[0-9]+_[mr]_[0-9]+_[0-9]+)" ]
- # extract task ID to field: task_1409078537822_52431_m_000044
- match => [ "message", "task_(?<Task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)" ]
- # extract job ID to field: job_1409078537822_52431
- match => [ "message", "job_(?<Job_id>[0-9]+_[0-9]+)" ]
- # infer task ID from attempt ID
- match => [ "Attempt_id", "(?<Task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)_[0-9]+" ]
- # infer job ID from task ID
- match => [ "Task_id", "(?<Job_id>[0-9]+_[0-9]+)_[mr]_[0-9]+" ]
+ if [message] =~ /attempt_/ {
+ grok {
+ # so that we can search by job id and find tasks and attempts:
+ # extract attempt ID to field: attempt_1409078537822_52431_m_000009_1, attempt_1409078537822_55176_r_000000_0
+ match => [ "message", "attempt_(?<attempt_id>[0-9]+_[0-9]+_[mr]_[0-9]+_[0-9]+)" ]
+ tag_on_failure => ["_grokparsefailure_attempt"]
+ }
+ }
+ if [message] =~ /task_/ {
+ grok {
+ # extract task ID to field: task_1409078537822_52431_m_000044
+ match => [ "message", "task_(?<task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)" ]
+ tag_on_failure => ["_grokparsefailure_task"]
+ }
+ }
+ if [message] =~ /job_/ {
+ grok {
+ # extract job ID to field: job_1409078537822_52431
+ match => [ "message", "job_(?<job_id>[0-9]+_[0-9]+)" ]
+ tag_on_failure => ["_grokparsefailure_job"]
+ }
+ }
+ if [task_id] != "" {
+ grok {
+ # infer task ID from attempt ID
+ match => [ "attempt_id", "(?<task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)_[0-9]+" ]
+ tag_on_failure => ["_grokparsefailure_infer_task"]
+ }
+ }
+ if [job_id] != "" {
+ grok {
+ # infer job ID from task ID
+ match => [ "task_id", "(?<job_id>[0-9]+_[0-9]+)_[mr]_[0-9]+" ]
+ tag_on_failure => ["_grokparsefailure_infer_job"]
+ }
}
dns {
reverse => [ "host" ]
action => "replace"
}
- }
- mutate {
- # tag gelf messages for storage in elasticsearch
- add_tag => [ "es", "gelf" ]
- }
- mutate {
- # move field names to what we're expecting
- replace => [ "type", "%{facility}" ]
- replace => [ "message", "%{short_message}" ]
- }
- mutate {
- # get rid of fields we moved
- remove_field => [ "facility", "short_message" ]
}
prune {
# get rid of a whole bunch of useless _ fields that are duplicated
--
To view, visit https://gerrit.wikimedia.org/r/168941
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ica640a6bcec9f9bd59f7e15308b49c255fdd5f48
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Gage <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits