Gage has submitted this change and it was merged.

Change subject: logstash: hadoop: syntax #3
......................................................................


logstash: hadoop: syntax #3

 * rearrange to rename short_message to message first
 * lower case field names
 * split apart to isolate parse failure

Change-Id: Ica640a6bcec9f9bd59f7e15308b49c255fdd5f48
---
M files/logstash/filter-gelf.conf
1 file changed, 48 insertions(+), 25 deletions(-)

Approvals:
  Gage: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/files/logstash/filter-gelf.conf b/files/logstash/filter-gelf.conf
index 1e46da1..19e0f71 100644
--- a/files/logstash/filter-gelf.conf
+++ b/files/logstash/filter-gelf.conf
@@ -1,5 +1,18 @@
 filter {
   if [type] == "gelf" {
+    mutate {
+      # tag gelf messages for storage in elasticsearch
+      add_tag => [ "es", "gelf" ]
+    }
+    mutate {
+      # move field names to what we're expecting
+      replace => [ "type", "%{facility}" ]
+      replace => [ "message", "%{short_message}" ]
+    }
+    mutate {
+      # get rid of fields we moved
+      remove_field => [ "facility", "short_message" ]
+    }
     if [facility] == "Hadoop" {
       mutate {
         replace => [ "channel", "%{SourceSimpleClassName}" ]
@@ -15,36 +28,46 @@
         match => [ "Thread", "(?<Thread>CacheReplicationMonitor)\(%{NUMBER:CacheReplicationMonitor_id}\)" ]
         overwrite => [ "Thread" ]
       }
-      grok {
-        # so that we can search by job id and find tasks and attempts:
-        # extract attempt ID to field: attempt_1409078537822_52431_m_000009_1, attempt_1409078537822_55176_r_000000_0
-        match => [ "message",         "attempt_(?<Attempt_id>[0-9]+_[0-9]+_[mr]_[0-9]+_[0-9]+)" ]
-        # extract task ID to field:       task_1409078537822_52431_m_000044
-        match => [ "message",            "task_(?<Task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)" ]
-        # extract job ID to field:         job_1409078537822_52431
-        match => [ "message",             "job_(?<Job_id>[0-9]+_[0-9]+)" ]
-        # infer task ID from attempt ID
-        match => [ "Attempt_id", "(?<Task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)_[0-9]+" ]
-        # infer job ID from task ID
-        match => [ "Task_id",    "(?<Job_id>[0-9]+_[0-9]+)_[mr]_[0-9]+" ]
+      if [message] =~ /attempt_/ {
+        grok {
+          # so that we can search by job id and find tasks and attempts:
+          # extract attempt ID to field: attempt_1409078537822_52431_m_000009_1, attempt_1409078537822_55176_r_000000_0
+          match => [ "message",         "attempt_(?<attempt_id>[0-9]+_[0-9]+_[mr]_[0-9]+_[0-9]+)" ]
+          tag_on_failure => ["_grokparsefailure_attempt"]
+        }
+      }
+      if [message] =~ /task_/ {
+        grok {
+          # extract task ID to field:       task_1409078537822_52431_m_000044
+          match => [ "message",            "task_(?<task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)" ]
+          tag_on_failure => ["_grokparsefailure_task"]
+        }
+      }
+      if [message] =~ /job_/ {
+        grok {
+          # extract job ID to field:         job_1409078537822_52431
+          match => [ "message",             "job_(?<job_id>[0-9]+_[0-9]+)" ]
+          tag_on_failure => ["_grokparsefailure_job"]
+        }
+      }
+      if [task_id] != "" {
+        grok {
+          # infer task ID from attempt ID
+          match => [ "attempt_id", "(?<task_id>[0-9]+_[0-9]+_[mr]_[0-9]+)_[0-9]+" ]
+          tag_on_failure => ["_grokparsefailure_infer_task"]
+        }
+      }
+      if [job_id] != "" {
+        grok {
+          # infer job ID from task ID
+          match => [ "task_id",    "(?<job_id>[0-9]+_[0-9]+)_[mr]_[0-9]+" ]
+          tag_on_failure => ["_grokparsefailure_infer_job"]
+        }
       }
       dns {
         reverse => [ "host" ]
         action  => "replace"
       }
-    }
-    mutate {
-      # tag gelf messages for storage in elasticsearch
-      add_tag => [ "es", "gelf" ]
-    }
-    mutate {
-      # move field names to what we're expecting
-      replace => [ "type", "%{facility}" ]
-      replace => [ "message", "%{short_message}" ]
-    }
-    mutate {
-      # get rid of fields we moved
-      remove_field => [ "facility", "short_message" ]
     }
     prune {
       # get rid of a whole bunch of useless _ fields that are duplicated

-- 
To view, visit https://gerrit.wikimedia.org/r/168941
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ica640a6bcec9f9bd59f7e15308b49c255fdd5f48
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Gage <[email protected]>
Gerrit-Reviewer: Gage <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to