This is an automated email from the ASF dual-hosted git repository.
ash pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow-ci-infra.git
The following commit(s) were added to refs/heads/master by this push:
new b1a93d1 Upload job output logs to Cloudwatch too (#8)
b1a93d1 is described below
commit b1a93d123c57c6e8acf55ac8cd8ad751c7dcda4e
Author: Ash Berlin-Taylor <[email protected]>
AuthorDate: Mon Mar 15 14:47:09 2021 +0000
Upload job output logs to Cloudwatch too (#8)
We have some cases where logs aren't being uploaded to GitHub, which
makes debugging failures hard.
This is a problem with GitHub's hosted runners too, but for self-hosted
runners we can at least do something about it.
---
cloud-init.yml | 60 ++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 48 insertions(+), 12 deletions(-)
diff --git a/cloud-init.yml b/cloud-init.yml
index b52aedd..e65b954 100644
--- a/cloud-init.yml
+++ b/cloud-init.yml
@@ -231,6 +231,15 @@ write_files:
type = "journald"
include_units = ["actions.runner.service",
"actions.runner-supervisor.service"]
+ [transforms.without_systemd_fields]
+ type = "remove_fields"
+ inputs = ["logs"]
+ fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
+ "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
+ "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
+ "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
+ "source_type"]
+
[sources.runner-logs]
type = "file"
include = ["/home/runner/actions-runner/_diag/*.log"]
@@ -242,23 +251,50 @@ write_files:
timeout_ms = 250
[transforms.grok-runner-logs]
- type = "grok_parser"
+ type = "remap"
inputs=["runner-logs"]
- pattern = "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level}
%{NOTSPACE:logger}\\] %{GREEDYDATA:message}"
- types.timestamp = "timestamp|%F %TZ"
+ source = '''
+ structured, err = parse_grok(.message,
"(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\]
%{GREEDYDATA:message}")
+
+ if err != null {
+ .err = err
+ } else {
+ . = merge(., structured)
+ }
+ '''
+ [transforms.filter-runner-logs]
+ type = "filter"
+ inputs = ['grok-runner-logs']
+ condition.type = "remap"
+ condition.source = '''
+ if .logger == "JobServerQueue" {
+ !match!(.message, r'Try to append \d+ batches web console lines
for record')
+ } else if .logger == "HostContext" {
+ !starts_with!(.message, "Well known directory")
+ } else {
+ true
+ }
+ '''
+
+ [sources.job-logs]
+ type = "file"
+ include = ["/home/runner/actions-runner/_diag/pages/*.log"]
- [transforms.without_systemd_fields]
- type = "remove_fields"
- inputs = ["logs"]
- fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
- "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
- "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
- "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
- "source_type"]
+ [transforms.grok-job-logs]
+ type = "remap"
+ inputs = ["job-logs"]
+ source = '''
+ structured, err = parse_grok(.message,
"%{TIMESTAMP_ISO8601:timestamp} %{GREEDYDATA:message}")
+
+ if err == null {
+ . = merge(., structured)
+ .type = "job-output"
+ }
+ '''
# Output data
[sinks.cloudwatch]
- inputs = ["without_systemd_fields", "grok-runner-logs"]
+ inputs = ["without_systemd_fields", "filter-runner-logs",
"grok-job-logs"]
type = "aws_cloudwatch_logs"
encoding = "json"
create_missing_group = false