This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch upload-task-logs-cloudwatch-too
in repository https://gitbox.apache.org/repos/asf/airflow-ci-infra.git
commit e46584050bc8c21178c1254655758f6c99f45e1d
Author: Ash Berlin-Taylor <[email protected]>
AuthorDate: Mon Mar 15 10:14:45 2021 +0000

    Upload job output logs to Cloudwatch too

    We have some cases where logs aren't being uploaded to Github, which
    makes debugging failures hard.

    This is a problem with GitHub's hosted runners too, but for self-hosted
    runners we can at least do something about it
---
 cloud-init.yml | 60 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 12 deletions(-)

diff --git a/cloud-init.yml b/cloud-init.yml
index b52aedd..e65b954 100644
--- a/cloud-init.yml
+++ b/cloud-init.yml
@@ -231,6 +231,15 @@ write_files:
           type = "journald"
           include_units = ["actions.runner.service", "actions.runner-supervisor.service"]
 
+        [transforms.without_systemd_fields]
+          type = "remove_fields"
+          inputs = ["logs"]
+          fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
+                    "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
+                    "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
+                    "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
+                    "source_type"]
+
         [sources.runner-logs]
           type = "file"
           include = ["/home/runner/actions-runner/_diag/*.log"]
@@ -242,23 +251,50 @@ write_files:
           timeout_ms = 250
 
         [transforms.grok-runner-logs]
-          type = "grok_parser"
+          type = "remap"
           inputs=["runner-logs"]
-          pattern = "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}"
-          types.timestamp = "timestamp|%F %TZ"
+          source = '''
+            structured, err = parse_grok(.message, "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}")
+
+            if err != null {
+              .err = err
+            } else {
+              . = merge(., structured)
+            }
+          '''
+        [transforms.filter-runner-logs]
+          type = "filter"
+          inputs = ['grok-runner-logs']
+          condition.type = "remap"
+          condition.source = '''
+            if .logger == "JobServerQueue" {
+              !match!(.message, r'Try to append \d+ batches web console lines for record')
+            } else if .logger == "HostContext" {
+              !starts_with!(.message, "Well known directory")
+            } else {
+              true
+            }
+          '''
+
+        [sources.job-logs]
+          type = "file"
+          include = ["/home/runner/actions-runner/_diag/pages/*.log"]
 
-        [transforms.without_systemd_fields]
-          type = "remove_fields"
-          inputs = ["logs"]
-          fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
-                    "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
-                    "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
-                    "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
-                    "source_type"]
+        [transforms.grok-job-logs]
+          type = "remap"
+          inputs = ["job-logs"]
+          source = '''
+            structured, err = parse_grok(.message, "%{TIMESTAMP_ISO8601:timestamp} %{GREEDYDATA:message}")
+
+            if err == null {
+              . = merge(., structured)
+              .type = "job-output"
+            }
+          '''
 
         # Output data
         [sinks.cloudwatch]
-          inputs = ["without_systemd_fields", "grok-runner-logs"]
+          inputs = ["without_systemd_fields", "filter-runner-logs", "grok-job-logs"]
           type = "aws_cloudwatch_logs"
          type = "aws_cloudwatch_logs"
           encoding = "json"
           create_missing_group = false
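As a rough way to sanity-check the new runner-log path before it reaches cloud-init, the remap and filter transforms from the diff can be wired between a stdin source and a console sink in a small standalone Vector config. This is only a sketch under assumptions: the file name (vector-test.toml) and the sample log line below are illustrative, not part of this commit.

    # vector-test.toml -- illustrative dry-run config, not part of this commit
    [sources.runner-logs]
      # Read sample lines from stdin instead of the runner's _diag files
      type = "stdin"

    [transforms.grok-runner-logs]
      # Same VRL as the diff: grok out timestamp/level/logger, keep the raw event on error
      type = "remap"
      inputs = ["runner-logs"]
      source = '''
        structured, err = parse_grok(.message, "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}")

        if err != null {
          .err = err
        } else {
          . = merge(., structured)
        }
      '''

    [transforms.filter-runner-logs]
      # Same condition as the diff: drop the two noisy runner log patterns
      type = "filter"
      inputs = ["grok-runner-logs"]
      condition.type = "remap"
      condition.source = '''
        if .logger == "JobServerQueue" {
          !match!(.message, r'Try to append \d+ batches web console lines for record')
        } else if .logger == "HostContext" {
          !starts_with!(.message, "Well known directory")
        } else {
          true
        }
      '''

    [sinks.console]
      # Print structured events as JSON instead of shipping to CloudWatch
      type = "console"
      inputs = ["filter-runner-logs"]
      encoding = "json"

It can then be fed a line shaped roughly like the runner's _diag output (the exact format is an assumption here):

    echo '[2021-03-15 10:14:45Z INFO JobServerQueue] example message' | vector --config vector-test.toml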
