RuinedYourLife has proposed merging ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master.
Commit message: Redact secrets in logs Requested reviews: Launchpad code reviewers (launchpad-reviewers) For more details, see: https://code.launchpad.net/~ruinedyourlife/launchpad-buildd/+git/launchpad-buildd/+merge/491155 The new build types have added new form of secrets that can appear in the logs. We wish to redact these, the same way the URLs are. -- Your team Launchpad code reviewers is requested to review the proposed merge of ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master.
diff --git a/lpbuildd/builder.py b/lpbuildd/builder.py index 42e49bb..90fb821 100644 --- a/lpbuildd/builder.py +++ b/lpbuildd/builder.py @@ -36,14 +36,16 @@ devnull = open("/dev/null") # coverity[COV_PY_SIGMA.hardcoded_secret_pattern_low:SUPPRESS] doc example -def _sanitizeURLs(bytes_seq): - """A generator that deletes URL passwords from a bytes sequence. +def _sanitize_log_lines(bytes_seq): + """A generator that sanitizes sensitive data from log line bytes. - This generator removes user/password data from URLs if embedded - in the latter as follows: scheme://user:passwd@netloc/path. + Removes user/password data embedded in URLs (scheme://user:passwd@host), + strips builder proxy auth parameters, and redacts common secret-bearing + patterns such as Authorization headers and secret-like environment + variable assignments. - :param bytes_seq: A sequence of byte strings (that may contain URLs). - :return: A (sanitized) line stripped of authentication credentials. + :param bytes_seq: A sequence of byte strings representing log lines. + :return: Sanitized lines with credentials and secrets removed or masked. """ # This regular expression will be used to remove authentication # credentials from URLs. @@ -51,9 +53,35 @@ def _sanitizeURLs(bytes_seq): # Builder proxy passwords are UUIDs. proxy_auth_re = re.compile(rb",proxyauth=[^:]+:[A-Za-z0-9-]+") + # Additional redaction patterns for common secret-bearing content. + # Redact environment assignments for keys that likely contain secrets. + # Matches KEY=VALUE where KEY ends with sensitive strings. + env_secret_re = re.compile( + rb"(\b(?:[A-Z][A-Z0-9_]*?(?:PASSWORD|TOKEN|SECRET|API(?:_|)?KEY|READ_AUTH)\b)\s*=\s*)(?:\"[^\"]*\"|'[^']*'|\S+)" + ) + # Handle env assignments of the form KEY=Bearer <token> (value with a space). + env_bearer_pair_re = re.compile( + rb"(\b(?:[A-Z][A-Z0-9_]*?(?:PASSWORD|TOKEN|SECRET|API(?:_|)?KEY|READ_AUTH)\b)\s*=\s*)Bearer\s+\S+" + ) + # Redact Authorization headers. + authorization_header_re = re.compile(rb"\bAuthorization:\s*\S.*") + # Redact Bearer tokens that might appear outside of Authorization headers. + bearer_re = re.compile(rb"(\bBearer\s+)([A-Za-z0-9._-]+)") + # Redact value passed to the fetch-service MITM certificate flag. + mitm_cert_flag_re = re.compile( + rb"(--fetch-service-mitm-certificate\s+)(?:\"[^\"]*\"|'[^']*'|\S+)" + ) + for line in bytes_seq: sanitized_line = password_re.sub(rb"://\2", line) sanitized_line = proxy_auth_re.sub(b"", sanitized_line) + sanitized_line = env_bearer_pair_re.sub(rb"\1REDACTED", sanitized_line) + sanitized_line = env_secret_re.sub(rb"\1REDACTED", sanitized_line) + sanitized_line = authorization_header_re.sub( + b"Authorization: REDACTED", sanitized_line + ) + sanitized_line = bearer_re.sub(rb"\1REDACTED", sanitized_line) + sanitized_line = mitm_cert_flag_re.sub(rb"\1<redacted>", sanitized_line) yield sanitized_line @@ -607,7 +635,7 @@ class Builder: When the 'buildlog' is present it returns up to 2 KiB bytes of the end of the file. - The returned content will be 'sanitized', see `_sanitizeURLs` for + The returned content will be 'sanitized', see `_sanitize_log_lines` for further information. """ if self._log is None: @@ -642,7 +670,7 @@ class Builder: # Please note: we are throwing away the first line (of the # excerpt to be scrubbed) because it may be cut off thus # thwarting the detection of embedded passwords. - clean_content_iter = _sanitizeURLs(log_lines[1:]) + clean_content_iter = _sanitize_log_lines(log_lines[1:]) ret = b"\n".join(clean_content_iter) return ret @@ -759,7 +787,7 @@ class Builder: sanitized_file = open(log_path, "wb") # Scrub the buildlog file line by line - clean_content_iter = _sanitizeURLs(unsanitized_file) + clean_content_iter = _sanitize_log_lines(unsanitized_file) for line in clean_content_iter: sanitized_file.write(line) finally: diff --git a/lpbuildd/tests/test_builder.py b/lpbuildd/tests/test_builder.py index b43e7e1..cd5b8e3 100644 --- a/lpbuildd/tests/test_builder.py +++ b/lpbuildd/tests/test_builder.py @@ -17,12 +17,12 @@ from twisted.internet import defer from twisted.logger import FileLogObserver, formatEvent, globalLogPublisher from unittest import mock -from lpbuildd.builder import Builder, BuildManager, _sanitizeURLs +from lpbuildd.builder import Builder, BuildManager, _sanitize_log_lines from lpbuildd.tests.fakebuilder import FakeConfig class TestSanitizeURLs(TestCase): - """Unit-test URL sanitization. + """Unit-test log sanitization (URLs, env secrets, headers). `lpbuildd.tests.test_buildd.LaunchpadBuilddTests` also covers some of this, but at a higher level. @@ -30,18 +30,18 @@ class TestSanitizeURLs(TestCase): def test_non_urls(self): lines = [b"not a URL", b"still not a URL"] - self.assertEqual(lines, list(_sanitizeURLs(lines))) + self.assertEqual(lines, list(_sanitize_log_lines(lines))) def test_url_without_credentials(self): lines = [b"Get:1 http://ftpmaster.internal focal InRelease"] - self.assertEqual(lines, list(_sanitizeURLs(lines))) + self.assertEqual(lines, list(_sanitize_log_lines(lines))) def test_url_with_credentials(self): lines = [ b"Get:1 http://buildd:secret@ftpmaster.internal focal InRelease", ] expected_lines = [b"Get:1 http://ftpmaster.internal focal InRelease"] - self.assertEqual(expected_lines, list(_sanitizeURLs(lines))) + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) def test_multiple_urls(self): lines = [ @@ -52,7 +52,7 @@ class TestSanitizeURLs(TestCase): b"http_proxy=http://squid.internal:3128/ " b"GOPROXY=http://example.com/goproxy", ] - self.assertEqual(expected_lines, list(_sanitizeURLs(lines))) + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) def test_proxyauth(self): lines = [ @@ -63,7 +63,36 @@ class TestSanitizeURLs(TestCase): b"socat STDIO PROXY:builder-proxy.launchpad.dev:github.com:443," b"proxyport=3128", ] - self.assertEqual(expected_lines, list(_sanitizeURLs(lines))) + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) + + def test_cargo_read_auth_env_is_redacted(self): + lines = [b"CARGO_ARTIFACTORY1_READ_AUTH=user:token123"] + expected_lines = [b"CARGO_ARTIFACTORY1_READ_AUTH=REDACTED"] + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) + + def test_cargo_token_env_is_redacted(self): + lines = [b"CARGO_REGISTRIES_ARTIFACTORY1_TOKEN=Bearer token123"] + expected_lines = [b"CARGO_REGISTRIES_ARTIFACTORY1_TOKEN=REDACTED"] + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) + + def test_authorization_header_is_redacted(self): + lines = [b"Authorization: Bearer abc.def.ghi"] + expected_lines = [b"Authorization: REDACTED"] + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) + + def test_standalone_bearer_token_is_redacted(self): + lines = [b"Some tool printed: Bearer abcdef123456"] + expected_lines = [b"Some tool printed: Bearer REDACTED"] + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) + + def test_mitm_certificate_flag_value_is_redacted(self): + lines = [ + b"RUN: build-craft --fetch-service-mitm-certificate 'BEGINCERTDATA'" + ] + expected_lines = [ + b"RUN: build-craft --fetch-service-mitm-certificate <redacted>" + ] + self.assertEqual(expected_lines, list(_sanitize_log_lines(lines))) class TestBuildManager(TestCase):
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp