[GH] Fix: Mitigate CRLF injection in email headers (Issue #603) (tooling-trusted-releases)

via GitHub Thu, 29 Jan 2026 10:37:49 -0800


sbp commented on code in PR #608:
URL: 
https://github.com/apache/tooling-trusted-releases/pull/608#discussion_r2742984386



##########
tests/unit/test_mail.py:
##########
@@ -0,0 +1,345 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Tests for CRLF injection protection in atr.mail module."""
+
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock
+
+import pytest
+
+import atr.mail as mail
+
+if TYPE_CHECKING:
+    from pytest import MonkeyPatch
+
+
[email protected]
+async def test_send_rejects_crlf_in_subject(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that CRLF injection in subject field is rejected."""
+    # Mock _send_many to ensure we never actually send emails
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with CRLF in the subject
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Legitimate Subject\r\nBcc: [email protected]",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called (email was not sent)
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_crlf_in_from_address(monkeypatch: "MonkeyPatch") 
-> None:
+    """Test that CRLF injection in from address field is rejected.
+
+    Note: The from_addr validation happens before EmailMessage processing,
+    so this test verifies the early validation layer also protects against 
injection.
+    """
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with CRLF in the from address
+    malicious_message = mail.Message(
+        email_sender="[email protected]\r\nBcc: [email protected]",
+        email_recipient="[email protected]",
+        subject="Test Subject",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to raise ValueError due to invalid from_addr 
format
+    with pytest.raises(ValueError, match=r"from_addr must end with 
@apache.org"):

Review Comment:
   This is not testing CRLF injection, it's testing the `@apache.org` 
constraint. Perhaps change the BCC address to `[email protected]`?



##########
atr/mail.py:
##########
@@ -16,6 +16,9 @@
 # under the License.
 
 import dataclasses
+import email.headerregistry as headerregistry
+import email.message as emailmessage

Review Comment:
   This should be `import email.message as message`.



##########
atr/mail.py:
##########
@@ -59,28 +62,34 @@ async def send(message: Message) -> tuple[str, list[str]]:
     # UUID4 is entirely random, with no timestamp nor namespace
     # It does have 6 version and variant bits, so only 122 bits are random
     mid = f"{uuid.uuid4()}@{global_domain}"
-    headers = [
-        f"From: {from_addr}",
-        f"To: {to_addr}",
-        f"Subject: {message.subject}",
-        f"Date: {utils.formatdate(localtime=True)}",
-        f"Message-ID: <{mid}>",
-    ]
-    if message.in_reply_to is not None:
-        headers.append(f"In-Reply-To: <{message.in_reply_to}>")
-        # TODO: Add message.references if necessary
-        headers.append(f"References: <{message.in_reply_to}>")
-
-    # Normalise the body padding and ensure that line endings are CRLF
-    body = message.body.strip()
-    body = body.replace("\r\n", "\n")
-    body = body.replace("\n", "\r\n")
-    body = body + "\r\n"
-
-    # Construct the message
-    msg_text = "\r\n".join(headers) + "\r\n\r\n" + body
+
+    # Use EmailMessage with Address objects for CRLF injection protection
+    msg = emailmessage.EmailMessage(policy=policy.SMTPUTF8)
+
+    try:
+        from_local, from_domain = _split_address(from_addr)
+        to_local, to_domain = _split_address(to_addr)
+
+        msg["From"] = headerregistry.Address(username=from_local, 
domain=from_domain)
+        msg["To"] = headerregistry.Address(username=to_local, domain=to_domain)
+        msg["Subject"] = message.subject
+        msg["Date"] = utils.formatdate(localtime=True)

Review Comment:
   I think we should make this UTC. Should add a test for this being UTC too.



##########
uv.lock:
##########
@@ -3,7 +3,7 @@ revision = 3
 requires-python = "==3.13.*"
 
 [options]
-exclude-newer = "2026-01-28T19:01:47Z"
+exclude-newer = "2026-01-29T03:49:27Z"

Review Comment:
   We need to have zero changes to the `uv.lock` file, because this PR does not 
require any changes to dependencies.



##########
atr/mail.py:
##########
@@ -16,6 +16,9 @@
 # under the License.
 
 import dataclasses
+import email.headerregistry as headerregistry
+import email.message as emailmessage

Review Comment:
   Any subsequent use of `message` will have to be renamed `msg`.



##########
tests/unit/test_mail.py:
##########
@@ -0,0 +1,345 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Tests for CRLF injection protection in atr.mail module."""
+
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock
+
+import pytest
+
+import atr.mail as mail
+
+if TYPE_CHECKING:
+    from pytest import MonkeyPatch
+
+
[email protected]
+async def test_send_rejects_crlf_in_subject(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that CRLF injection in subject field is rejected."""
+    # Mock _send_many to ensure we never actually send emails
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with CRLF in the subject
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Legitimate Subject\r\nBcc: [email protected]",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called (email was not sent)
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_crlf_in_from_address(monkeypatch: "MonkeyPatch") 
-> None:
+    """Test that CRLF injection in from address field is rejected.
+
+    Note: The from_addr validation happens before EmailMessage processing,
+    so this test verifies the early validation layer also protects against 
injection.
+    """
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with CRLF in the from address
+    malicious_message = mail.Message(
+        email_sender="[email protected]\r\nBcc: [email protected]",
+        email_recipient="[email protected]",
+        subject="Test Subject",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to raise ValueError due to invalid from_addr 
format
+    with pytest.raises(ValueError, match=r"from_addr must end with 
@apache.org"):
+        await mail.send(malicious_message)
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_crlf_in_to_address(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that CRLF injection in to address field is rejected.
+
+    Note: The _validate_recipient check happens before EmailMessage processing,
+    so this test verifies the early validation layer also protects against 
injection.
+    """
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with CRLF in the to address
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]\r\nBcc: [email protected]",
+        subject="Test Subject",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to raise ValueError due to invalid recipient 
format
+    with pytest.raises(ValueError, match=r"Email recipient must be 
@apache.org"):
+        await mail.send(malicious_message)
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_crlf_in_reply_to(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that CRLF injection in in_reply_to field is rejected."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with CRLF in the in_reply_to field
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Test Subject",
+        body="This is a test message",
+        in_reply_to="[email protected]\r\nBcc: [email protected]",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_accepts_legitimate_message(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that a legitimate message without CRLF is accepted."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a legitimate message without any CRLF injection attempts
+    legitimate_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Legitimate Subject",
+        body="This is a legitimate test message with no injection attempts.",
+    )
+
+    # Call send
+    mid, errors = await mail.send(legitimate_message)
+
+    # Assert that no errors were returned
+    assert len(errors) == 0
+
+    # Assert that _send_many was called (email was sent)
+    mock_send_many.assert_called_once()
+
+    # Verify the Message-ID was generated
+    assert "@apache.org" in mid
+
+
[email protected]
+async def test_send_accepts_message_with_reply_to(monkeypatch: "MonkeyPatch") 
-> None:
+    """Test that a legitimate message with in_reply_to is accepted."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a legitimate message with a valid in_reply_to
+    legitimate_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Re: Previous Subject",
+        body="This is a reply message.",
+        in_reply_to="[email protected]",
+    )
+
+    # Call send
+    mid, errors = await mail.send(legitimate_message)
+
+    # Assert that no errors were returned
+    assert len(errors) == 0
+
+    # Assert that _send_many was called (email was sent)
+    mock_send_many.assert_called_once()
+
+    # Verify the Message-ID was generated
+    assert "@apache.org" in mid
+
+
[email protected]
+async def test_send_rejects_lf_only_injection(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that injection with LF only (\\n) is also rejected."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with just LF (no CR)
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Legitimate Subject\nBcc: [email protected]",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_cr_only_injection(monkeypatch: "MonkeyPatch") -> 
None:
+    """Test that injection with CR only (\\r) is also rejected."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message with just CR (no LF)
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Legitimate Subject\rBcc: [email protected]",
+        body="This is a test message",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_bcc_header_injection(monkeypatch: "MonkeyPatch") 
-> None:
+    """Test a realistic Bcc header injection attack scenario."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message attempting to inject a Bcc header
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Important Notice\r\nBcc: 
[email protected]\r\nX-Priority: 1",
+        body="This message attempts to secretly copy an attacker.",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_send_rejects_content_type_injection(monkeypatch: "MonkeyPatch") 
-> None:
+    """Test injection attempting to override Content-Type header."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    # Create a malicious message attempting to inject Content-Type
+    malicious_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Test\r\nContent-Type: 
text/html\r\n\r\n<html><script>alert('XSS')</script></html>",
+        body="Normal body",
+    )
+
+    # Call send and expect it to catch the injection
+    _, errors = await mail.send(malicious_message)
+
+    # Assert that the function returned an error
+    assert len(errors) == 1
+    assert "CRLF injection detected" in errors[0]
+
+    # Assert that _send_many was never called
+    mock_send_many.assert_not_called()
+
+
[email protected]
+async def test_address_objects_used_for_from_to_headers(monkeypatch: 
"MonkeyPatch") -> None:
+    """Test that Address objects are used for From/To headers."""
+    mock_send_many = AsyncMock(return_value=[])
+    monkeypatch.setattr("atr.mail._send_many", mock_send_many)
+
+    legitimate_message = mail.Message(
+        email_sender="[email protected]",
+        email_recipient="[email protected]",
+        subject="Test Subject",
+        body="Test body",
+    )
+
+    _, errors = await mail.send(legitimate_message)
+
+    # Verify the message was sent successfully
+    assert len(errors) == 0
+    mock_send_many.assert_called_once()
+
+    # Verify the generated email bytes contain properly formatted addresses
+    call_args = mock_send_many.call_args
+    msg_text = call_args[0][2]  # already a str
+
+    # Address objects format email addresses properly
+    assert "From: [email protected]" in msg_text
+    assert "To: [email protected]" in msg_text
+
+
[email protected]
+async def test_send_handles_non_ascii_headers(monkeypatch: "MonkeyPatch") -> 
None:

Review Comment:
   Can you please also add a test showing that it _fails_ if the policy is set 
to just `SMTP` and an `é` is used in the subject?



##########
atr/mail.py:
##########
@@ -98,7 +106,8 @@ async def send(message: Message) -> tuple[str, list[str]]:
 
 async def _send_many(from_addr: str, to_addrs: list[str], msg_text: str) -> 
list[str]:
     """Send an email to multiple recipients."""
-    message_bytes = bytes(msg_text, "utf-8")
+    # Logic change: convert to bytes here to satisfy the internal relay 
expectation
+    message_bytes = msg_text.encode("utf-8")

Review Comment:
   I still see the change to `msg_text.encode("utf-8")`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GH] Fix: Mitigate CRLF injection in email headers (Issue #603) (tooling-trusted-releases)

Reply via email to