This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 3e819876532 Handle mixed plaintext and RFC 2047 encoded attachment
filenames in ImapHook. Add unit test covering mixed plaintext and encoded
filename decoding. (#66672)
3e819876532 is described below
commit 3e81987653217c0ba3ca895194377a6e9aa1de3c
Author: SameerMesiah97 <[email protected]>
AuthorDate: Sun May 10 23:39:53 2026 +0100
Handle mixed plaintext and RFC 2047 encoded attachment filenames in
ImapHook. Add unit test covering mixed plaintext and encoded filename decoding.
(#66672)
Co-authored-by: Sameer Mesiah <[email protected]>
---
providers/imap/src/airflow/providers/imap/hooks/imap.py | 17 +++++++----------
providers/imap/tests/unit/imap/hooks/test_imap.py | 16 ++++++++++++++++
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/providers/imap/src/airflow/providers/imap/hooks/imap.py b/providers/imap/src/airflow/providers/imap/hooks/imap.py
index ec11bb313f5..b3c9ebf0b9a 100644
--- a/providers/imap/src/airflow/providers/imap/hooks/imap.py
+++ b/providers/imap/src/airflow/providers/imap/hooks/imap.py
@@ -24,12 +24,12 @@ It uses the imaplib library that is already integrated in python 3.
from __future__ import annotations
import email
-import email.header
import imaplib
import os
import re
import ssl
from collections.abc import Iterable
+from email.header import decode_header, make_header
from typing import TYPE_CHECKING, Any
from airflow.providers.common.compat.sdk import AirflowException, BaseHook
@@ -392,18 +392,15 @@ class MailPart:
@staticmethod
def _decode_filename(filename: str | None) -> str | None:
"""
- Decode an RFC 2047 MIME-encoded filename into a Unicode string.
+ Decode a filename that may contain RFC 2047 encoded segments.
- :param filename: The raw filename, possibly RFC 2047 encoded.
- :returns: The decoded Unicode filename, or None if the input is None.
+ :param filename: The filename extracted from the email part. It may contain
+ plain text, RFC 2047 encoded text, or a mix of both.
+ :returns: The decoded Unicode filename, or the original value if decoding fails.
"""
if filename is None:
- return None
- decoded_parts = email.header.decode_header(filename)
- return "".join(
- part.decode(encoding or "utf-8") if isinstance(part, bytes) else part
- for part, encoding in decoded_parts
- )
+ return ""
+ return str(make_header(decode_header(filename)))
def has_matching_name(self, name: str) -> tuple[Any, Any] | None:
"""
diff --git a/providers/imap/tests/unit/imap/hooks/test_imap.py b/providers/imap/tests/unit/imap/hooks/test_imap.py
index 02e629702cb..54537aa29af 100644
--- a/providers/imap/tests/unit/imap/hooks/test_imap.py
+++ b/providers/imap/tests/unit/imap/hooks/test_imap.py
@@ -479,6 +479,22 @@ class TestImapHook:
assert len(attachments) == 1
assert attachments[0][0] == "тест.csv"
+ @patch(imaplib_string)
+ def test_retrieve_mail_attachments_with_plaintext_rfc2047_encoded_filename(self, mock_imaplib):
+
+ # Filename contains a mix of plain text and RFC 2047 encoded text.
+ # Example: 'bar =?utf-8?B?ZsOzbw==?=' decodes to 'bar fóo'
+ encoded_name = "bar =?utf-8?B?ZsOzbw==?="
+ decoded_name = "bar fóo"
+
+ _create_fake_imap(mock_imaplib, with_mail=True, attachment_name=encoded_name)
+
+ with ImapHook() as imap_hook:
+ attachments = imap_hook.retrieve_mail_attachments(decoded_name)
+
+ assert len(attachments) == 1
+ assert attachments[0][0] == decoded_name
+
@patch(imaplib_string)
def test_has_mail_attachment_with_max_mails(self, mock_imaplib):
mock_conn = _create_fake_imap(mock_imaplib, with_mail=True)