SameerMesiah97 commented on code in PR #66578:
URL: https://github.com/apache/airflow/pull/66578#discussion_r3211001711
##########
providers/imap/src/airflow/providers/imap/hooks/imap.py:
##########
@@ -388,14 +389,29 @@ def is_attachment(self) -> bool:
"""
return self.part.get_content_maintype() != "multipart" and
self.part.get("Content-Disposition")
+ def _decode_filename(self, raw_filename: str | None) -> str:
+ """
+ Decode a filename that may be RFC 2047 encoded.
+
+ :param raw_filename: The raw filename from the email part (may be
encoded or plain str).
+ :returns: A decoded Unicode string, or the original value if it cannot
be decoded.
+ """
+ if raw_filename is None:
+ return ""
+ decoded_parts = decode_header(raw_filename)
+ return "".join(
+ part.decode(enc) if isinstance(part, bytes) else part
+ for part, enc in decoded_parts
+ )
Review Comment:
There is one case where your implementation may break. If raw_filename is a
mix of plain text and encoded string (for e.g. 'bar =?utf-8?B?ZsOzbw==?='),
decode_parts will return `[(b'bar ', None), (b'f\xc3\xb3o', 'utf-8')]`.
Therefore, for the first item in the list, enc will become `None` and this will
raise a `TypeError`. Why not use `make_header` instead? This a documented edge
case. Please refer to the link below:
https://docs.python.org/3/library/email.header.html
##########
providers/imap/tests/unit/imap/hooks/test_imap.py:
##########
@@ -458,3 +458,51 @@ def test_has_mail_attachment_with_max_mails(self,
mock_imaplib):
assert result is True
assert 1 <= mock_conn.fetch.call_count <= 2
+
+ @patch(imaplib_string)
+ def test_retrieve_mail_attachments_with_rfc2047_encoded_filename(self,
mock_imaplib):
+ """Test that non-ASCII (RFC 2047 encoded) attachment filenames are
decoded to Unicode."""
+ # RFC 2047 encoded Cyrillic filename
+ # Decoded: "Перечень точек продаж_2026-04-21.xlsx"
+ encoded_filename =
"=?UTF-8?B?0J/QtdGA0LXRh9C10L3RjCDRgtC+0YfQtdC6INC/0YDQvtC00LDQtl8yMDI2LTA0LTIxLnhsc3g=?="
+ expected_filename = "Перечень точек продаж_2026-04-21.xlsx"
+
+ _create_fake_imap(mock_imaplib, with_mail=True,
attachment_name=encoded_filename)
+
+ with ImapHook() as imap_hook:
+ attachments =
imap_hook.retrieve_mail_attachments(expected_filename)
+
+ assert len(attachments) == 1
+ assert attachments[0][0] == expected_filename
+
+ @patch(imaplib_string)
+ def test_has_mail_attachment_with_rfc2047_encoded_filename(self,
mock_imaplib):
+ """Test that has_mail_attachment correctly matches RFC 2047 encoded
filenames."""
+ # RFC 2047 encoded Cyrillic filename
+ # Decoded: "Перечень точек продаж_2026-04-21.xlsx"
+ encoded_filename =
"=?UTF-8?B?0J/QtdGA0LXRh9C10L3RjCDRgtC+0YfQtdC6INC/0YDQvtC00LDQtl8yMDI2LTA0LTIxLnhsc3g=?="
+ expected_filename = "Перечень точек продаж_2026-04-21.xlsx"
+
+ _create_fake_imap(mock_imaplib, with_mail=True,
attachment_name=encoded_filename)
+
+ with ImapHook() as imap_hook:
+ result = imap_hook.has_mail_attachment(name=expected_filename)
+
+ assert result is True
+
+ @patch(open_string, new_callable=mock_open)
+ @patch(imaplib_string)
+ def test_download_mail_attachments_with_rfc2047_encoded_filename(self,
mock_imaplib, mock_open_method):
+ """Test that download_mail_attachments correctly handles RFC 2047
encoded filenames."""
+ # RFC 2047 encoded Cyrillic filename
+ # Decoded: "Перечень точек продаж_2026-04-21.xlsx"
+ encoded_filename =
"=?UTF-8?B?0J/QtdGA0LXRh9C10L3RjCDRgtC+0YfQtdC6INC/0YDQvtC00LDQtl8yMDI2LTA0LTIxLnhsc3g=?="
+ expected_filename = "Перечень точек продаж_2026-04-21.xlsx"
+
+ _create_fake_imap(mock_imaplib, with_mail=True,
attachment_name=encoded_filename)
+
+ with ImapHook() as imap_hook:
+ imap_hook.download_mail_attachments(name=expected_filename,
local_output_directory="test_directory")
+
+
mock_open_method.assert_called_once_with(f"test_directory/{expected_filename}",
"wb")
+
mock_open_method.return_value.write.assert_called_once_with(b"SWQsTmFtZQoxLEZlbGl4")
Review Comment:
I would add another test to cover the edge case I mentioned above i.e. mix
of plaintext and encoded string.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]