sbp commented on a change in pull request #517:
URL: https://github.com/apache/incubator-ponymail/pull/517#discussion_r483526359
##########
File path: tools/generators.py
##########
@@ -19,14 +19,137 @@
This file contains the various ID generators for Pony Mail's archivers.
"""
+import base64
import hashlib
import email.utils
import time
import re
+# For optional nonce
+config = None
+
+# Headers from RFC 4871, the precursor to RFC 6376
+rfc4871_subset = {
+ b"from", b"sender", b"reply-to", b"subject", b"date",
+ b"message-id", b"to", b"cc", b"mime-version", b"content-type",
+ b"content-transfer-encoding", b"content-id",
+ b"content-description", b"resent-date", b"resent-from",
+ b"resent-sender", b"resent-to", b"resent-cc",
+ b"resent-message-id", b"in-reply-to", b"references", b"list-id",
+ b"list-help", b"list-unsubscribe", b"list-subscribe",
+ b"list-post", b"list-owner", b"list-archive", b"dkim-signature"
+}
+
+# Authenticity headers from RFC 8617
+rfc4871_and_rfc8617_subset = rfc4871_subset | {
+ b"arc-authentication-results", b"arc-message-signature",
+ b"arc-seal"
+}
+
+def rfc822_parse_dkim(suffix,
+ head_canon = False, body_canon = False,
+ head_subset = None, archive_list_id = None):
+ headers = []
+ keep = True
+ list_ids = set()
+
+ while suffix:
+ # Edge case: headers don't end LF (add LF)
+ line, suffix = (suffix.split(b"\n", 1) + [None])[:2]
+ if line in {b"\r", b"", None}:
+ break
+ lf = line.endswith(b"\r") and (suffix is not None)
+ end = b"\n" if lf else b"\r\n"
+ if line[0] in {0x09, 0x20}:
+ # Edge case: starts with a continuation (treat like From)
+ if headers and (keep is True):
+ headers[-1][1] += line + end
+ elif not line.startswith(b"From "):
+ # Edge case: header start contains no colon (use whole line)
+ # "A field-name MUST be contained on one line." (RFC 822 B.2)
+ k, v = (line.split(b":", 1) + [b""])[:2]
+ k_lower = k.lower()
+ if k_lower == "list-id":
+ list_ids.add(k_lower)
+ if (head_subset is None) or (k_lower in head_subset):
+ keep = True
+ headers.append([k, v + end])
+ else:
+ keep = False
+ # The remaining suffix is the body
+ body = (suffix or b"").replace(b"\r\n", b"\n")
+ body = body.replace(b"\n", b"\r\n")
+
+ # Optional X-Archive-List-ID augmentation
+ if (archive_list_id is not None) and (archive_list_id not in list_ids):
+ xali_value = b" " + bytes(archive_list_id, "ascii")
+ headers.append([b"X-Archive-List-ID", xali_value])
+ # Optional nonce from local config
+ if config is not None:
+ if (config.has_section("archiver") and
+ config.has_option("archiver", "nonce")):
+ nonce = config.get("archiver", "nonce")
+ headers.append([b"X-Archive-Nonce", nonce])
+ # Optional head canonicalisation (DKIM relaxed)
+ if head_canon is True:
+ for i in range(len(headers)):
+ k, v = headers[i]
+ crlf = v.endswith(b"\r\n")
+ if crlf is True:
+ v = v[:-2]
+ v = v.replace(b"\r\n", b"")
+ v = v.replace(b"\t", b" ")
+ v = v.strip(b" ")
+ v = b" ".join(vv for vv in v.split(b" ") if vv)
+ if crlf is True:
+ v = v + b"\r\n"
Review comment:
Lines
[58-62](https://github.com/apache/incubator-ponymail/pull/517/commits/b1643f7bedad52276a0977e6a9e497cedb9fe799#diff-29178abd2d43d3075e7721e865041930R58-R62)
have the effect of normalising LF line endings to CRLF. The following message,
for example, converts to the same output whether it uses LF or CRLF as
separators:
```
>>> from generators import rfc822_parse_dkim
>>> rfc822_parse_dkim(b"To: You\nFrom: Me\n\nHello\n")
([[b'To', b' You\r\n'], [b'From', b' Me\r\n']], b'Hello\r\n')
>>> rfc822_parse_dkim(b"To: You\r\nFrom: Me\r\n\r\nHello\r\n")
([[b'To', b' You\r\n'], [b'From', b' Me\r\n']], b'Hello\r\n')
```
This normalisation is part of the RFC 6376 algorithm. In a revision of the
code that I am preparing, this normalisation will be modularised, tested, and
documented so that its purpose is much clearer.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org