On Mon, 5 Apr 2021 06:04:49 + "Surla, Sai Kalyan" wrote:
> Is there any update on the issues.
I finally found time to work on the first issue (header detection),
for which we already had a workaround. I have created proper patches
(attached) for it and sent them to the upstream maintainer.
--
bye,
pabs
https://wiki.debian.org/PaulWise
From a4aa24ae5675b09385d0c88add48c3ab046e699d Mon Sep 17 00:00:00 2001
From: Paul Wise
Date: Sun, 30 May 2021 10:02:14 +0800
Subject: [PATCH 1/3] Add debugging for header detection
---
src/readpst.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/readpst.c b/src/readpst.c
index 6d94f15..b5910e9 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1591,6 +1591,8 @@ void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode,
DEBUG_ENT("write_normal_email");
pst_convert_utf8_null(item, &item->email->header);
+DEBUG_INFO(("PST headers\n%s\n", *item->email->header.str));
+DEBUG_INFO(("Extra MIME headers\n%s\n", *extra_mime_headers));
headers = valid_headers(item->email->header.str) ? item->email->header.str :
valid_headers(*extra_mime_headers) ? *extra_mime_headers :
NULL;
--
2.30.2
From bade93dcdb435bc7bec50cf4b54481731beea45c Mon Sep 17 00:00:00 2001
From: Paul Wise
Date: Sun, 30 May 2021 09:49:57 +0800
Subject: [PATCH 2/3] Also detect email headers wrapped with space instead of
tab
Spaces are commonly used for email header wrapping.
---
src/readpst.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/readpst.c b/src/readpst.c
index b5910e9..6663771 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1275,8 +1275,10 @@ int header_match(char *header, char*field) {
if (strncasecmp(header, field, n) == 0) return 1; // tag:{space}
if ((field[n-1] == ' ') && (strncasecmp(header, field, n-1) == 0)) {
char *crlftab = "\r\n\t";
+char *crlfspc = "\r\n ";
DEBUG_INFO(("Possible wrapped header = %s\n", header));
if (strncasecmp(header+n-1, crlftab, 3) == 0) return 1; // tag:{cr}{lf}{tab}
+if (strncasecmp(header+n-1, crlfspc, 3) == 0) return 1; // tag:{cr}{lf}{space}
}
return 0;
}
--
2.30.2
From da5f159caa66db380b793f9062a36888c9b12467 Mon Sep 17 00:00:00 2001
From: Paul Wise
Date: Sun, 30 May 2021 09:51:26 +0800
Subject: [PATCH 3/3] Detect reasonable email headers too
RFC 5322 specifies the syntax of email headers, most header fields are more
restricted though so use a restricted check in case the headers are bogus
parts of the body that happen to match RFC 5322.
Fixes: https://bugs.debian.org/984581
---
src/readpst.c | 60 +++
1 file changed, 60 insertions(+)
diff --git a/src/readpst.c b/src/readpst.c
index 6663771..97ba127 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1283,6 +1283,65 @@ int header_match(char *header, char*field) {
return 0;
}
+// https://en.wikipedia.org/wiki/Email#Message_header
+// https://www.rfc-editor.org/rfc/rfc5322.html
+// https://www.iana.org/assignments/message-headers/message-headers.xhtml
+int header_is_reasonable(char *header)
+{
+char *c;
+#define C *c
+// Returns 1 when the text starts with one plausible header field, 0 otherwise; C reads the character at cursor c
+// The header must not be NULL
+if (header) c = header;
+else return 0;
+
+// usually the header field name starts with upper-case: A-Z (anything else, an empty string included, is rejected)
+if (C >= 'A' && C <= 'Z') c++;
+else return 0;
+// scan the remainder of the field name until the colon
+while(1) {
+// most header field names use a limited set of characters: - 0-9 A-Z a-z
+if (
+(C >= 'A' && C <= 'Z') ||
+(C >= 'a' && C <= 'z') ||
+(C >= '0' && C <= '9') ||
+(C == '-')
+ ) {
+c++;
+// the header field name is then terminated with a colon
+} else if (C == ':') {
+ c++;
+ goto parse_header_field_value;
+// other characters (a NUL byte included) are an indicator of an invalid header
+} else {
+ return 0;
+}
+}
+// scan the field value, following folded continuation lines, until the field ends
+parse_header_field_value:
+while(1) {
+// header field values are printable US-ASCII (codes 33-126) plus space/tab
+if (
+(C >= 33 && C <= 126) ||
+(C == ' ' || C == '\t')
+ ) {
+c++;
+// the header field value is then terminated with CRLF
+} else if (C == '\r' && *(c+1) == '\n') {
+c += 2;
+// the value could continue to the next line though (a line starting with space/tab); otherwise the first field is complete and the header is accepted
+if (C == ' ' || C == '\t') c++;
+else return 1;
+// other characters (a bare CR or a NUL byte included) are an indicator of an invalid header
+} else {
+ return 0;
+}
+}
+// not reached at run time: the loop above only exits via return
+#undef C
+
+}
+
int valid_headers(char *header)
{
// headers are sometimes really bogus - they seem to be fragments of the
@@ -1303,6 +1362,7 @@ int valid_headers(char *header)
if (header_match(header, "X-ASG-Debug-ID: " )) return 1;
if (header_match(header, "X-Barracuda-URL: " )) return 1;
if