Hey, it's the overzealous newb again. APR doesn't have any function for parsing an RFC 822 date. Since these dates show up in e-mail and http headers, I've attached a patch to trunk that adds apr_parse_rfc822_date. I've also added it to test_rfcstr, but the test is just for one date. I'm guessing this needs to be added to time/win32/timestr.c as well, but I don't have a Windows box for testing.

If you're wondering why the time zone parsing in apr_parse_rfc822_time_zone is so crazy, take a look at section 5.1 of the spec (http://www.faqs.org/rfcs/rfc822.html). The UTC offset can be of the following forms:

1. +/-HHMM
2. A single letter corresponding to an offset (A = UTC-1, B = UTC-2, but J is skipped so I is UTC-9 and K is UTC-10). Letter order is reversed for positive UTC offsets (N is UTC+1, O is UTC+2). Also Z is UTC.
3. A popular abbreviation (GMT and UT are both UTC. MST and PDT are UTC-7).

Anyways, it works, but it's not written very well. There are too many hard-coded constants hanging around. Validation is pretty weak as well. Nonexistent dates (Wed, 17 Aug 2008) can be parsed without returning APR_EBADDATE.

It's a start, though. Suggestions or comments would be nice.

Geoff

Index: time/unix/timestr.c
===================================================================
--- time/unix/timestr.c (revision 689153)
+++ time/unix/timestr.c (working copy)
@@ -39,6 +39,36 @@
     "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
 };
 
+APR_DECLARE_DATA const char *apr_rfc822_time_zones[26][4] =
+{   /* row i holds the zone names for a UTC offset of (i - 12) hours */
+    {"M", NULL},               /* UTC-12 */
+    {"L", NULL},               /* UTC-11 */
+    {"K", NULL},               /* UTC-10 */
+    {"I", NULL},               /* UTC-9 (the letter J is not used) */
+    {"H", "PST", NULL},        /* UTC-8 */
+    {"G", "MST", "PDT", NULL}, /* UTC-7 */
+    {"F", "CST", "MDT", NULL}, /* UTC-6 */
+    {"E", "EST", "CDT", NULL}, /* UTC-5 */
+    {"D", "EDT", NULL},        /* UTC-4 */
+    {"C", NULL},               /* UTC-3 */
+    {"B", NULL},               /* UTC-2 */
+    {"A", NULL},               /* UTC-1 */
+    {"Z", "UT", "GMT", NULL},  /* UTC */
+    {"N", NULL},               /* UTC+1 */
+    {"O", NULL},               /* UTC+2 */
+    {"P", NULL},               /* UTC+3 */
+    {"Q", NULL},               /* UTC+4 */
+    {"R", NULL},               /* UTC+5 */
+    {"S", NULL},               /* UTC+6 */
+    {"T", NULL},               /* UTC+7 */
+    {"U", NULL},               /* UTC+8 */
+    {"V", NULL},               /* UTC+9 */
+    {"W", NULL},               /* UTC+10 */
+    {"X", NULL},               /* UTC+11 */
+    {"Y", NULL},               /* UTC+12 */
+    {NULL}                     /* sentinel row: every entry is NULL */
+};
+
 apr_status_t apr_rfc822_date(char *date_str, apr_time_t t)
 {
     apr_time_exp_t xt;
@@ -87,6 +117,226 @@
     return APR_SUCCESS;
 }
 
+/*
+ * Look up an abbreviated day name (case-sensitive) in apr_day_snames.
+ * Returns its index, where 0 is "Sun", or -1 if the name is unknown.
+ */
+apr_int32_t apr_parse_day_of_week(char *day_of_week)
+{
+    apr_int32_t i;
+
+    for (i = 0; i < 7; i++) {
+        if (strcmp(day_of_week, apr_day_snames[i]) == 0) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Look up an abbreviated month name (case-sensitive) in apr_month_snames.
+ * Returns its index (0-11), or -1 if the name is unknown.
+ */
+apr_int32_t apr_parse_month(char *month)
+{
+    apr_int32_t i;
+
+    for (i = 0; i < 12; i++) {
+        if (strcmp(month, apr_month_snames[i]) == 0) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Number of days in a month of the Gregorian calendar.
+ * year is the full year (e.g. 2002); month is 0-11.
+ */
+static apr_int32_t rfc822_days_in_month(apr_int32_t year, apr_int32_t month)
+{
+    static const apr_int32_t days[12] =
+        {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+    if (month == 1
+        && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)) {
+        return 29;
+    }
+    return days[month];
+}
+
+/*
+ * Day of the week (0 = Sunday) of a Gregorian date, using Sakamoto's
+ * method.  year is the full, positive year; month is 0-11; day is 1-31.
+ */
+static apr_int32_t rfc822_weekday(apr_int32_t year, apr_int32_t month,
+                                  apr_int32_t day)
+{
+    static const apr_int32_t month_key[12] =
+        {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4};
+
+    /* January and February count as part of the preceding year. */
+    if (month < 2) {
+        year--;
+    }
+    return (year + year / 4 - year / 100 + year / 400
+            + month_key[month] + day) % 7;
+}
+
+/*
+ * Parse an RFC 822 zone into seconds east of UTC.  Accepted forms are
+ * "+HHMM"/"-HHMM" and the names listed in apr_rfc822_time_zones.
+ * Returns APR_SUCCESS and stores the result in *offset, or APR_EBADDATE
+ * (leaving *offset untouched) if s is not a recognized zone.
+ *
+ * NOTE(review): RFC 2822 section 4.3 says RFC 822 defined the one-letter
+ * military zones in a non-standard way and advises reading them as
+ * "-0000"; the table's RFC 822 reading is kept here -- confirm intent.
+ */
+apr_status_t apr_parse_rfc822_time_zone(apr_int32_t *offset, const char *s)
+{
+    apr_int32_t i, j;
+
+    /* Numeric form: a sign followed by exactly four digits. */
+    if (strlen(s) == 5 && (s[0] == '+' || s[0] == '-')) {
+        apr_int32_t hours, minutes;
+
+        for (i = 1; i < 5; i++) {
+            if (s[i] < '0' || s[i] > '9') {
+                return APR_EBADDATE;
+            }
+        }
+
+        hours = (s[1] - '0') * 10 + (s[2] - '0');
+        minutes = (s[3] - '0') * 10 + (s[4] - '0');
+        if (minutes > 59) {
+            return APR_EBADDATE;
+        }
+
+        *offset = ((hours * 60) + minutes) * 60;
+        if (s[0] == '-') {
+            *offset = -*offset;
+        }
+        return APR_SUCCESS;
+    }
+
+    /*
+     * Named form (UT, GMT, PST, single letters, ...).  Row i of the table
+     * holds the names for an offset of (i - 12) hours.  The table ends
+     * with a row whose first entry is NULL; a row is an array and never
+     * compares equal to NULL itself, so the first entry is tested.
+     */
+    for (i = 0; apr_rfc822_time_zones[i][0] != NULL; i++) {
+        for (j = 0; j < 4 && apr_rfc822_time_zones[i][j] != NULL; j++) {
+            if (strcmp(s, apr_rfc822_time_zones[i][j]) == 0) {
+                *offset = (i - 12) * 60 * 60;
+                return APR_SUCCESS;
+            }
+        }
+    }
+
+    return APR_EBADDATE;
+}
+
+/*
+ * Parse an RFC 822 date-time, "[Day, ]DD Mon YYYY HH:MM[:SS] zone", into
+ * *xt.  Returns APR_SUCCESS, or APR_EBADDATE if the string does not match
+ * or a field is out of range; *xt is written only on success.
+ *
+ * Years below 1000 are widened as RFC 2822 section 4.3 prescribes.  When
+ * the day name is omitted, tm_wday is computed from the date; a supplied
+ * day name is not checked against the date.  tm_yday and tm_isdst are
+ * left unset.
+ */
+apr_status_t apr_parse_rfc822_date(apr_time_exp_t *xt, char *date_str)
+{
+    int year = 0, month, day = 0, hour = 0, minute = 0, second = 0;
+    int day_of_week, have_day_name = 1;
+    apr_int32_t offset = 0;
+    char day_of_week_str[4];
+    char month_str[4];
+    char zone[6];
+
+    /* %d matches the int arguments; the arrays are passed without '&'. */
+    if (sscanf(date_str, "%3s%*c %d %3s %d %d:%d:%d %5s",
+               day_of_week_str, &day, month_str, &year,
+               &hour, &minute, &second, zone) == 8) {
+        /* Day of week, seconds */
+    }
+    else if (sscanf(date_str, "%3s%*c %d %3s %d %d:%d %5s",
+                    day_of_week_str, &day, month_str, &year,
+                    &hour, &minute, zone) == 7) {
+        /* Day of week, no seconds */
+        second = 0;
+    }
+    else if (sscanf(date_str, "%d %3s %d %d:%d:%d %5s",
+                    &day, month_str, &year,
+                    &hour, &minute, &second, zone) == 7) {
+        /* No day of week, seconds */
+        have_day_name = 0;
+    }
+    else if (sscanf(date_str, "%d %3s %d %d:%d %5s",
+                    &day, month_str, &year, &hour, &minute, zone) == 6) {
+        /* No day of week, no seconds */
+        have_day_name = 0;
+        second = 0;
+    }
+    else {
+        return APR_EBADDATE;
+    }
+
+    month = apr_parse_month(month_str);
+    if (month == -1
+        || apr_parse_rfc822_time_zone(&offset, zone) != APR_SUCCESS) {
+        return APR_EBADDATE;
+    }
+
+    /* RFC 2822 4.3: years 00-49 mean 20xx, 50-999 are offsets from 1900. */
+    if (year < 0 || year > 9999) {
+        return APR_EBADDATE;
+    }
+    if (year < 50) {
+        year += 2000;
+    }
+    else if (year < 1000) {
+        year += 1900;
+    }
+
+    /* A seconds value of 60 is allowed for leap seconds. */
+    if (day < 1 || day > rfc822_days_in_month(year, month)
+        || hour < 0 || hour > 23
+        || minute < 0 || minute > 59
+        || second < 0 || second > 60) {
+        return APR_EBADDATE;
+    }
+
+    if (have_day_name) {
+        day_of_week = apr_parse_day_of_week(day_of_week_str);
+        if (day_of_week == -1) {
+            return APR_EBADDATE;
+        }
+    }
+    else {
+        /* No day name in the input: derive it from the date. */
+        day_of_week = rfc822_weekday(year, month, day);
+    }
+
+    xt->tm_year   = year - 1900;
+    xt->tm_mon    = month;
+    xt->tm_mday   = day;
+    xt->tm_hour   = hour;
+    xt->tm_min    = minute;
+    xt->tm_sec    = second;
+    xt->tm_wday   = day_of_week;
+    xt->tm_gmtoff = offset;
+    xt->tm_usec   = 0;
+    /* XXXX: set xt->tm_isdst if time zone string is EDT, CDT, etc */
+
+    return APR_SUCCESS;
+}
+
 apr_status_t apr_ctime(char *date_str, apr_time_t t)
 {
     apr_time_exp_t xt;
Index: test/testtime.c
===================================================================
--- test/testtime.c     (revision 689153)
+++ test/testtime.c     (working copy)
@@ -166,9 +166,20 @@
 {
     apr_status_t rv;
     char str[STR_SIZE];
+    apr_time_exp_t xt;
+    apr_time_t imp;
 
-    rv = apr_rfc822_date(str, now);
+    rv = apr_parse_rfc822_date(&xt, "Sat, 14 Sep 2002 19:05:36 GMT");
     if (rv == APR_ENOTIMPL) {
+        ABTS_NOT_IMPL(tc, "apr_parse_rfc822_date");
+    }
+    ABTS_TRUE(tc, rv == APR_SUCCESS);
+
+    rv = apr_time_exp_get(&imp, &xt);
+    ABTS_TRUE(tc, rv == APR_SUCCESS);
+
+    rv = apr_rfc822_date(str, imp);
+    if (rv == APR_ENOTIMPL) {
         ABTS_NOT_IMPL(tc, "apr_rfc822_date");
     }
     ABTS_TRUE(tc, rv == APR_SUCCESS);
Index: include/apr_time.h
===================================================================
--- include/apr_time.h  (revision 689153)
+++ include/apr_time.h  (working copy)
@@ -189,6 +189,25 @@
  */
 APR_DECLARE(apr_status_t) apr_rfc822_date(char *date_str, apr_time_t t);
 
+/**
+ * Parse an RFC822 time zone into an offset suitable for
+ * apr_time_exp_t.tm_gmtoff.  This is a helper for apr_parse_rfc822_date.
+ * @param offset On success, receives tm_gmtoff (seconds east of UTC)
+ * @param s String to parse: "+HHMM", "-HHMM", or a zone name such as "GMT"
+ * @return APR_SUCCESS, or APR_EBADDATE if s is not a recognized zone
+ */
+APR_DECLARE(apr_status_t) apr_parse_rfc822_time_zone(apr_int32_t *offset,
+                                                     const char *s);
+
+/**
+ * Parse an RFC822 date into an apr_time_exp_t.
+ * @param xt time struct to write to
+ * @param date_str String to parse, e.g. "Sat, 14 Sep 2002 19:05:36 GMT"
+ * @return APR_SUCCESS, or APR_EBADDATE if date_str cannot be parsed
+ */
+APR_DECLARE(apr_status_t) apr_parse_rfc822_date(apr_time_exp_t *xt,
+                                                char *date_str);
+
 /** length of a CTIME date */
 #define APR_CTIME_LEN (25)
 /**



Reply via email to