According to Joe R. Jah:
> On Fri, 23 Nov 2001, Gilles Detillieux wrote:
> > So, unless there are objections from other developers, I'm planning to
> > put this code into 3.1.6's htdig/Document.cc next week, as well as
> > eventually into 3.2.0b4's htlib/HtDateTime.cc, to clear up all the
> > problems we've had.  I think it will allow us to completely do away
> > with strptime and mktime.
> > 
> > I'd appreciate it if you'd have a look at this code and offer your
> > critique.
> 
> (How) can it be applied as a patch to the last/next snapshot?

Like this: save this message to a file and run "patch -p0 < this-message"
from the top of the htdig-3.1.6 source directory (latest snapshot) to apply
the new date parsing code.  I'll probably commit it to CVS today or tomorrow.


--- htdig/Document.cc.orig      Fri Sep 14 09:21:05 2001
+++ htdig/Document.cc   Tue Nov 27 15:06:08 2001
@@ -184,62 +184,206 @@ Document::Url(char *u)
 }
 
 
-//*****************************************************************************
-// time_t Document::getdate(char *datestring)
-//   Convert a RFC850 date string into a time value
+#define EPOCH  1970
+
+//
+// time_t parsedate(char *date)
+//   - converts RFC850 or RFC1123 date string into a time value
 //
 time_t
-Document::getdate(char *datestring)
+parsedate(char *date)
 {
-    struct tm   tm;
-    time_t      ret;    
-    char        *s;    
+    char       *s;
+    int                day, month, year, hour, minute, second;
 
     //
     // Two possible time designations:
-    //      Tuesday, 01-Jul-97 16:48:02 GMT
+    //      Tuesday, 01-Jul-97 16:48:02 GMT     (RFC850)
     // or
-    //      Thu, 01 May 1997 00:40:42 GMT
+    //      Thu, 01 May 1997 00:40:42 GMT       (RFC1123)
     //
-    // We strip off the weekday before sending to strptime
+    // We strip off the weekday because we don't need it, and
     // because some servers send invalid weekdays!
     // (Some don't even send a weekday, but we'll be flexible...)
- 
-    s = strchr(datestring, ',');
-    if (s)
-        s++;
+
+    s = date;
+    while (*s && *s != ',')
+       s++;
+    if (*s)
+       s++;
     else
-        s = datestring;
+       s = date;
     while (isspace(*s))
-        s++;
-    if (strchr(s, '-') && mystrptime(s, "%d-%b-%y %T", &tm) ||
-            mystrptime(s, "%d %b %Y %T", &tm))
-      {
-       // correct for mystrptime, if %Y format saw only a 2 digit year
-       if (tm.tm_year < 0)
-         tm.tm_year += 1900;
-       tm.tm_yday = 0; // clear these to prevent problems in strftime()
-       tm.tm_wday = 0;
-       
-       if (debug > 2)
-         {
-           cout << "Translated " << datestring << " to ";
-           char        buffer[100];
-           // Leave out %a for weekday, because we don't set it anymore...
-           //strftime(buffer, sizeof(buffer), "%a, %d %b %Y %T", &tm);
-           // Let's just do away with strftime() altogether for this...
-           //strftime(buffer, sizeof(buffer), "%d %b %Y %T", &tm);
-           sprintf(buffer, "%4d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900,
-               tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
-           cout << buffer << " (" << tm.tm_year << ")" << endl;
-         }
-#if HAVE_TIMEGM
-       ret = timegm(&tm);
-#else
-       ret = mytimegm(&tm);
-#endif
-      }
-    else
+       s++;
+
+    // get day...
+    if (!isdigit(*s))
+       return 0;
+    day = 0;
+    while (isdigit(*s))
+       day = day * 10 + (*s++ - '0');
+    if (day > 31)
+       return 0;
+    while (*s == '-' || isspace(*s))
+       s++;
+
+    // get month...
+    switch (*s++) {
+    case 'J': case 'j':
+       switch (*s++) {
+       case 'A': case 'a':
+           month = 1;
+           s++;
+           break;
+       case 'U': case 'u':
+           switch (*s++) {
+           case 'N': case 'n':
+               month = 6;
+               break;
+           case 'L': case 'l':
+               month = 7;
+               break;
+           default:
+               return 0;
+           }
+           break;
+       default:
+           return 0;
+       }
+       break;
+    case 'F': case 'f':
+       month = 2;
+       s += 2;
+       break;
+    case 'M': case 'm':
+       switch (*s++) {
+       case 'A': case 'a':
+           switch (*s++) {
+           case 'R': case 'r':
+               month = 3;
+               break;
+           case 'Y': case 'y':
+               month = 5;
+               break;
+           default:
+               return 0;
+           }
+           break;
+       default:
+           return 0;
+       }
+       break;
+    case 'A': case 'a':
+       switch (*s++) {
+       case 'P': case 'p':
+           month = 4;
+           s++;
+           break;
+       case 'U': case 'u':
+           month = 8;
+           s++;
+           break;
+       default:
+           return 0;
+       }
+       break;
+    case 'S': case 's':
+       month = 9;
+       s += 2;
+       break;
+    case 'O': case 'o':
+       month = 10;
+       s += 2;
+       break;
+    case 'N': case 'n':
+       month = 11;
+       s += 2;
+       break;
+    case 'D': case 'd':
+       month = 12;
+       s += 2;
+       break;
+    default:
+       return 0;
+    }
+    while (*s == '-' || isspace(*s))
+       s++;
+
+    // get year...
+    if (!isdigit(*s))
+       return 0;
+    year = 0;
+    while (isdigit(*s))
+       year = year * 10 + (*s++ - '0');
+    if (year < 69)
+       year += 2000;
+    else if (year < 1900)
+       year += 1900;
+    else if (year >= 19100)    // seen some programs do it, why not check?
+       year -= (19100-2000);
+    while (isspace(*s))
+       s++;
+
+    // get hour...
+    if (!isdigit(*s))
+       return 0;
+    hour = 0;
+    while (isdigit(*s))
+       hour = hour * 10 + (*s++ - '0');
+    if (hour > 23)
+       return 0;
+    while (*s == ':' || isspace(*s))
+       s++;
+
+    // get minute...
+    if (!isdigit(*s))
+       return 0;
+    minute = 0;
+    while (isdigit(*s))
+       minute = minute * 10 + (*s++ - '0');
+    if (minute > 59)
+       return 0;
+    while (*s == ':' || isspace(*s))
+       s++;
+
+    // get second...
+    if (!isdigit(*s))
+       return 0;
+    second = 0;
+    while (isdigit(*s))
+       second = second * 10 + (*s++ - '0');
+    if (second > 59)
+       return 0;
+    while (*s == ':' || isspace(*s))
+       s++;
+
+    //
+    // Calculate date as seconds since 01 Jan 1970 00:00:00 GMT
+    // This is based somewhat on the date calculation in NetBSD's
+    // cd9660_node.c; I was unable to find a reference for that code.
+    // It works, though!
+    //
+    return (time_t) (((((367L*year - 7L*(year+(month+9)/12)/4
+                                  - 3L*(((year)+((month)+9)/12-1)/100+1)/4
+                                  + 275L*(month)/9 + day) -
+                       (367L*EPOCH - 7L*(EPOCH+(1+9)/12)/4
+                                  - 3L*((EPOCH+(1+9)/12-1)/100+1)/4
+                                  + 275L*1/9 + 1))
+                      * 24 + hour) * 60 + minute) * 60 + second);
+}
+
+
+//*****************************************************************************
+// time_t Document::getdate(char *datestring)
+//   Convert an RFC850 or RFC1123 date string into a time value
+//
+time_t
+Document::getdate(char *datestring)
+{
+    time_t      ret;    
+
+    ret = parsedate(datestring);
+    if (!ret)
       {
        if (debug > 2)
          {
@@ -249,13 +393,12 @@ Document::getdate(char *datestring)
        ret = time(0); // This isn't the best, but it works. *fix*
       }
     if (debug > 2)
-    {
-        cout << "And converted to ";
-        struct tm *tm2 = gmtime(&ret);
+      {
+        struct tm *tm = gmtime(&ret);
         char    buffer[100];
-        strftime(buffer, sizeof(buffer), "%a, %d %b %Y %T", tm2);
-        cout << buffer << endl;
-    }
+        strftime(buffer, sizeof(buffer), "%a, %d %b %Y %T", tm);
+       cout << "Converted " << datestring << " to " << buffer << endl;
+      }
     return ret;
 }
 


-- 
Gilles R. Detillieux              E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

_______________________________________________
htdig-dev mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/htdig-dev

Reply via email to