Hello,

It seems that the class PdfDate does not comply with the standard when parsing dates.

pdf_reference_1_7.pdf (https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf) gives "D:199812231952-08'00'" as an example date (section 3.8.3, Dates), but PdfDate does not parse it correctly.

See the attached patch, which makes PdfDate conformant.

A possible test would look like this:

----
// One labelled PDF date string used as a parser test case.
struct name_date {
        std::string name;   // human-readable label for the case
        std::string date;   // date string in PDF syntax, e.g. "D:YYYYMMDDHHmmSS-HH'mm'"
};

// Sample date strings; the trailing comment on each entry gives the
// equivalent UTC time. Every field after the year is optional.
const name_date data[] = {
    {"sample from pdf_reference_1_7.pdf", "D:199812231952-08'00'"},   // UTC 1998-12-24 03:52:00
    {"all fields set", "D:20201223195200-08'00'"},   // UTC 2020-12-24 03:52:00
    {"set year", "D:2020"},   // UTC 2020-01-01 00:00:00
    {"set year, month", "D:202001"},   // UTC 2020-01-01 00:00:00
    {"set year, month, day", "D:20200101"},   // UTC 2020-01-01 00:00:00
    {"only year and timezone set", "D:2020-08'00'"},   // UTC 2020-01-01 08:00:00
    {"berlin", "D:20200315120820+01'00'"},   // UTC 2020-03-15 11:08:20
};

// Run every sample through PdfDate: print its label, then require that
// the parsed date reports itself as valid.
for (const name_date& sample : data) {
        std::cout << "Parse " << sample.name << "\n";
        assert(PoDoFo::PdfDate(sample.date).IsValid());
}
----

but I was not sure where to put it.
Index: src/podofo/base/PdfDate.cpp
===================================================================
--- src/podofo/base/PdfDate.cpp	(revision 2016)
+++ src/podofo/base/PdfDate.cpp	(working copy)
@@ -47,17 +47,14 @@
 }
 
 PdfDate::PdfDate( const time_t & t )
-    : m_bValid( false )
+    : m_time( t ), m_bValid( false )
 {
-    m_time = t;
     CreateStringRepresentation();
 }
 
 PdfDate::PdfDate( const PdfString & sDate )
-    : m_bValid( false )
+    : m_time( -1 ), m_bValid( false )
 {
-    m_time = -1;
-
     if ( !sDate.IsValid() ) 
     {
         m_szDate[0] = 0;
@@ -66,11 +63,8 @@
 
     strncpy(m_szDate,sDate.GetString(),PDF_DATE_BUFFER_SIZE);
 
-    struct tm _tm;
-    memset( &_tm, 0, sizeof(_tm) );
-    int nZoneShift = 0;
-    int nZoneHour = 0;
-    int nZoneMin = 0;
+    struct tm _tm{};
+    _tm.tm_mday = 1;
 
     const char * pszDate = sDate.GetString();
     if ( pszDate == NULL ) return;
@@ -79,51 +73,53 @@
         if ( *pszDate++ != ':' ) return;
     }
 
-    if ( ParseFixLenNumber(pszDate,4,0,9999,_tm.tm_year) == false ) 
+    // year is not optional
+    if ( !ParseFixLenNumber(pszDate,4,0,9999,_tm.tm_year) )
         return;
-
     _tm.tm_year -= 1900;
-    if ( *pszDate != '\0' ) {
-        if ( ParseFixLenNumber(pszDate,2,1,12,_tm.tm_mon) == false ) 
-            return;
 
+    // all other values are optional, if not set they are 0-init (except mday)
+    if ( ParseFixLenNumber(pszDate,2,1,12,_tm.tm_mon) )
+    {
         _tm.tm_mon--;
-        if ( *pszDate != '\0' ) {
-            if ( ParseFixLenNumber(pszDate,2,1,31,_tm.tm_mday) == false ) return;
-            if ( *pszDate != '\0' ) {
-                if ( ParseFixLenNumber(pszDate,2,0,23,_tm.tm_hour) == false ) return;
-                if ( *pszDate != '\0' ) {
-                    if ( ParseFixLenNumber(pszDate,2,0,59,_tm.tm_min) == false ) return;
-                    if ( *pszDate != '\0' ) {
-                        if ( ParseFixLenNumber(pszDate,2,0,59,_tm.tm_sec) == false ) return;
-                        if ( *pszDate != '\0' ) {
-                            switch(*pszDate++) {
-                            case '+':
-                                nZoneShift = -1;
-                                break;
-                            case '-':
-                                nZoneShift = 1;
-                                break;
-                            case 'Z':
-                                nZoneShift = 0;
-                                break;
-                            default:
-                                return;
-                            }
-                            if ( ParseFixLenNumber(pszDate,2,0,59,nZoneHour) == false ) return;
-                            if ( *pszDate == '\'' ) {
-                                pszDate++;
-                                if ( ParseFixLenNumber(pszDate,2,0,59,nZoneMin) == false ) return;
-                                if ( *pszDate != '\'' ) return;
-                                pszDate++;
-                            }
-                        }
-                    }
-                }
+        if ( ParseFixLenNumber(pszDate,2,1,31,_tm.tm_mday) )
+        {
+            if ( ParseFixLenNumber(pszDate,2,0,23,_tm.tm_hour) )
+            {
+                if ( ParseFixLenNumber(pszDate,2,0,59,_tm.tm_min) )
+                    ParseFixLenNumber(pszDate,2,0,59,_tm.tm_sec);
             }
         }
     }
 
+    // zone is optional
+    int nZoneShift = 0;
+    int nZoneHour = 0;
+    int nZoneMin = 0;
+
+    if (*pszDate == 'Z') {
+        ++pszDate;
+    } else if (*pszDate != '\0') {
+        switch (*pszDate++) {
+        case '+':
+            nZoneShift = -1;
+            break;
+        case '-':
+            nZoneShift = 1;
+            break;
+        default:
+            return;
+        }
+        if ( !ParseFixLenNumber(pszDate,2,0,59,nZoneHour) ) return;
+        if (*pszDate == '\'') {
+            pszDate++;
+            if ( !ParseFixLenNumber(pszDate,2,0,59,nZoneMin) ) return;
+            if (*pszDate != '\'')
+                return;
+            pszDate++;
+        }
+    }
+
     if ( *pszDate != '\0' ) 
     {
         return;
@@ -206,9 +202,9 @@
 }
 
 
-bool PdfDate::ParseFixLenNumber(const char *&in, unsigned int length, int min, int max, int &ret)
+bool PdfDate::ParseFixLenNumber(const char *&in, unsigned int length, int min, int max, int &ret_)
 {
-    ret = 0;
+    int ret = 0;
     for(unsigned int i=0;i<length;i++)
     {
         if ( in == NULL || !isdigit(*in)) return false;
@@ -216,6 +212,7 @@
         in++;
     }
     if ( ret < min || ret > max ) return false;
+    ret_ = ret;
     return true;
 }
 
Index: src/podofo/base/PdfDate.h
===================================================================
--- src/podofo/base/PdfDate.h	(revision 2016)
+++ src/podofo/base/PdfDate.h	(working copy)
@@ -124,7 +124,7 @@
      *  \param length of number to read 
      *  \param min minimal value of number
      *  \param max maximal value of number
-     *  \param ret parsed number
+     *  \param ret parsed number (updated only on success)
      */
     bool ParseFixLenNumber(const char *&in, unsigned int length, int min, int max, int &ret);
 
_______________________________________________
Podofo-users mailing list
Podofo-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/podofo-users

Reply via email to