Hello,
it seems that the class PdfDate does not comply with the standard when
parsing dates.
pdf_reference_1_7.pdf
(https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf)
gives "D:199812231952-08'00'" (section 3.8.3, "Dates") as an example date, but
PdfDate does not parse it correctly.
See the attached patch, which makes PdfDate conformant.
A possible test would look like this:
----
// One test case: a human-readable label plus the PDF date string to parse.
struct name_date {
    std::string name; // label identifying the case in the test output
    std::string date; // PDF date string handed to PdfDate
};

// Date strings that a conformant parser must accept. The comment above each
// entry gives the UTC instant the string denotes; fields omitted from the
// string default to January, day 1, 00:00:00.
const name_date data[] = {
    // UTC 1998-12-24 03:52:00
    {"sample from pdf_reference_1_7.pdf", "D:199812231952-08'00'"},
    // UTC 2020-12-24 03:52:00 (19:52 at -08:00 rolls over to the next day)
    {"all fields set", "D:20201223195200-08'00'"},
    // UTC 2020-01-01 00:00:00
    {"set year", "D:2020"},
    // UTC 2020-01-01 00:00:00
    {"set year, month", "D:202001"},
    // UTC 2020-01-01 00:00:00
    {"set year, month, day", "D:20200101"},
    // UTC 2020-01-01 08:00:00
    {"only year and timezone set", "D:2020-08'00'"},
    // UTC 2020-03-15 11:08:20
    {"berlin", "D:20200315120820+01'00'"},
};
// Announce each sample by its label, then require that PdfDate reports the
// date string as valid.
for (const name_date& sample : data)
{
    std::cout << "Parse " << sample.name << "\n";
    assert(PoDoFo::PdfDate(sample.date).IsValid());
}
----
but I was not sure where to put it.
Index: src/podofo/base/PdfDate.cpp
===================================================================
--- src/podofo/base/PdfDate.cpp (revision 2016)
+++ src/podofo/base/PdfDate.cpp (working copy)
@@ -47,17 +47,14 @@
}
PdfDate::PdfDate( const time_t & t )
- : m_bValid( false )
+ : m_time( t ), m_bValid( false )
{
- m_time = t;
CreateStringRepresentation();
}
PdfDate::PdfDate( const PdfString & sDate )
- : m_bValid( false )
+ : m_time( -1 ), m_bValid( false )
{
- m_time = -1;
-
if ( !sDate.IsValid() )
{
m_szDate[0] = 0;
@@ -66,11 +63,8 @@
strncpy(m_szDate,sDate.GetString(),PDF_DATE_BUFFER_SIZE);
- struct tm _tm;
- memset( &_tm, 0, sizeof(_tm) );
- int nZoneShift = 0;
- int nZoneHour = 0;
- int nZoneMin = 0;
+ struct tm _tm{};
+ _tm.tm_mday = 1;
const char * pszDate = sDate.GetString();
if ( pszDate == NULL ) return;
@@ -79,51 +73,53 @@
if ( *pszDate++ != ':' ) return;
}
- if ( ParseFixLenNumber(pszDate,4,0,9999,_tm.tm_year) == false )
+ // year is not optional
+ if ( !ParseFixLenNumber(pszDate,4,0,9999,_tm.tm_year) )
return;
-
_tm.tm_year -= 1900;
- if ( *pszDate != '\0' ) {
- if ( ParseFixLenNumber(pszDate,2,1,12,_tm.tm_mon) == false )
- return;
+ // all other values are optional, if not set they are 0-init (except mday)
+ if ( ParseFixLenNumber(pszDate,2,1,12,_tm.tm_mon) )
+ {
_tm.tm_mon--;
- if ( *pszDate != '\0' ) {
- if ( ParseFixLenNumber(pszDate,2,1,31,_tm.tm_mday) == false ) return;
- if ( *pszDate != '\0' ) {
- if ( ParseFixLenNumber(pszDate,2,0,23,_tm.tm_hour) == false ) return;
- if ( *pszDate != '\0' ) {
- if ( ParseFixLenNumber(pszDate,2,0,59,_tm.tm_min) == false ) return;
- if ( *pszDate != '\0' ) {
- if ( ParseFixLenNumber(pszDate,2,0,59,_tm.tm_sec) == false ) return;
- if ( *pszDate != '\0' ) {
- switch(*pszDate++) {
- case '+':
- nZoneShift = -1;
- break;
- case '-':
- nZoneShift = 1;
- break;
- case 'Z':
- nZoneShift = 0;
- break;
- default:
- return;
- }
- if ( ParseFixLenNumber(pszDate,2,0,59,nZoneHour) == false ) return;
- if ( *pszDate == '\'' ) {
- pszDate++;
- if ( ParseFixLenNumber(pszDate,2,0,59,nZoneMin) == false ) return;
- if ( *pszDate != '\'' ) return;
- pszDate++;
- }
- }
- }
- }
+ if ( ParseFixLenNumber(pszDate,2,1,31,_tm.tm_mday) )
+ {
+ if ( ParseFixLenNumber(pszDate,2,0,23,_tm.tm_hour) )
+ {
+ if ( ParseFixLenNumber(pszDate,2,0,59,_tm.tm_min) )
+ ParseFixLenNumber(pszDate,2,0,59,_tm.tm_sec);
}
}
}
+ // zone is optional
+ int nZoneShift = 0;
+ int nZoneHour = 0;
+ int nZoneMin = 0;
+
+ if (*pszDate == 'Z') {
+ ++pszDate;
+ } else if (*pszDate != '\0') {
+ switch (*pszDate++) {
+ case '+':
+ nZoneShift = -1;
+ break;
+ case '-':
+ nZoneShift = 1;
+ break;
+ default:
+ return;
+ }
+ if ( !ParseFixLenNumber(pszDate,2,0,59,nZoneHour) ) return;
+ if (*pszDate == '\'') {
+ pszDate++;
+ if ( !ParseFixLenNumber(pszDate,2,0,59,nZoneMin) ) return;
+ if (*pszDate != '\'')
+ return;
+ pszDate++;
+ }
+ }
+
if ( *pszDate != '\0' )
{
return;
@@ -206,9 +202,9 @@
}
-bool PdfDate::ParseFixLenNumber(const char *&in, unsigned int length, int min, int max, int &ret)
+bool PdfDate::ParseFixLenNumber(const char *&in, unsigned int length, int min, int max, int &ret_)
{
- ret = 0;
+ int ret = 0;
for(unsigned int i=0;i<length;i++)
{
if ( in == NULL || !isdigit(*in)) return false;
@@ -216,6 +212,7 @@
in++;
}
if ( ret < min || ret > max ) return false;
+ ret_ = ret;
return true;
}
Index: src/podofo/base/PdfDate.h
===================================================================
--- src/podofo/base/PdfDate.h (revision 2016)
+++ src/podofo/base/PdfDate.h (working copy)
@@ -124,7 +124,7 @@
* \param length of number to read
* \param min minimal value of number
* \param max maximal value of number
- * \param ret parsed number
+ * \param ret parsed number (updated only on success)
*/
bool ParseFixLenNumber(const char *&in, unsigned int length, int min, int max, int &ret);
_______________________________________________
Podofo-users mailing list
Podofo-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/podofo-users