Author: rgauss
Date: Mon Jul 30 19:45:08 2012
New Revision: 1367249
URL: http://svn.apache.org/viewvc?rev=1367249&view=rev
Log:
TIKA-963: Backwards Compatibility for Metadata.DATE is Incorrect
- Added tests for backwards compatibility for Metadata.DATE and
Metadata.CREATION_DATE
- Moved Metadata.DATE to be part of the TikaCoreProperties.MODIFIED
composite property
- Added setting of Metadata.DATE to PRTParser
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
Mon Jul 30 19:45:08 2012
@@ -152,16 +152,17 @@ public interface TikaCoreProperties {
public static final Property CREATED =
Property.composite(DublinCore.CREATED,
new Property[] {
Office.CREATION_DATE,
- MSOffice.CREATION_DATE,
- Metadata.DATE
+ MSOffice.CREATION_DATE
});
/**
* @see DublinCore#MODIFIED
+ * @see Metadata#DATE
* @see Office#SAVE_DATE
*/
public static final Property MODIFIED =
Property.composite(DublinCore.MODIFIED,
new Property[] {
+ Metadata.DATE,
Office.SAVE_DATE,
MSOffice.LAST_SAVED,
Property.internalText(Metadata.MODIFIED),
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
Mon Jul 30 19:45:08 2012
@@ -86,6 +86,8 @@ public class PRTParser extends AbstractP
"-" + dateStr.substring(6,8) + "T" + dateStr.substring(8,10) +
":" +
dateStr.substring(10, 12) + ":00";
metadata.set(TikaCoreProperties.CREATED, formattedDate);
+ // TODO Metadata.DATE is used as modified, should it be here?
+ metadata.set(Metadata.DATE, formattedDate);
}
metadata.set(Metadata.CONTENT_TYPE, PRT_MIME_TYPE);
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
Mon Jul 30 19:45:08 2012
@@ -53,9 +53,11 @@ public class ExcelParserTest extends Tes
// Mon Oct 01 17:13:56 BST 2007
assertEquals("2007-10-01T16:13:56Z",
metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2007-10-01T16:13:56Z",
metadata.get(Metadata.CREATION_DATE));
// Mon Oct 01 17:31:43 BST 2007
assertEquals("2007-10-01T16:31:43Z",
metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2007-10-01T16:31:43Z", metadata.get(Metadata.DATE));
String content = handler.toString();
assertTrue(content.contains("Sample Excel Worksheet"));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
Mon Jul 30 19:45:08 2012
@@ -208,7 +208,9 @@ public class PowerPointParserTest extend
assertEquals("EJ04325S",
metadata.get(TikaCoreProperties.MODIFIER));
assertEquals("EJ04325S",
metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-08-22T13:32:58Z",
metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2011-08-22T13:32:58Z", metadata.get(Metadata.DATE));
assertEquals("2011-08-22T13:30:53Z",
metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2011-08-22T13:30:53Z",
metadata.get(Metadata.CREATION_DATE));
assertEquals("1", metadata.get(Office.SLIDE_COUNT));
assertEquals("3", metadata.get(Office.WORD_COUNT));
assertEquals("Test extraction properties pptx",
metadata.get(TikaCoreProperties.TITLE));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
Mon Jul 30 19:45:08 2012
@@ -78,7 +78,9 @@ public class ProjectParserTest extends T
assertEquals("CompanyA", metadata.get(OfficeOpenXMLExtended.COMPANY));
assertEquals("2011-11-24T10:58:00Z",
metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2011-11-24T10:58:00Z",
metadata.get(Metadata.CREATION_DATE));
assertEquals("2011-11-24T11:31:00Z",
metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2011-11-24T11:31:00Z", metadata.get(Metadata.DATE));
// Custom Project metadata is present with prefix
assertEquals("0%", metadata.get("custom:% Complete"));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
Mon Jul 30 19:45:08 2012
@@ -286,7 +286,9 @@ public class WordParserTest extends Tika
assertEquals("Etienne Jouvin",
metadata.get(TikaCoreProperties.MODIFIER));
assertEquals("Etienne Jouvin",
metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2012-01-03T22:14:00Z",
metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2012-01-03T22:14:00Z", metadata.get(Metadata.DATE));
assertEquals("2010-10-05T09:03:00Z",
metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2010-10-05T09:03:00Z",
metadata.get(Metadata.CREATION_DATE));
assertEquals("Microsoft Office Word",metadata.get(OfficeOpenXMLExtended.APPLICATION));
assertEquals("1", metadata.get(Office.PAGE_COUNT));
assertEquals("2", metadata.get(Office.WORD_COUNT));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
Mon Jul 30 19:45:08 2012
@@ -815,7 +815,9 @@ public class OOXMLParserTest extends Tik
assertEquals("EJ04325S",
metadata.get(TikaCoreProperties.MODIFIER));
assertEquals("EJ04325S",
metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-08-22T13:30:53Z",
metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2011-08-22T13:30:53Z",
metadata.get(Metadata.CREATION_DATE));
assertEquals("2011-08-22T13:32:49Z",
metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2011-08-22T13:32:49Z", metadata.get(Metadata.DATE));
assertEquals("1", metadata.get(Office.SLIDE_COUNT));
assertEquals("3", metadata.get(Office.WORD_COUNT));
assertEquals("Test extraction properties pptx",
metadata.get(TikaCoreProperties.TITLE));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
Mon Jul 30 19:45:08 2012
@@ -57,7 +57,9 @@ public class MP4ParserTest extends TestC
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
assertEquals("2012-01-28T18:39:18Z",
metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2012-01-28T18:39:18Z",
metadata.get(Metadata.CREATION_DATE));
assertEquals("2012-01-28T18:40:25Z",
metadata.get(TikaCoreProperties.MODIFIED));
+ assertEquals("2012-01-28T18:40:25Z", metadata.get(Metadata.DATE));
// Check the textual contents
String content = handler.toString();
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java?rev=1367249&r1=1367248&r2=1367249&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
Mon Jul 30 19:45:08 2012
@@ -88,7 +88,7 @@ public class ODFParserTest extends TikaT
// Check date metadata, both old-style and new-style
assertEquals("2007-09-14T11:07:10",
metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("2007-09-14T11:07:10",
metadata.get(Metadata.MODIFIED));
- assertEquals("2007-09-14T11:06:08", metadata.get(Metadata.DATE));
+ assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.DATE));
assertEquals("2007-09-14T11:06:08",
metadata.get(TikaCoreProperties.CREATED));
assertEquals("2007-09-14T11:06:08",
metadata.get(Metadata.CREATION_DATE));