Author: jukka
Date: Wed Aug 11 21:48:50 2010
New Revision: 984599
URL: http://svn.apache.org/viewvc?rev=984599&view=rev
Log:
TIKA-476: Add page count to metadata
Use the xmpPTg:NPages property for the MS Office page count metadata
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java?rev=984599&r1=984598&r2=984599&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java Wed Aug 11 21:48:50 2010
@@ -32,6 +32,7 @@ import org.apache.poi.poifs.filesystem.D
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
/**
@@ -97,6 +98,7 @@ class SummaryExtractor {
set(Metadata.EDIT_TIME, summary.getEditTime());
set(Metadata.LAST_SAVED, summary.getLastSaveDateTime());
set(Metadata.PAGE_COUNT, summary.getPageCount());
+ metadata.set(PagedText.N_PAGES, summary.getPageCount());
set(Metadata.SECURITY, summary.getSecurity());
set(Metadata.WORD_COUNT, summary.getWordCount());
set(Metadata.LAST_PRINTED, summary.getLastPrinted());
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java?rev=984599&r1=984598&r2=984599&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java Wed Aug 11 21:48:50 2010
@@ -25,6 +25,7 @@ import org.apache.poi.openxml4j.opc.inte
import org.apache.poi.openxml4j.util.Nullable;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
@@ -106,6 +107,7 @@ public class MetadataExtractor {
addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
+ metadata.set(PagedText.N_PAGES, propsHolder.getPages());
addProperty(metadata, Metadata.PARAGRAPH_COUNT,
propsHolder.getParagraphs());
addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
.getPresentationFormat());