Author: jukka
Date: Wed Aug 11 21:48:50 2010
New Revision: 984599

URL: http://svn.apache.org/viewvc?rev=984599&view=rev
Log:
TIKA-476: Add page count to metadata

Use the xmpTPg:NPages property for the MS Office page count metadata

Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java?rev=984599&r1=984598&r2=984599&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
 Wed Aug 11 21:48:50 2010
@@ -32,6 +32,7 @@ import org.apache.poi.poifs.filesystem.D
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.PagedText;
 import org.apache.tika.metadata.Property;
 
 /**
@@ -97,6 +98,7 @@ class SummaryExtractor {
         set(Metadata.EDIT_TIME, summary.getEditTime());
         set(Metadata.LAST_SAVED, summary.getLastSaveDateTime());
         set(Metadata.PAGE_COUNT, summary.getPageCount());
+        metadata.set(PagedText.N_PAGES, summary.getPageCount());
         set(Metadata.SECURITY, summary.getSecurity());
         set(Metadata.WORD_COUNT, summary.getWordCount());
         set(Metadata.LAST_PRINTED, summary.getLastPrinted());

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java?rev=984599&r1=984598&r2=984599&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
 Wed Aug 11 21:48:50 2010
@@ -25,6 +25,7 @@ import org.apache.poi.openxml4j.opc.inte
 import org.apache.poi.openxml4j.util.Nullable;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.PagedText;
 import org.apache.tika.metadata.Property;
 import 
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
 
@@ -106,6 +107,7 @@ public class MetadataExtractor {
         addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
         addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
         addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
+        metadata.set(PagedText.N_PAGES, propsHolder.getPages());
         addProperty(metadata, Metadata.PARAGRAPH_COUNT, 
propsHolder.getParagraphs());
         addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
                 .getPresentationFormat());


Reply via email to