Author: jukka
Date: Wed Aug 11 22:07:03 2010
New Revision: 984605
URL: http://svn.apache.org/viewvc?rev=984605&view=rev
Log:
TIKA-468: Missing Slide-Count metadata for PPT files
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java?rev=984605&r1=984604&r2=984605&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
Wed Aug 11 22:07:03 2010
@@ -98,7 +98,9 @@ class SummaryExtractor {
set(Metadata.EDIT_TIME, summary.getEditTime());
set(Metadata.LAST_SAVED, summary.getLastSaveDateTime());
set(Metadata.PAGE_COUNT, summary.getPageCount());
- metadata.set(PagedText.N_PAGES, summary.getPageCount());
+ if (summary.getPageCount() > 0) {
+ metadata.set(PagedText.N_PAGES, summary.getPageCount());
+ }
set(Metadata.SECURITY, summary.getSecurity());
set(Metadata.WORD_COUNT, summary.getWordCount());
set(Metadata.LAST_PRINTED, summary.getLastPrinted());
@@ -109,6 +111,10 @@ class SummaryExtractor {
set(Metadata.MANAGER, summary.getManager());
set(Metadata.LANGUAGE, getLanguage(summary));
set(Metadata.CATEGORY, summary.getCategory());
+ set(Metadata.SLIDE_COUNT, summary.getSlideCount());
+ if (summary.getSlideCount() > 0) {
+ metadata.set(PagedText.N_PAGES, summary.getSlideCount());
+ }
}
private String getLanguage(DocumentSummaryInformation summary) {
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java?rev=984605&r1=984604&r2=984605&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
Wed Aug 11 22:07:03 2010
@@ -107,7 +107,11 @@ public class MetadataExtractor {
addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
- metadata.set(PagedText.N_PAGES, propsHolder.getPages());
+ if (propsHolder.getPages() > 0) {
+ metadata.set(PagedText.N_PAGES, propsHolder.getPages());
+ } else if (propsHolder.getSlides() > 0) {
+ metadata.set(PagedText.N_PAGES, propsHolder.getSlides());
+ }
addProperty(metadata, Metadata.PARAGRAPH_COUNT,
propsHolder.getParagraphs());
addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
.getPresentationFormat());