This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 49ed3099f557349531076f7265db1b453a9627fb Author: TALLISON <[email protected]> AuthorDate: Thu Sep 6 11:17:13 2018 -0400 TIKA-2552 -- upgrade to POI 4.0.0 -- fix merge conflicts --- .../parser/microsoft/ooxml/MetadataExtractor.java | 109 +++++++++++++-------- 1 file changed, 70 insertions(+), 39 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java index 30f2975..e5da8ce 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java @@ -16,17 +16,12 @@ */ package org.apache.tika.parser.microsoft.ooxml; -import java.math.BigDecimal; -import java.util.Date; - -import org.apache.poi.POIXMLProperties.CoreProperties; -import org.apache.poi.POIXMLProperties.CustomProperties; -import org.apache.poi.POIXMLProperties.ExtendedProperties; -import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.ooxml.POIXMLProperties; +import org.apache.poi.ooxml.extractor.POIXMLTextExtractor; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.openxml4j.util.Nullable; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.MSOffice; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Office; import org.apache.tika.metadata.OfficeOpenXMLCore; @@ -42,6 +37,10 @@ import org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException; import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; +import java.math.BigDecimal; +import java.util.Date; +import java.util.Optional; + /** * OOXML metadata extractor. * <p/> @@ -70,7 +69,7 @@ public class MetadataExtractor { } } - private void extractMetadata(CoreProperties properties, Metadata metadata) { + private void extractMetadata(POIXMLProperties.CoreProperties properties, Metadata metadata) { PackagePropertiesPart propsHolder = properties .getUnderlyingProperties(); @@ -87,7 +86,7 @@ public class MetadataExtractor { .getIdentifierProperty()); addProperty(metadata, OfficeOpenXMLCore.SUBJECT, propsHolder.getSubjectProperty()); - addProperty(metadata, Office.KEYWORDS, propsHolder + addProperty(metadata, TikaCoreProperties.KEYWORDS, propsHolder .getKeywordsProperty()); setProperty(metadata, TikaCoreProperties.LANGUAGE, propsHolder .getLanguageProperty()); @@ -99,13 +98,23 @@ public class MetadataExtractor { .getModifiedProperty()); setProperty(metadata, OfficeOpenXMLCore.REVISION, propsHolder .getRevisionProperty()); - + // TODO: Move to OO subject in Tika 2.0 + setProperty(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, + propsHolder.getSubjectProperty()); setProperty(metadata, TikaCoreProperties.TITLE, propsHolder.getTitleProperty()); setProperty(metadata, OfficeOpenXMLCore.VERSION, propsHolder.getVersionProperty()); + // Legacy Tika-1.0 style stats + // TODO Remove these in Tika 2.0 + setProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty()); + setProperty(metadata, Metadata.CONTENT_STATUS, propsHolder + .getContentStatusProperty()); + setProperty(metadata, Metadata.REVISION_NUMBER, propsHolder + .getRevisionProperty()); + setProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty()); } - private void extractMetadata(ExtendedProperties properties, + private void extractMetadata(POIXMLProperties.ExtendedProperties properties, Metadata metadata) { CTProperties propsHolder = properties.getUnderlyingProperties(); @@ -143,9 +152,26 @@ public class MetadataExtractor { setProperty(metadata, Office.WORD_COUNT, propsHolder.getWords()); setProperty(metadata, Office.CHARACTER_COUNT, propsHolder.getCharacters()); setProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces()); + + // Legacy Tika-1.0 style stats + // TODO Remove these in Tika 2.0 + setProperty(metadata, Metadata.APPLICATION_NAME, propsHolder.getApplication()); + setProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder.getAppVersion()); + setProperty(metadata, Metadata.MANAGER, propsHolder.getManager()); + setProperty(metadata, Metadata.NOTES, propsHolder.getNotes()); + setProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder.getPresentationFormat()); + setProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate()); + setProperty(metadata, Metadata.TOTAL_TIME, totalTime); + setProperty(metadata, MSOffice.PAGE_COUNT, propsHolder.getPages()); + setProperty(metadata, MSOffice.SLIDE_COUNT, propsHolder.getSlides()); + setProperty(metadata, MSOffice.PARAGRAPH_COUNT, propsHolder.getParagraphs()); + setProperty(metadata, MSOffice.LINE_COUNT, propsHolder.getLines()); + setProperty(metadata, MSOffice.WORD_COUNT, propsHolder.getWords()); + setProperty(metadata, MSOffice.CHARACTER_COUNT, propsHolder.getCharacters()); + setProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces()); } - private void extractMetadata(CustomProperties properties, + private void extractMetadata(POIXMLProperties.CustomProperties properties, Metadata metadata) { org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties props = properties.getUnderlyingProperties(); @@ -229,35 +255,40 @@ public class MetadataExtractor { } } - private <T> void setProperty(Metadata metadata, Property property, Nullable<T> nullableValue) { - T value = nullableValue.getValue(); - if (value != null) { - if (value instanceof Date) { - metadata.set(property, (Date) value); - } else if (value instanceof String) { - metadata.set(property, (String) value); - } else if (value instanceof Integer) { - metadata.set(property, (Integer) value); - } else if (value instanceof Double) { - metadata.set(property, (Double) value); - } + private <T> void setProperty(Metadata metadata, Property property, Optional<T> nullableValue) { + if (!nullableValue.isPresent()) { + return; + } + T value = nullableValue.get(); + if (value instanceof Date) { + metadata.set(property, (Date) value); + } else if (value instanceof String) { + metadata.set(property, (String) value); + } else if (value instanceof Integer) { + metadata.set(property, (Integer) value); + } else if (value instanceof Double) { + metadata.set(property, (Double) value); } } - private <T> void addProperty(Metadata metadata, Property property, Nullable<T> nullableValue) { - T value = nullableValue.getValue(); - if (value != null) { - if (value instanceof String) { - metadata.add(property, (String) value); - } else { - throw new IllegalArgumentException("Can't add property of class: "+nullableValue.getClass()); - } + private <T> void addProperty(Metadata metadata, Property property, Optional<T> nullableValue) { + if (!nullableValue.isPresent()) { + return; + } + T value = nullableValue.get(); + if (value instanceof String) { + metadata.add(property, (String) value); + } else { + throw new IllegalArgumentException("Can't add property of class: " + nullableValue.getClass()); } } - private void setProperty(Metadata metadata, String name, Nullable<?> value) { - if (value.getValue() != null) { - setProperty(metadata, name, value.getValue().toString()); + + private void setProperty(Metadata metadata, String property, Optional<String> nullableValue) { + if (!nullableValue.isPresent()) { + return; } + String value = nullableValue.get(); + metadata.set(property, value); } private void setProperty(Metadata metadata, Property property, String value) { @@ -284,11 +315,11 @@ public class MetadataExtractor { } } - private void addMultiProperty(Metadata metadata, Property property, Nullable<String> value) { - if (value == null) { + private void addMultiProperty(Metadata metadata, Property property, Optional<String> value) { + if (!value.isPresent()) { return; } - SummaryExtractor.addMulti(metadata, property, value.getValue()); + SummaryExtractor.addMulti(metadata, property, value.get()); } }
