Custom properties on xlsx, docx, pptx
-------------------------------------
Key: TIKA-695
URL: https://issues.apache.org/jira/browse/TIKA-695
Project: Tika
Issue Type: Bug
Components: parser
Affects Versions: 1.0
Environment: All OS
Reporter: Etienne Jouvin
Priority: Minor
The parsers for Office OOXML files (xlsx, docx, pptx) do not extract custom properties.
In class MetadataExtractor, method extract, only core and extended properties
are retrieved.
I added something like this:
extractMetadata(extractor.getCustomProperties(), metadata);
/**
 * Reads the custom properties of a document and records each supported one
 * in the given metadata under the key {@code "custom:" + <property name>}.
 *
 * <p>Supported value kinds are: lpwstr, filetime, date, decimal, bool, int,
 * lpstr and i4. A property of any other kind is skipped.
 *
 * @param properties All custom properties.
 * @param metadata Metadata to complete with read properties.
 */
private void extractMetadata(CustomProperties properties, Metadata metadata) {
    final org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties holder =
            properties.getUnderlyingProperties();
    final DateUtils dates = DateUtils.getInstance();

    for (CTProperty entry : holder.getPropertyList()) {
        // Render the property's value as text, based on which kind is set.
        final String text;
        if (entry.isSetLpwstr()) {
            text = entry.getLpwstr();
        } else if (entry.isSetFiletime()) {
            text = dates.convertDate(entry.getFiletime(), null);
        } else if (entry.isSetDate()) {
            text = dates.convertDate(entry.getDate(), null);
        } else if (entry.isSetDecimal()) {
            final BigDecimal amount = entry.getDecimal();
            if (amount == null) {
                // A missing decimal is still forwarded, as a null value.
                text = null;
            } else {
                text = amount.toString();
            }
        } else if (entry.isSetBool()) {
            text = BooleanUtils.toStringTrueFalse(entry.getBool());
        } else if (entry.isSetInt()) {
            text = Integer.toString(entry.getInt());
        } else if (entry.isSetLpstr()) {
            text = entry.getLpstr();
        } else if (entry.isSetI4()) {
            // Numbers entered in Excel, for example, are stored as i4.
            text = Integer.toString(entry.getI4());
        } else {
            // Any other value kind is not handled: skip this property.
            continue;
        }

        // Use the "custom:" prefix, as is done for the old Office formats.
        addProperty(metadata, "custom:" + entry.getName(), text);
    }
}
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira