[
https://issues.apache.org/jira/browse/TIKA-695?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Etienne Jouvin updated TIKA-695:
--------------------------------
Description:
The parser for Office Open XML files (xlsx, docx, pptx) does not extract custom
properties. In class MetadataExtractor, method extract, only the core and
extended properties are retrieved.
I added a call like this, together with the method shown below:
extractMetadata(extractor.getCustomProperties(), metadata);
{quote}
/**
 * Add this method to read custom properties on document.
 *
 * <p>Every custom property whose value type is tested for below is converted to
 * a string and passed to {@code addProperty} under the key
 * {@code "custom:" + name}. Properties of any other value type are skipped.
 *
 * @param properties All custom properties.
 * @param metadata Metadata to complete with read properties.
 */
private void extractMetadata(CustomProperties properties, Metadata metadata) {
    org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
            propsHolder = properties.getUnderlyingProperties();
    DateUtils dateUtils = DateUtils.getInstance();
    for (CTProperty property : propsHolder.getPropertyList()) {
        /* Parse each property: convert its value to a string, by value type. */
        String value;
        if (property.isSetLpwstr()) {
            value = property.getLpwstr();
        } else if (property.isSetFiletime()) {
            value = dateUtils.convertDate(property.getFiletime(), null);
        } else if (property.isSetDate()) {
            value = dateUtils.convertDate(property.getDate(), null);
        } else if (property.isSetDecimal()) {
            BigDecimal bigDecimal = property.getDecimal();
            value = null == bigDecimal ? null : bigDecimal.toString();
        } else if (property.isSetBool()) {
            value = BooleanUtils.toStringTrueFalse(property.getBool());
        } else if (property.isSetInt()) {
            value = Integer.toString(property.getInt());
        } else if (property.isSetLpstr()) {
            value = property.getLpstr();
        } else if (property.isSetI4()) {
            /* Number in Excel, for example, is stored as i4. */
            value = Integer.toString(property.getI4());
        } else if (property.isSetI1()) {
            /*
             * Remaining scalar value types. These used to fall through to the
             * skip branch, so such properties (floating-point numbers among
             * them) were silently lost.
             */
            value = String.valueOf(property.getI1());
        } else if (property.isSetI2()) {
            value = String.valueOf(property.getI2());
        } else if (property.isSetI8()) {
            value = String.valueOf(property.getI8());
        } else if (property.isSetUi1()) {
            value = String.valueOf(property.getUi1());
        } else if (property.isSetUi2()) {
            value = String.valueOf(property.getUi2());
        } else if (property.isSetUi4()) {
            value = String.valueOf(property.getUi4());
        } else if (property.isSetUi8()) {
            java.math.BigInteger unsignedLong = property.getUi8();
            value = null == unsignedLong ? null : unsignedLong.toString();
        } else if (property.isSetUint()) {
            value = String.valueOf(property.getUint());
        } else if (property.isSetR4()) {
            value = String.valueOf(property.getR4());
        } else if (property.isSetR8()) {
            value = String.valueOf(property.getR8());
        } else if (property.isSetBstr()) {
            value = property.getBstr();
        } else {
            /* For other types (blobs, vectors, ...), do nothing. */
            continue;
        }
        /* Add the custom prefix, as done in old office format. */
        addProperty(metadata, "custom:" + property.getName(), value);
    }
}
{quote}
was:
Parser on office Xfiles do not get custom properties.
In class MetadataExtractor, method extract, only core and extended properties
are retrieve.
I added something like this:
extractMetadata(extractor.getCustomProperties(), metadata);
/**
 * Reads the custom properties of a document and records them in the metadata.
 *
 * <p>The value of each property is turned into a string according to its value
 * type and handed to {@code addProperty} under the key
 * {@code "custom:" + name}; a property of a type not tested for is skipped.
 *
 * @param properties All custom properties.
 * @param metadata Metadata to complete with read properties.
 */
private void extractMetadata(CustomProperties properties, Metadata metadata) {
    final org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
            underlying = properties.getUnderlyingProperties();
    final DateUtils dates = DateUtils.getInstance();

    for (CTProperty entry : underlying.getPropertyList()) {
        final String text;

        if (entry.isSetLpwstr()) {
            text = entry.getLpwstr();
        } else if (entry.isSetFiletime()) {
            text = dates.convertDate(entry.getFiletime(), null);
        } else if (entry.isSetDate()) {
            text = dates.convertDate(entry.getDate(), null);
        } else if (entry.isSetDecimal()) {
            final BigDecimal number = entry.getDecimal();
            if (number == null) {
                text = null;
            } else {
                text = number.toString();
            }
        } else if (entry.isSetBool()) {
            text = BooleanUtils.toStringTrueFalse(entry.getBool());
        } else if (entry.isSetInt()) {
            text = Integer.toString(entry.getInt());
        } else if (entry.isSetLpstr()) {
            text = entry.getLpstr();
        } else if (entry.isSetI4()) {
            // i4 is what a number coming from Excel, for example, shows up as.
            text = Integer.toString(entry.getI4());
        } else {
            // Value type not handled: leave this property out.
            continue;
        }

        // Same "custom:" prefix as is used for the old office format.
        final String key = "custom:" + entry.getName();
        addProperty(metadata, key, text);
    }
}
> Custom properties on xlsx, docx, pptx
> -------------------------------------
>
> Key: TIKA-695
> URL: https://issues.apache.org/jira/browse/TIKA-695
> Project: Tika
> Issue Type: Bug
> Components: parser
> Affects Versions: 1.0
> Environment: All OS
> Reporter: Etienne Jouvin
> Priority: Minor
>
> Parser on office Xfiles do not get custom properties.
> In class MetadataExtractor, method extract, only core and extended properties
> are retrieve.
> I added something like this:
> extractMetadata(extractor.getCustomProperties(), metadata);
> {quote}
> /**
> * Add this method to read custom properties on document.
> *
> * @param properties All custom properties.
> * @param metadata Metadata to complete with read properties.
> */
> private void extractMetadata(CustomProperties properties, Metadata
> metadata) {
>
> org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
> propsHolder = properties.getUnderlyingProperties();
> String value = null;
> DateUtils dateUtils = DateUtils.getInstance();
> BigDecimal bigDecimal;
> for (CTProperty property : propsHolder.getPropertyList()) {
> /* Parse each property */
> if (property.isSetLpwstr()) {
> value = property.getLpwstr();
> } else if (property.isSetFiletime()) {
> value =
> dateUtils.convertDate(property.getFiletime(), null);
> } else if (property.isSetDate()) {
> value =
> dateUtils.convertDate(property.getDate(), null);
> } else if (property.isSetDecimal()) {
> bigDecimal = property.getDecimal();
> value = null == bigDecimal ? null :
> bigDecimal.toString();
> } else if (property.isSetBool()) {
> value =
> BooleanUtils.toStringTrueFalse(property.getBool());
> } else if (property.isSetInt()) {
> value = Integer.toString(property.getInt());
> } else if (property.isSetLpstr()) {
> value = property.getLpstr();
> } else if (property.isSetI4()) {
> /* Number in Excel for example.... Why i4 ? Ask
> microsoft. */
> value = Integer.toString(property.getI4());
> } else {
> /* For other type, do nothing. */
> continue;
> }
> /* Add the custom prefix, as done in old office format.
> */
> addProperty(metadata, "custom:" + property.getName(),
> value);
> }
> }
> {quote}
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira