This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4410 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 6ad53f44b2e738e00cc28fd61291f80be650525c Author: tallison <[email protected]> AuthorDate: Thu May 1 14:35:01 2025 -0400 TIKA-4395 -- improve handling logging of container detection --- .../main/java/org/apache/tika/metadata/Office.java | 18 +++++++ .../apache/tika/metadata/TikaCoreProperties.java | 7 ++- .../metadata/writefilter/StandardWriteFilter.java | 2 +- .../parser/microsoft/AbstractPOIFSExtractor.java | 8 +-- .../microsoft/ooxml/CommentPersonHandler.java | 47 ++++++++++++++++ .../parser/microsoft/ooxml/OPCPackageWrapper.java | 1 + .../ooxml/XSSFBExcelExtractorDecorator.java | 3 +- .../ooxml/XSSFExcelExtractorDecorator.java | 63 ++++++++++++++++++++-- .../microsoft/POIContainerExtractionTest.java | 7 +-- .../parser/microsoft/ooxml/OOXMLParserTest.java | 8 +-- 10 files changed, 142 insertions(+), 22 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Office.java b/tika-core/src/main/java/org/apache/tika/metadata/Office.java index aa4b9f002..9d5442b67 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/Office.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/Office.java @@ -152,4 +152,22 @@ public interface Office { Property PROG_ID = Property.internalText("msoffice:progID"); Property OCX_NAME = Property.internalText("msoffice:ocxName"); + + Property EMBEDDED_STORAGE_CLASS_ID = Property.internalText("msoffice:embeddedStorageClassId"); + + Property HAS_HIDDEN_SHEETS = Property.internalBoolean("msoffice:excel:has-hidden-sheets"); + + Property HAS_VERY_HIDDEN_SHEETS = Property.internalBoolean("msoffice:excel:has-very-hidden-sheets"); + + Property HIDDEN_SHEET_NAMES = Property.internalTextBag("msoffice:excel:hidden-sheet-names"); + Property VERY_HIDDEN_SHEET_NAMES = Property.internalTextBag("msoffice:excel:very-hidden-sheet-names"); + + Property PROTECTED_WORKSHEET = Property.internalBoolean("msoffice:excel:protected-worksheet"); + + Property WORKBOOK_CODENAME = 
Property.internalText("msoffice:excel:workbook-codename"); + + Property HAS_COMMENTS = Property.internalBoolean("msoffice:has-comments"); + + Property COMMENT_PERSONS = Property.internalTextBag("msoffice:comment-person-display-name"); + } diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java index 3d7d34d4e..7e36624c5 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java @@ -175,10 +175,9 @@ public interface TikaCoreProperties { Property TIKA_DETECTED_LANGUAGE_CONFIDENCE_RAW = Property.externalRealSeq(TIKA_META_PREFIX + "detected_language_confidence_raw"); - String RESOURCE_NAME_KEY = "resourceName"; - String PROTECTED = "protected"; - String EMBEDDED_RELATIONSHIP_ID = "embeddedRelationshipId"; - String EMBEDDED_STORAGE_CLASS_ID = "embeddedStorageClassId"; + Property RESOURCE_NAME_KEY = Property.internalText(TIKA_META_PREFIX + "resourceName"); + Property EMBEDDED_RELATIONSHIP_ID = Property.internalText(TIKA_META_PREFIX + "embeddedRelationshipId"); + String EMBEDDED_RESOURCE_TYPE_KEY = "embeddedResourceType"; /** * Some file formats can store information about their original diff --git a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java index a245e8d2c..38763d079 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java @@ -81,7 +81,7 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable { ALWAYS_SET_FIELDS.add(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE.getName()); ALWAYS_SET_FIELDS.add(TikaCoreProperties.CONTENT_TYPE_HINT.getName()); 
ALWAYS_SET_FIELDS.add(TikaCoreProperties.TIKA_CONTENT.getName()); - ALWAYS_SET_FIELDS.add(TikaCoreProperties.RESOURCE_NAME_KEY); + ALWAYS_SET_FIELDS.add(TikaCoreProperties.RESOURCE_NAME_KEY.getName()); ALWAYS_SET_FIELDS.add(AccessPermissions.EXTRACT_CONTENT.getName()); ALWAYS_SET_FIELDS.add(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY.getName()); ALWAYS_SET_FIELDS.add(Metadata.CONTENT_DISPOSITION); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java index 8910b1c00..39d5a0f0a 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java @@ -126,7 +126,7 @@ abstract class AbstractPOIFSExtractor { embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, relationshipID); } if (storageClassID != null) { - embeddedMetadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, + embeddedMetadata.set(Office.EMBEDDED_STORAGE_CLASS_ID, storageClassID.toString()); } if (mediaType != null) { @@ -200,7 +200,7 @@ abstract class AbstractPOIFSExtractor { // What kind of document is it? 
metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, dir.getName()); if (dir.getStorageClsid() != null) { - metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, + metadata.set(Office.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString()); } POIFSDocumentType type = POIFSDocumentType.detectType(dir); @@ -383,7 +383,7 @@ abstract class AbstractPOIFSExtractor { return; } if (dir.getStorageClsid() != null) { - metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, + metadata.set(Office.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString()); } embeddedDocumentUtil.parseEmbedded(tis, xhtml, metadata, outputHtml); @@ -398,7 +398,7 @@ abstract class AbstractPOIFSExtractor { try (TikaInputStream tis = TikaInputStream.get(new byte[0])) { tis.setOpenContainer(dir); if (dir.getStorageClsid() != null) { - metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, + metadata.set(Office.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString()); } embeddedDocumentUtil.parseEmbedded(tis, xhtml, metadata, outputHtml); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/CommentPersonHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/CommentPersonHandler.java new file mode 100644 index 000000000..c7efda1ae --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/CommentPersonHandler.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.microsoft.ooxml; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Office; +import org.apache.tika.utils.StringUtils; +import org.apache.tika.utils.XMLReaderUtils; + +public class CommentPersonHandler extends DefaultHandler { + + private final Metadata metadata; + + CommentPersonHandler(Metadata metadata) { + this.metadata = metadata; + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { + //what else do we want? 
+ //<person displayName="Wiley Coyote" id="{11111111-2234-2342-2342-23498237923}" userId="55bbdf23486284" providerId="Windows Live"/> + if ("person".equals(localName)) { + String displayName = XMLReaderUtils.getAttrValue("displayName", atts); + if (!StringUtils.isBlank(displayName)) { + metadata.add(Office.COMMENT_PERSONS, displayName); + } + } + } +} diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java index 2cfd24f92..34834a416 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java @@ -29,6 +29,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage; */ public class OPCPackageWrapper implements Closeable { + public static final String PERSON_RELATION = "http://schemas.microsoft.com/office/2017/10/relationships/person"; private final OPCPackage opcPackage; public OPCPackageWrapper(OPCPackage opcPackage) { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java index 77000b9a9..51a30cdc9 100644 --- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java @@ -39,6 +39,7 @@ import org.xml.sax.SAXException; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Office; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.XHTMLContentHandler; @@ -64,7 +65,7 @@ public class XSSFBExcelExtractorDecorator extends XSSFExcelExtractorDecorator { this.metadata = metadata; this.parseContext = context; - metadata.set(TikaCoreProperties.PROTECTED, "false"); + metadata.set(Office.PROTECTED_WORKSHEET, false); super.getXHTML(handler, metadata, context); } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java index da5357937..adc6aefbe 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java @@ -35,6 +35,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePartName; import 
org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.TargetMode; @@ -68,11 +69,13 @@ import org.xml.sax.helpers.DefaultHandler; import org.apache.tika.exception.RuntimeSAXException; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Office; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.microsoft.TikaExcelDataFormatter; import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.utils.StringUtils; import org.apache.tika.utils.XMLReaderUtils; public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { @@ -122,7 +125,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { this.metadata = metadata; this.parseContext = context; - metadata.set(TikaCoreProperties.PROTECTED, "false"); + metadata.set(Office.PROTECTED_WORKSHEET, false); /* typed boolean setter, matching XSSFBExcelExtractorDecorator */ super.getXHTML(handler, metadata, context); } @@ -148,7 +151,6 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { } catch (OpenXML4JException e) { throw new XmlException(e); } - while (iter.hasNext()) { SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(config, xhtml); PackagePart sheetPart = null; @@ -159,6 +161,9 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { sheetParts.add(sheetPart); Comments comments = iter.getSheetComments(); + if (comments != null && comments.getNumberOfComments() > 0) { + metadata.set(Office.HAS_COMMENTS, true); + } // Start, and output the sheet name xhtml.startElement("div"); @@ -201,13 +206,45 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { //consider 
adding this back to POI try (InputStream wbData = xssfReader.getWorkbookData()) { XMLReaderUtils - .parseSAX(wbData, new AbsPathExtractorHandler(), + .parseSAX(wbData, new WorkbookMetadataHandler(), parseContext); } catch (InvalidFormatException | TikaException e) { //swallow } + try { + getPersons(container, metadata); + } catch (InvalidFormatException | TikaException | IOException | SAXException e) { + //swallow + } } + private void getPersons(OPCPackage container, Metadata metadata) throws TikaException, InvalidFormatException, + IOException, SAXException { + PackageRelationship coreDocRelationship = container.getRelationshipsByType( + PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); + if (coreDocRelationship == null) { + return; + } + // Get the part that holds the workbook + PackagePart workbookPart = container.getPart(coreDocRelationship); + if (workbookPart == null) { + return; + } + PackageRelationshipCollection coll = workbookPart.getRelationshipsByType(OPCPackageWrapper.PERSON_RELATION); + if (coll == null) { + return; + } + for (PackageRelationship rel : coll) { + PackagePart personsPart = workbookPart.getRelatedPart(rel); + if (personsPart == null) { + continue; + } + try (InputStream is = personsPart.getInputStream()) { + XMLReaderUtils.parseSAX(is, new CommentPersonHandler(metadata), parseContext); + } + } + + } protected void addDrawingHyperLinks(PackagePart sheetPart) { try { @@ -355,7 +392,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { sheetInputStream.close(); if (handler.hasProtection) { - metadata.set(TikaCoreProperties.PROTECTED, "true"); + metadata.set(Office.PROTECTED_WORKSHEET, true); } } catch (TikaException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); @@ -590,7 +627,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { } } - private class AbsPathExtractorHandler extends DefaultHandler { + private class WorkbookMetadataHandler 
extends DefaultHandler { @Override public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { @@ -604,7 +641,23 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { return; } } + } else if ("sheet".equals(localName)) { + String n = XMLReaderUtils.getAttrValue("name", atts); + String state = XMLReaderUtils.getAttrValue("state", atts); + if ("hidden".equals(state)) { + metadata.set(Office.HAS_HIDDEN_SHEETS, true); + metadata.add(Office.HIDDEN_SHEET_NAMES, n); + } else if ("veryHidden".equals(state)) { + metadata.set(Office.HAS_VERY_HIDDEN_SHEETS, true); + metadata.add(Office.VERY_HIDDEN_SHEET_NAMES, n); /* text bag: append so earlier very-hidden sheet names are kept */ + } + } else if ("workbookPr".equals(localName)) { + String codeName = XMLReaderUtils.getAttrValue("codeName", atts); + if (!StringUtils.isBlank(codeName)) { + metadata.set(Office.WORKBOOK_CODENAME, codeName); + } } + // file version? <fileVersion appName="xl" lastEdited="7" lowestEdited="7" rupBuild="28526"/> } } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java index ba8fb1485..b0080ee76 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java @@ -31,6 +31,7 @@ import org.apache.tika.detect.microsoft.POIFSContainerDetector; import org.apache.tika.extractor.ContainerExtractor; import org.apache.tika.extractor.ParserContainerExtractor; import org.apache.tika.metadata.Metadata; +import 
org.apache.tika.metadata.Office; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; @@ -136,13 +137,13 @@ public class POIContainerExtractionTest extends AbstractPOIContainerExtractionTe List<Metadata> list = getRecursiveMetadata("testWORD_embeded.doc"); //.docx assertEquals("{F4754C9B-64F5-4B40-8AF4-679732AC0607}", - list.get(10).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID)); + list.get(10).get(Office.EMBEDDED_STORAGE_CLASS_ID)); //_1345471035.ppt assertEquals("{64818D10-4F9B-11CF-86EA-00AA00B929E8}", - list.get(14).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID)); + list.get(14).get(Office.EMBEDDED_STORAGE_CLASS_ID)); //_1345470949.xls assertEquals("{00020820-0000-0000-C000-000000000046}", - list.get(16).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID)); + list.get(16).get(Office.EMBEDDED_STORAGE_CLASS_ID)); } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java index 3a74cabeb..b9e80869f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java @@ -116,7 +116,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest { assertNotContained("9.0", content); assertContains("196", content); assertNotContained("196.0", content); - assertEquals("false", metadata.get(TikaCoreProperties.PROTECTED)); + assertEquals("false", metadata.get(Office.PROTECTED_WORKSHEET)); } @@ -206,7 +206,7 @@ public class OOXMLParserTest extends 
MultiThreadedTikaTest { assertNotContained("10.0", content); assertContains("cb=sum", content); assertNotContained("13.0", content); - assertEquals("false", metadata.get(TikaCoreProperties.PROTECTED)); + assertEquals("false", metadata.get(Office.PROTECTED_WORKSHEET)); } @@ -482,7 +482,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest { assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", metadata.get(Metadata.CONTENT_TYPE)); - assertEquals("true", metadata.get(TikaCoreProperties.PROTECTED)); + assertEquals("true", metadata.get(Office.PROTECTED_WORKSHEET)); } @@ -497,7 +497,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest { assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", xmlResult.metadata.get(Metadata.CONTENT_TYPE)); - assertEquals("true", xmlResult.metadata.get(TikaCoreProperties.PROTECTED)); + assertEquals("true", xmlResult.metadata.get(Office.PROTECTED_WORKSHEET)); assertContains("Office", xmlResult.xml); }
