This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 31c1a08ad TIKA-4410 (#2226) -- improve feature extraction from xlsx
31c1a08ad is described below

commit 31c1a08ad1d08fdc088dc5cbb28f18363414543e
Author: Tim Allison <[email protected]>
AuthorDate: Fri May 30 09:41:52 2025 -0400

    TIKA-4410 (#2226) -- improve feature extraction from xlsx
---
 .../main/java/org/apache/tika/metadata/Office.java | 18 +++++++
 .../apache/tika/metadata/TikaCoreProperties.java   |  7 ++-
 .../metadata/writefilter/StandardWriteFilter.java  |  2 +-
 .../org/apache/tika/pipes/PipesClientTest.java     |  5 +-
 .../parser/microsoft/AbstractPOIFSExtractor.java   |  8 +--
 .../microsoft/ooxml/CommentPersonHandler.java      | 47 ++++++++++++++++
 .../parser/microsoft/ooxml/OPCPackageWrapper.java  |  1 +
 .../ooxml/XSSFBExcelExtractorDecorator.java        |  3 +-
 .../ooxml/XSSFExcelExtractorDecorator.java         | 62 ++++++++++++++++++++--
 .../microsoft/POIContainerExtractionTest.java      |  7 +--
 .../parser/microsoft/ooxml/OOXMLParserTest.java    | 12 +++--
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |  3 +-
 12 files changed, 149 insertions(+), 26 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Office.java 
b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
index aa4b9f002..9d5442b67 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Office.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
@@ -152,4 +152,22 @@ public interface Office {
     Property PROG_ID = Property.internalText("msoffice:progID");
 
     Property OCX_NAME = Property.internalText("msoffice:ocxName");
+
+    Property EMBEDDED_STORAGE_CLASS_ID = Property.internalText("msoffice:embeddedStorageClassId");
+
+    Property HAS_HIDDEN_SHEETS = Property.internalBoolean("msoffice:excel:has-hidden-sheets");
+
+    Property HAS_VERY_HIDDEN_SHEETS = Property.internalBoolean("msoffice:excel:has-very-hidden-sheets");
+
+    Property HIDDEN_SHEET_NAMES = Property.internalTextBag("msoffice:excel:hidden-sheet-names");
+    Property VERY_HIDDEN_SHEET_NAMES = Property.internalTextBag("msoffice:excel:very-hidden-sheet-names");
+
+    Property PROTECTED_WORKSHEET = Property.internalBoolean("msoffice:excel:protected-worksheet");
+
+    Property WORKBOOK_CODENAME = Property.internalText("msoffice:excel:workbook-codename");
+
+    Property HAS_COMMENTS = Property.internalBoolean("msoffice:has-comments");
+
+    Property COMMENT_PERSONS = Property.internalTextBag("msoffice:comment-person-display-name");
+
 }
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java 
b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index 3d7d34d4e..7e36624c5 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -175,10 +175,9 @@ public interface TikaCoreProperties {
     Property TIKA_DETECTED_LANGUAGE_CONFIDENCE_RAW = 
Property.externalRealSeq(TIKA_META_PREFIX +
             "detected_language_confidence_raw");
 
-    String RESOURCE_NAME_KEY = "resourceName";
-    String PROTECTED = "protected";
-    String EMBEDDED_RELATIONSHIP_ID = "embeddedRelationshipId";
-    String EMBEDDED_STORAGE_CLASS_ID = "embeddedStorageClassId";
+    Property RESOURCE_NAME_KEY = Property.internalText(TIKA_META_PREFIX + 
"resourceName");
+    Property EMBEDDED_RELATIONSHIP_ID = Property.internalText(TIKA_META_PREFIX 
+ "embeddedRelationshipId");
+
     String EMBEDDED_RESOURCE_TYPE_KEY = "embeddedResourceType";
     /**
      * Some file formats can store information about their original
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
 
b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
index a245e8d2c..38763d079 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
@@ -81,7 +81,7 @@ public class StandardWriteFilter implements 
MetadataWriteFilter, Serializable {
         
ALWAYS_SET_FIELDS.add(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE.getName());
         ALWAYS_SET_FIELDS.add(TikaCoreProperties.CONTENT_TYPE_HINT.getName());
         ALWAYS_SET_FIELDS.add(TikaCoreProperties.TIKA_CONTENT.getName());
-        ALWAYS_SET_FIELDS.add(TikaCoreProperties.RESOURCE_NAME_KEY);
+        ALWAYS_SET_FIELDS.add(TikaCoreProperties.RESOURCE_NAME_KEY.getName());
         ALWAYS_SET_FIELDS.add(AccessPermissions.EXTRACT_CONTENT.getName());
         
ALWAYS_SET_FIELDS.add(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY.getName());
         ALWAYS_SET_FIELDS.add(Metadata.CONTENT_DISPOSITION);
diff --git a/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java 
b/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java
index 35d52fc4a..2a3f71138 100644
--- a/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java
+++ b/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java
@@ -29,6 +29,7 @@ import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.filter.CompositeMetadataFilter;
 import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.metadata.filter.MockUpperCaseFilter;
@@ -63,7 +64,7 @@ public class PipesClientTest {
         Assertions.assertNotNull(pipesResult.getEmitData().getMetadataList());
         Assertions.assertEquals(1, 
pipesResult.getEmitData().getMetadataList().size());
         Metadata metadata = pipesResult.getEmitData().getMetadataList().get(0);
-        Assertions.assertEquals("testOverlappingText.pdf", 
metadata.get("resourceName"));
+        Assertions.assertEquals("testOverlappingText.pdf", 
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
     }
 
     @Test
@@ -77,7 +78,7 @@ public class PipesClientTest {
         Assertions.assertNotNull(pipesResult.getEmitData().getMetadataList());
         Assertions.assertEquals(1, 
pipesResult.getEmitData().getMetadataList().size());
         Metadata metadata = pipesResult.getEmitData().getMetadataList().get(0);
-        Assertions.assertEquals("TESTOVERLAPPINGTEXT.PDF", 
metadata.get("resourceName"));
+        Assertions.assertEquals("TESTOVERLAPPINGTEXT.PDF", 
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
     }
 
     @Test
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index 8910b1c00..39d5a0f0a 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -126,7 +126,7 @@ abstract class AbstractPOIFSExtractor {
                 
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, 
relationshipID);
             }
             if (storageClassID != null) {
-                
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID,
+                embeddedMetadata.set(Office.EMBEDDED_STORAGE_CLASS_ID,
                         storageClassID.toString());
             }
             if (mediaType != null) {
@@ -200,7 +200,7 @@ abstract class AbstractPOIFSExtractor {
         // What kind of document is it?
         metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, 
dir.getName());
         if (dir.getStorageClsid() != null) {
-            metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID,
+            metadata.set(Office.EMBEDDED_STORAGE_CLASS_ID,
                     dir.getStorageClsid().toString());
         }
         POIFSDocumentType type = POIFSDocumentType.detectType(dir);
@@ -383,7 +383,7 @@ abstract class AbstractPOIFSExtractor {
             return;
         }
         if (dir.getStorageClsid() != null) {
-            metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID,
+            metadata.set(Office.EMBEDDED_STORAGE_CLASS_ID,
                     dir.getStorageClsid().toString());
         }
         embeddedDocumentUtil.parseEmbedded(tis, xhtml, metadata, outputHtml);
@@ -398,7 +398,7 @@ abstract class AbstractPOIFSExtractor {
         try (TikaInputStream tis = TikaInputStream.get(new byte[0])) {
             tis.setOpenContainer(dir);
             if (dir.getStorageClsid() != null) {
-                metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID,
+                metadata.set(Office.EMBEDDED_STORAGE_CLASS_ID,
                         dir.getStorageClsid().toString());
             }
             embeddedDocumentUtil.parseEmbedded(tis, xhtml, metadata, 
outputHtml);
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/CommentPersonHandler.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/CommentPersonHandler.java
new file mode 100644
index 000000000..c7efda1ae
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/CommentPersonHandler.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.utils.StringUtils;
+import org.apache.tika.utils.XMLReaderUtils;
+
+public class CommentPersonHandler extends DefaultHandler {
+
+    private final Metadata metadata;
+
+    CommentPersonHandler(Metadata metadata) {
+        this.metadata = metadata;
+    }
+
+    @Override
+    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+        //what else do we want?
+        //<person displayName="Wiley Coyote" id="{11111111-2234-2342-2342-23498237923}" userId="55bbdf23486284" providerId="Windows Live"/>
+        if ("person".equals(localName)) {
+            String displayName = XMLReaderUtils.getAttrValue("displayName", atts);
+            if (!StringUtils.isBlank(displayName)) {
+                metadata.add(Office.COMMENT_PERSONS, displayName);
+            }
+        }
+    }
+}
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java
index 2cfd24f92..34834a416 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java
@@ -29,6 +29,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
  */
 public class OPCPackageWrapper implements Closeable {
 
+    public static final String PERSON_RELATION = "http://schemas.microsoft.com/office/2017/10/relationships/person";
     private final OPCPackage opcPackage;
 
     public OPCPackageWrapper(OPCPackage opcPackage) {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
index 77000b9a9..51a30cdc9 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
@@ -39,6 +39,7 @@ import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -64,7 +65,7 @@ public class XSSFBExcelExtractorDecorator extends 
XSSFExcelExtractorDecorator {
 
         this.metadata = metadata;
         this.parseContext = context;
-        metadata.set(TikaCoreProperties.PROTECTED, "false");
+        metadata.set(Office.PROTECTED_WORKSHEET, false);
 
         super.getXHTML(handler, metadata, context);
     }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index da5357937..97c629b6a 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -35,6 +35,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackagePartName;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
@@ -68,11 +69,13 @@ import org.xml.sax.helpers.DefaultHandler;
 import org.apache.tika.exception.RuntimeSAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.microsoft.OfficeParserConfig;
 import org.apache.tika.parser.microsoft.TikaExcelDataFormatter;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.utils.StringUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 
 public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
@@ -122,7 +125,7 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
 
         this.metadata = metadata;
         this.parseContext = context;
-        metadata.set(TikaCoreProperties.PROTECTED, "false");
+        metadata.set(Office.PROTECTED_WORKSHEET, "false");
 
         super.getXHTML(handler, metadata, context);
     }
@@ -148,7 +151,6 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
         } catch (OpenXML4JException e) {
             throw new XmlException(e);
         }
-
         while (iter.hasNext()) {
             SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(config, 
xhtml);
             PackagePart sheetPart = null;
@@ -159,6 +161,9 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
                 sheetParts.add(sheetPart);
 
                 Comments comments = iter.getSheetComments();
+                if (comments != null && comments.getNumberOfComments() > 0) {
+                    metadata.set(Office.HAS_COMMENTS, true);
+                }
 
                 // Start, and output the sheet name
                 xhtml.startElement("div");
@@ -201,13 +206,44 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
         //consider adding this back to POI
         try (InputStream wbData = xssfReader.getWorkbookData()) {
             XMLReaderUtils
-                    .parseSAX(wbData, new AbsPathExtractorHandler(),
+                    .parseSAX(wbData, new WorkbookMetadataHandler(),
                             parseContext);
         } catch (InvalidFormatException | TikaException e) {
             //swallow
         }
+        try {
+            getPersons(container, metadata);
+        } catch (InvalidFormatException | TikaException | IOException | 
SAXException e) {
+            //swallow
+        }
     }
 
+    private void getPersons(OPCPackage container, Metadata metadata) throws TikaException, InvalidFormatException,
+            IOException, SAXException {
+        PackageRelationship coreDocRelationship = container.getRelationshipsByType(
+                PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
+        if (coreDocRelationship == null) {
+            return;
+        }
+        // Get the part that holds the workbook
+        PackagePart workbookPart = container.getPart(coreDocRelationship);
+        if (workbookPart == null) {
+            return;
+        }
+        PackageRelationshipCollection coll = workbookPart.getRelationshipsByType(OPCPackageWrapper.PERSON_RELATION);
+        if (coll == null) {
+            return;
+        }
+        for (PackageRelationship rel : coll) {
+            PackagePart personsPart = workbookPart.getRelatedPart(rel);
+            if (personsPart == null) {
+                continue;
+            }
+            try (InputStream is = personsPart.getInputStream()) {
+                XMLReaderUtils.parseSAX(is, new CommentPersonHandler(metadata), parseContext);
+            }
+        }
+    }
 
     protected void addDrawingHyperLinks(PackagePart sheetPart) {
         try {
@@ -355,7 +391,7 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
             sheetInputStream.close();
 
             if (handler.hasProtection) {
-                metadata.set(TikaCoreProperties.PROTECTED, "true");
+                metadata.set(Office.PROTECTED_WORKSHEET, true);
             }
         } catch (TikaException e) {
             throw new RuntimeException("SAX parser appears to be broken - " + 
e.getMessage());
@@ -590,7 +626,7 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
         }
     }
 
-    private class AbsPathExtractorHandler extends DefaultHandler {
+    private class WorkbookMetadataHandler extends DefaultHandler {
         @Override
         public void startElement(String uri, String localName, String qName, 
Attributes atts)
                 throws SAXException {
@@ -604,7 +640,23 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
                         return;
                     }
                 }
+            } else if ("sheet".equals(localName)) {
+                String n = XMLReaderUtils.getAttrValue("name", atts);
+                String state = XMLReaderUtils.getAttrValue("state", atts);
+                if ("hidden".equals(state)) {
+                    metadata.set(Office.HAS_HIDDEN_SHEETS, true);
+                    metadata.add(Office.HIDDEN_SHEET_NAMES, n);
+                } else if ("veryHidden".equals(state)) {
+                    metadata.set(Office.HAS_VERY_HIDDEN_SHEETS, true);
+                    metadata.add(Office.VERY_HIDDEN_SHEET_NAMES, n);
+                }
+            } else if ("workbookPr".equals(localName)) {
+                String codeName = XMLReaderUtils.getAttrValue("codeName", 
atts);
+                if (!StringUtils.isBlank(codeName)) {
+                    metadata.set(Office.WORKBOOK_CODENAME, codeName);
+                }
             }
+            // file version? <fileVersion appName="xl" lastEdited="7" 
lowestEdited="7" rupBuild="28526"/>
         }
     }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
index ba8fb1485..b0080ee76 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
@@ -31,6 +31,7 @@ import 
org.apache.tika.detect.microsoft.POIFSContainerDetector;
 import org.apache.tika.extractor.ContainerExtractor;
 import org.apache.tika.extractor.ParserContainerExtractor;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 
@@ -136,13 +137,13 @@ public class POIContainerExtractionTest extends 
AbstractPOIContainerExtractionTe
         List<Metadata> list = getRecursiveMetadata("testWORD_embeded.doc");
         //.docx
         assertEquals("{F4754C9B-64F5-4B40-8AF4-679732AC0607}",
-                
list.get(10).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
+                list.get(10).get(Office.EMBEDDED_STORAGE_CLASS_ID));
         //_1345471035.ppt
         assertEquals("{64818D10-4F9B-11CF-86EA-00AA00B929E8}",
-                
list.get(14).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
+                list.get(14).get(Office.EMBEDDED_STORAGE_CLASS_ID));
         //_1345470949.xls
         assertEquals("{00020820-0000-0000-C000-000000000046}",
-                
list.get(16).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
+                list.get(16).get(Office.EMBEDDED_STORAGE_CLASS_ID));
 
     }
 
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 3a74cabeb..9559e73c2 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -116,7 +116,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
         assertNotContained("9.0", content);
         assertContains("196", content);
         assertNotContained("196.0", content);
-        assertEquals("false", metadata.get(TikaCoreProperties.PROTECTED));
+        assertEquals("false", metadata.get(Office.PROTECTED_WORKSHEET));
 
     }
 
@@ -206,7 +206,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
         assertNotContained("10.0", content);
         assertContains("cb=sum", content);
         assertNotContained("13.0", content);
-        assertEquals("false", metadata.get(TikaCoreProperties.PROTECTED));
+        assertEquals("false", metadata.get(Office.PROTECTED_WORKSHEET));
 
     }
 
@@ -482,7 +482,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
         
assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                 metadata.get(Metadata.CONTENT_TYPE));
 
-        assertEquals("true", metadata.get(TikaCoreProperties.PROTECTED));
+        assertEquals("true", metadata.get(Office.PROTECTED_WORKSHEET));
 
     }
 
@@ -497,7 +497,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
         
assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                 xmlResult.metadata.get(Metadata.CONTENT_TYPE));
 
-        assertEquals("true", 
xmlResult.metadata.get(TikaCoreProperties.PROTECTED));
+        assertEquals("true", 
xmlResult.metadata.get(Office.PROTECTED_WORKSHEET));
 
         assertContains("Office", xmlResult.xml);
     }
@@ -1393,11 +1393,13 @@ public class OOXMLParserTest extends 
MultiThreadedTikaTest {
     public void testMacroinXlsm() throws Exception {
 
         //test default is "don't extract macros"
-        for (Metadata metadata : getRecursiveMetadata("testEXCEL_macro.xlsm")) 
{
+        List<Metadata> metadataList = 
getRecursiveMetadata("testEXCEL_macro.xlsm");
+        for (Metadata metadata : metadataList) {
             if (metadata.get(Metadata.CONTENT_TYPE).equals("text/x-vbasic")) {
                 fail("Shouldn't have extracted macros as default");
             }
         }
+        assertEquals("ThisWorkbook", 
metadataList.get(0).get(Office.WORKBOOK_CODENAME));
 
         //now test that they were extracted
         ParseContext context = new ParseContext();
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
index 9046f4951..640ea0104 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
@@ -28,6 +28,7 @@ import org.apache.tika.TikaTest;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 
 public class TruncatedOOXMLTest extends TikaTest {
@@ -53,7 +54,7 @@ public class TruncatedOOXMLTest extends TikaTest {
         metadataList.forEach(m -> {
             System.out.println("depth: " + m.get("X-TIKA:embedded_depth"));
             System.out.println("relid: " + m.get("embeddedRelationshipId"));
-            System.out.println("res:   " + m.get("resourceName"));
+            System.out.println("res:   " + 
m.get(TikaCoreProperties.RESOURCE_NAME_KEY));
             System.out.println("cont:  " + m.get("X-TIKA:content"));
         });
 

Reply via email to