This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 945378aa79 improve hssf parsing (#2874)
945378aa79 is described below

commit 945378aa790fff06b6d86db700f62ba14bea4cc7
Author: Tim Allison <[email protected]>
AuthorDate: Fri Jun 5 12:00:22 2026 -0400

    improve hssf parsing (#2874)
---
 .../tika/parser/microsoft/ExcelExtractor.java      | 38 +++++++++++++---------
 .../tika/parser/microsoft/ExcelParserTest.java     | 30 +++++++++++++++++
 2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
index 869f88b1c0..7a2a7ab90f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
@@ -198,24 +198,30 @@ public class ExcelExtractor extends AbstractPOIFSExtractor {
             }
         }
 
-        // If a password was supplied, use it, otherwise the default
-        Biff8EncryptionKey.setCurrentUserPassword(getPassword());
-
-        // Have the file processed in event mode
-        TikaHSSFListener listener =
-                new TikaHSSFListener(workbookEntryName, xhtml, locale, this, officeParserConfig);
-        listener.processFile(root, isListenForAllRecords());
-        listener.throwStoredException();
-        updateMetadata(listener);
-
-        for (Entry entry : root) {
-            if (entry.getName().startsWith("MBD") && entry instanceof DirectoryEntry) {
-                try {
-                    handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml, true);
-                } catch (TikaException e) {
-                    // ignore parse errors from embedded documents
+        // Use the supplied password, otherwise the default. POI keeps it in a ThreadLocal;
+        // save/restore so it doesn't carry over to the next parse on this thread.
+        String previousPassword = Biff8EncryptionKey.getCurrentUserPassword();
+        try {
+            Biff8EncryptionKey.setCurrentUserPassword(getPassword());
+
+            // Have the file processed in event mode
+            TikaHSSFListener listener =
+                    new TikaHSSFListener(workbookEntryName, xhtml, locale, this, officeParserConfig);
+            listener.processFile(root, isListenForAllRecords());
+            listener.throwStoredException();
+            updateMetadata(listener);
+
+            for (Entry entry : root) {
+                if (entry.getName().startsWith("MBD") && entry instanceof DirectoryEntry) {
+                    try {
+                        handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml, true);
+                    } catch (TikaException e) {
+                        // ignore parse errors from embedded documents
+                    }
                 }
             }
+        } finally {
+            Biff8EncryptionKey.setCurrentUserPassword(previousPassword);
         }
     }
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index 55975455b2..4824a7f65c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -25,7 +25,9 @@ import static org.junit.jupiter.api.Assertions.fail;
 import java.text.DecimalFormatSymbols;
 import java.util.List;
 import java.util.Locale;
+import java.util.UUID;
 
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
 import org.apache.poi.util.LocaleUtil;
 import org.junit.jupiter.api.Test;
 import org.xml.sax.ContentHandler;
@@ -210,6 +212,34 @@ public class ExcelParserTest extends TikaTest {
         }
     }
 
+    @Test
+    public void testPasswordThreadLocalRestored() throws Exception {
+        String sentinel = "sentinel-" + UUID.randomUUID();
+        String original = Biff8EncryptionKey.getCurrentUserPassword();
+        try {
+            Biff8EncryptionKey.setCurrentUserPassword(sentinel);
+
+            try (TikaInputStream tis = getResourceAsStream(
+                    "/test-documents/testEXCEL_protected_passtika.xls")) {
+                ParseContext context = new ParseContext();
+                context.set(PasswordProvider.class, metadata -> "tika");
+                new OfficeParser().parse(tis, new BodyContentHandler(), new Metadata(), context);
+            }
+            assertEquals(sentinel, Biff8EncryptionKey.getCurrentUserPassword());
+
+            try (TikaInputStream tis = getResourceAsStream(
+                    "/test-documents/testEXCEL_protected_passtika.xls")) {
+                new OfficeParser().parse(tis, new BodyContentHandler(), new Metadata(),
+                        new ParseContext());
+                fail("Document is encrypted, shouldn't parse");
+            } catch (EncryptedDocumentException e) {
+                assertEquals(sentinel, Biff8EncryptionKey.getCurrentUserPassword());
+            }
+        } finally {
+            Biff8EncryptionKey.setCurrentUserPassword(original);
+        }
+    }
+
     /**
      * TIKA-214 - Ensure we extract labels etc from Charts
      */

Reply via email to