This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new fce8805dd1 improve hssf parsing
fce8805dd1 is described below
commit fce8805dd1e17a952e454234f5a7ccc941594847
Author: tallison <[email protected]>
AuthorDate: Fri Jun 5 11:31:21 2026 -0400
improve hssf parsing
---
.../tika/parser/microsoft/ExcelExtractor.java | 38 +++++++++++++---------
.../tika/parser/microsoft/ExcelParserTest.java | 30 +++++++++++++++++
2 files changed, 52 insertions(+), 16 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
index 1604b5aab9..71d49083ae 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
@@ -188,24 +188,30 @@ public class ExcelExtractor extends AbstractPOIFSExtractor {
}
}
- // If a password was supplied, use it, otherwise the default
- Biff8EncryptionKey.setCurrentUserPassword(getPassword());
-
- // Have the file processed in event mode
- TikaHSSFListener listener =
- new TikaHSSFListener(workbookEntryName, xhtml, locale, this, officeParserConfig);
- listener.processFile(root, isListenForAllRecords());
- listener.throwStoredException();
- updateMetadata(listener);
-
- for (Entry entry : root) {
- if (entry.getName().startsWith("MBD") && entry instanceof DirectoryEntry) {
- try {
- handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml, true);
- } catch (TikaException e) {
- // ignore parse errors from embedded documents
+ // Use the supplied password, otherwise the default. POI keeps it in a ThreadLocal;
+ // save/restore so it doesn't carry over to the next parse on this thread.
+ String previousPassword = Biff8EncryptionKey.getCurrentUserPassword();
+ try {
+ Biff8EncryptionKey.setCurrentUserPassword(getPassword());
+
+ // Have the file processed in event mode
+ TikaHSSFListener listener =
+ new TikaHSSFListener(workbookEntryName, xhtml, locale, this, officeParserConfig);
+ listener.processFile(root, isListenForAllRecords());
+ listener.throwStoredException();
+ updateMetadata(listener);
+
+ for (Entry entry : root) {
+ if (entry.getName().startsWith("MBD") && entry instanceof DirectoryEntry) {
+ try {
+ handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml, true);
+ } catch (TikaException e) {
+ // ignore parse errors from embedded documents
+ }
}
}
+ } finally {
+ Biff8EncryptionKey.setCurrentUserPassword(previousPassword);
}
}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index 1cced50eeb..44a781f2e0 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -25,7 +25,9 @@ import java.io.InputStream;
import java.text.DecimalFormatSymbols;
import java.util.List;
import java.util.Locale;
+import java.util.UUID;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.util.LocaleUtil;
import org.junit.jupiter.api.Test;
import org.xml.sax.ContentHandler;
@@ -210,6 +212,34 @@ public class ExcelParserTest extends TikaTest {
}
}
+ @Test
+ public void testPasswordThreadLocalRestored() throws Exception {
+ String sentinel = "sentinel-" + UUID.randomUUID();
+ String original = Biff8EncryptionKey.getCurrentUserPassword();
+ try {
+ Biff8EncryptionKey.setCurrentUserPassword(sentinel);
+
+ try (InputStream tis = getResourceAsStream(
+ "/test-documents/testEXCEL_protected_passtika.xls")) {
+ ParseContext context = new ParseContext();
+ context.set(PasswordProvider.class, metadata -> "tika");
+ new OfficeParser().parse(tis, new BodyContentHandler(), new Metadata(), context);
+ }
+ assertEquals(sentinel, Biff8EncryptionKey.getCurrentUserPassword());
+
+ try (InputStream tis = getResourceAsStream(
+ "/test-documents/testEXCEL_protected_passtika.xls")) {
+ new OfficeParser().parse(tis, new BodyContentHandler(), new Metadata(),
+ new ParseContext());
+ fail("Document is encrypted, shouldn't parse");
+ } catch (EncryptedDocumentException e) {
+ assertEquals(sentinel, Biff8EncryptionKey.getCurrentUserPassword());
+ }
+ } finally {
+ Biff8EncryptionKey.setCurrentUserPassword(original);
+ }
+ }
+
/**
* TIKA-214 - Ensure we extract labels etc from Charts
*/