This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-4055
in repository https://gitbox.apache.org/repos/asf/tika.git
commit a2267afc66d30a425a8ad6482adeb22dd4b91897
Author: tballison
AuthorDate: Fri May 26 12:09:52 2023 -0400
TIKA-4002 -- add mime type detection for pcapng
---
CHANGES.txt | 6 +++
.../apache/tika/parser/RecursiveParserWrapper.java | 2 +
.../tika/parser/RecursiveParserWrapperTest.java | 61 +++---
.../src/test/resources/log4j.properties | 2 +-
.../core/resource/RecursiveMetadataResource.java | 4 +-
.../standard/RecursiveMetadataResourceTest.java | 14 ++---
6 files changed, 73 insertions(+), 16 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index b3ac0be3b..5526b5f86 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,9 @@
+Release 2.8.1 - ???
+
+ * Fixed write limit bug in RecursiveParserWrapper (TIKA-4055).
+
+ * Add mime detection for many files (TIKA-3992).
+
Release 2.8.0 - 5/11/2023
* Enable counting and/or parsing of incremental updates in PDFs. This
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 483181b0a..e8f029770 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -372,6 +372,7 @@ public class RecursiveParserWrapper extends ParserDecorator
{
}
int availableLength = Math.min(totalWriteLimit - totalChars,
length);
super.characters(ch, start, availableLength);
+totalChars += availableLength;
if (availableLength < length) {
handleWriteLimitReached();
}
@@ -389,6 +390,7 @@ public class RecursiveParserWrapper extends ParserDecorator
{
}
int availableLength = Math.min(totalWriteLimit - totalChars,
length);
super.ignorableWhitespace(ch, start, availableLength);
+totalChars += availableLength;
if (availableLength < length) {
handleWriteLimitReached();
}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 24800926a..61eeab14d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -93,14 +93,15 @@ public class RecursiveParserWrapperTest extends TikaTest {
RecursiveParserWrapper wrapper = new
RecursiveParserWrapper(AUTO_DETECT_PARSER);
RecursiveParserWrapperHandler handler = new
RecursiveParserWrapperHandler(
-new
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 70));
+new
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
+70));
try (InputStream stream =
getResourceAsStream("/test-documents/test_recursive_embedded.docx")) {
wrapper.parse(stream, handler, metadata, context);
}
List<Metadata> list = handler.getMetadataList();
-assertEquals(5, list.size());
+assertEquals(2, list.size());
int wlr = 0;
for (Metadata m : list) {
@@ -112,15 +113,31 @@ public class RecursiveParserWrapperTest extends TikaTest {
assertEquals(2, wlr);
}
+@Test
+public void testOne() throws Exception {
+ParseContext context = new ParseContext();
+Metadata metadata = new Metadata();
+int writeLimit = 100;
+RecursiveParserWrapper wrapper = new
RecursiveParserWrapper(AUTO_DETECT_PARSER);
+RecursiveParserWrapperHandler handler = new
RecursiveParserWrapperHandler(
+new
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
+writeLimit, false, context));
+try (InputStream stream = getResourceAsStream(
+"/test-documents/test_recursive_embedded" + ".docx")) {
+wrapper.parse(stream, handler, metadata, context);
+}
+List<Metadata> list = handler.getMetadataList();
+assertEquals(12, list.size());
+}
@Test
public void testCharLimitNoThrowOnWriteLimit() throws Exception {
ParseContext context = new ParseContext();
Metadata metadata = new Metadata();
-
+int writeLimit = 500;
RecursiveParserWrapper wrapper = new