[tika] 01/01: TIKA-4002 -- add mime type detection for pcapng

2023-05-26 Thread tallison
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4055
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a2267afc66d30a425a8ad6482adeb22dd4b91897
Author: tballison 
AuthorDate: Fri May 26 12:09:52 2023 -0400

TIKA-4002 -- add mime type detection for pcapng
---
 CHANGES.txt|  6 +++
 .../apache/tika/parser/RecursiveParserWrapper.java |  2 +
 .../tika/parser/RecursiveParserWrapperTest.java| 61 +++---
 .../src/test/resources/log4j.properties|  2 +-
 .../core/resource/RecursiveMetadataResource.java   |  4 +-
 .../standard/RecursiveMetadataResourceTest.java| 14 ++---
 6 files changed, 73 insertions(+), 16 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index b3ac0be3b..5526b5f86 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,9 @@
+Release 2.8.1 - ???
+
+   * Fixed write limit bug in RecursiveParserWrapper (TIKA-4055).
+
+   * Add mime detection for many files (TIKA-3992).
+
 Release 2.8.0 - 5/11/2023
 
* Enable counting and/or parsing of incremental updates in PDFs.  This
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 483181b0a..e8f029770 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -372,6 +372,7 @@ public class RecursiveParserWrapper extends ParserDecorator {
 }
 int availableLength = Math.min(totalWriteLimit - totalChars, length);
 super.characters(ch, start, availableLength);
+totalChars += availableLength;
 if (availableLength < length) {
 handleWriteLimitReached();
 }
@@ -389,6 +390,7 @@ public class RecursiveParserWrapper extends ParserDecorator {
 }
 int availableLength = Math.min(totalWriteLimit - totalChars, length);
 super.ignorableWhitespace(ch, start, availableLength);
+totalChars += availableLength;
 if (availableLength < length) {
 handleWriteLimitReached();
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 24800926a..61eeab14d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -93,14 +93,15 @@ public class RecursiveParserWrapperTest extends TikaTest {
 
 RecursiveParserWrapper wrapper = new RecursiveParserWrapper(AUTO_DETECT_PARSER);
 RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
-new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 70));
+new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
+70));
 try (InputStream stream =
 getResourceAsStream("/test-documents/test_recursive_embedded.docx")) {
 wrapper.parse(stream, handler, metadata, context);
 }
 List<Metadata> list = handler.getMetadataList();
 
-assertEquals(5, list.size());
+assertEquals(2, list.size());
 
 int wlr = 0;
 for (Metadata m : list) {
@@ -112,15 +113,31 @@ public class RecursiveParserWrapperTest extends TikaTest {
 assertEquals(2, wlr);
 }
 
+@Test
+public void testOne() throws Exception {
+ParseContext context = new ParseContext();
+Metadata metadata = new Metadata();
+int writeLimit = 100;
+RecursiveParserWrapper wrapper = new RecursiveParserWrapper(AUTO_DETECT_PARSER);
+RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
+new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
+writeLimit, false, context));
+try (InputStream stream = getResourceAsStream(
+"/test-documents/test_recursive_embedded" + ".docx")) {
+wrapper.parse(stream, handler, metadata, context);
+}
+List<Metadata> list = handler.getMetadataList();
+assertEquals(12, list.size());
+}
 @Test
 public void testCharLimitNoThrowOnWriteLimit() throws Exception {
 ParseContext context = new ParseContext();
 Metadata metadata = new Metadata();
-
+int writeLimit = 500;
 RecursiveParserWrapper wrapper = new 

[tika] 01/01: TIKA-4002 -- add mime type detection for pcapng

2023-05-24 Thread tallison
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4002
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 57cbc6a166fffd7c1c00156e33475d2cb5e0c259
Author: tballison 
AuthorDate: Wed May 24 17:49:31 2023 -0400

TIKA-4002 -- add mime type detection for pcapng
---
 .../main/resources/org/apache/tika/mime/tika-mimetypes.xml   | 12 
 .../src/test/java/org/apache/tika/mime/OneOffMimeTest.java   |  5 +++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 84425c3e1..5b0a479fe 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -2918,6 +2918,18 @@
 
 
   
+  
+<_comment>TCPDump next gen pcap packet capture</_comment>
+
https://www.ietf.org/staging/draft-tuexen-opsawg-pcapng-02.html
+
+  
+
+
+
+  
+
+
+  
 
   
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java
index 8f1b628df..45c491639 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/OneOffMimeTest.java
@@ -39,8 +39,9 @@ public class OneOffMimeTest extends TikaTest {
 "cannot be added to Tika's repo.")
 @Test
 public void testOne() throws Exception {
-Path p = Paths.get("");
-String mime = "audio/x-sap";
+Path baseDir = Paths.get("");
+Path p = baseDir.resolve("");
+String mime = "application/vnd.tcpdump.pcapng";
 assertByData(mime, p);
 assertByName(mime, p);
 }