This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4244
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 6772787f97471181158e90319ee7d2a682fd6365
Author: tallison <talli...@apache.org>
AuthorDate: Thu Apr 25 11:38:34 2024 -0400

    TIKA-4244 -- improve ics detection
---
 .../main/resources/org/apache/tika/mime/tika-mimetypes.xml  |  2 +-
 .../src/test/java/org/apache/tika/mime/TestMimeTypes.java   |  4 ++++
 .../resources/test-documents/testICalendar_w_prodId.ics     | 13 +++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index a1e9de0fd..09bbd963c 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -7319,7 +7319,7 @@
   <mime-type type="text/calendar">
     <magic priority="50">
       <match value="BEGIN:VCALENDAR" type="string" offset="0">
-        <match value="VERSION:2.0" type="string" offset="15:30"/>
+        <match value="\nVERSION:2.0" type="string" offset="15:360"/>
       </match>
     </magic>
     <glob pattern="*.ics"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index cd6705b69..a988c440e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -1235,6 +1235,10 @@ public class TestMimeTypes {
         assertType("text/x-vcalendar", "testVCalendar.vcs");
         assertTypeByData("text/calendar", "testICalendar.ics");
         assertTypeByData("text/x-vcalendar", "testVCalendar.vcs");
+        //TIKA-4244
+        //this tests detection with content intervening between the BEGIN:VCALENDAR and the VERSION:2.0 entry
+        assertType("text/calendar", "testICalendar_w_prodId.ics");
+        assertTypeByData("text/calendar", "testICalendar_w_prodId.ics");
     }
 
     @Test
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testICalendar_w_prodId.ics b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testICalendar_w_prodId.ics
new file mode 100644
index 000000000..0af25fc46
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testICalendar_w_prodId.ics
@@ -0,0 +1,13 @@
+BEGIN:VCALENDAR
+PRODID:-//Example Corp//iCalendar Export//EN
+VERSION:2.0
+BEGIN:VEVENT
+UID:1234567...@example.com
+DTSTAMP:20240101T080000Z
+DTSTART:20240101T100000Z
+DTEND:20240101T120000Z
+SUMMARY:Sample HTML Event
+DESCRIPTION:This is a sample event with an HTML description.
+X-ALT-DESC;FMTTYPE=text/html:<html><body><h1>Sample HTML Event</h1><p>This is a sample event with an <strong>HTML</strong> description.</p></body></html>
+END:VEVENT
+END:VCALENDAR

Reply via email to