This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new a0ffec1  Handle .epub files using .htm rather than .html extensions for the embedded contents (TIKA-1288)
a0ffec1 is described below

commit a0ffec146e84fdcf4c747b4375f92ae283944f4c
Author: Nick Burch <[email protected]>
AuthorDate: Wed May 9 10:23:09 2018 +0100

    Handle .epub files using .htm rather than .html extensions for the embedded contents (TIKA-1288)
---
 CHANGES.txt                                                            | 3 +++
 tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 194fef8..c66e883 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -73,6 +73,9 @@ Release 2.0.0 - ???
 
    * Support for SAS7BDAT data files (TIKA-2462)
 
+   * Handle .epub files using .htm rather than .html extensions for the
+     embedded contents (TIKA-1288)
+
 Release 1.17 - 12/8/2017
 
   ***NOTE: THIS IS THE LAST VERSION OF TIKA THAT WILL RUN
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index c4f72de..775b319 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -105,7 +105,8 @@ public class EpubParser extends AbstractParser {
                 meta.parse(zip, new DefaultHandler(), metadata, context);
             } else if (entry.getName().endsWith(".opf")) {
                 meta.parse(zip, new DefaultHandler(), metadata, context);
-            } else if (entry.getName().endsWith(".html") || 
+            } else if (entry.getName().endsWith(".htm") || 
+                           entry.getName().endsWith(".html") || 
                           entry.getName().endsWith(".xhtml")) {
                 content.parse(zip, childHandler, metadata, context);
             }

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to