This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new a0ffec1 Handle .epub files using .htm rather than .html extensions
for the embedded contents (TIKA-1288)
a0ffec1 is described below
commit a0ffec146e84fdcf4c747b4375f92ae283944f4c
Author: Nick Burch <[email protected]>
AuthorDate: Wed May 9 10:23:09 2018 +0100
Handle .epub files using .htm rather than .html extensions for the embedded
contents (TIKA-1288)
---
CHANGES.txt | 3 +++
tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java | 3 ++-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 194fef8..c66e883 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -73,6 +73,9 @@ Release 2.0.0 - ???
* Support for SAS7BDAT data files (TIKA-2462)
+ * Handle .epub files using .htm rather than .html extensions for the
+ embedded contents (TIKA-1288)
+
Release 1.17 - 12/8/2017
***NOTE: THIS IS THE LAST VERSION OF TIKA THAT WILL RUN
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index c4f72de..775b319 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -105,7 +105,8 @@ public class EpubParser extends AbstractParser {
meta.parse(zip, new DefaultHandler(), metadata, context);
} else if (entry.getName().endsWith(".opf")) {
meta.parse(zip, new DefaultHandler(), metadata, context);
- } else if (entry.getName().endsWith(".html") ||
+ } else if (entry.getName().endsWith(".htm") ||
+ entry.getName().endsWith(".html") ||
entry.getName().endsWith(".xhtml")) {
content.parse(zip, childHandler, metadata, context);
}
--
To stop receiving notification emails like this one, please contact
[email protected].