Author: jukka
Date: Fri Oct 2 10:43:17 2009
New Revision: 820956
URL: http://svn.apache.org/viewvc?rev=820956&view=rev
Log:
TIKA-290: org.apache.tika.exception.TikaException: Unexpected RuntimeException
from org.apache.tika.parser.txt.TXTParser@6caf16
Only use a detected encoding when it is supported by the Java runtime.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=820956&r1=820955&r2=820956&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Fri Oct 2 10:43:17 2009
@@ -23,6 +23,7 @@
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Map;
@@ -73,15 +74,20 @@
// Detect the content encoding (the stream is reset to the beginning)
// TODO: Better use of the possible encoding hint in input metadata
- CharsetMatch match = new CharsetDetector().setText(stream).detect();
- if (match != null) {
- metadata.set(Metadata.CONTENT_ENCODING, match.getName());
-
- // Is the encoding language-specific (KOI8-R, SJIS, etc.)?
- String language = match.getLanguage();
- if (language != null) {
- metadata.set(Metadata.CONTENT_LANGUAGE, match.getLanguage());
- metadata.set(Metadata.LANGUAGE, match.getLanguage());
+ CharsetDetector detector = new CharsetDetector();
+ detector.setText(stream);
+ for (CharsetMatch match : detector.detectAll()) {
+ if (Charset.isSupported(match.getName())) {
+ metadata.set(Metadata.CONTENT_ENCODING, match.getName());
+
+ // Is the encoding language-specific (KOI8-R, SJIS, etc.)?
+ String language = match.getLanguage();
+ if (language != null) {
+ metadata.set(Metadata.CONTENT_LANGUAGE, match.getLanguage());
+ metadata.set(Metadata.LANGUAGE, match.getLanguage());
+ }
+
+ break;
}
}