Author: jukka
Date: Fri Oct  2 10:43:17 2009
New Revision: 820956

URL: http://svn.apache.org/viewvc?rev=820956&view=rev
Log:
TIKA-290: org.apache.tika.exception.TikaException: Unexpected RuntimeException 
from org.apache.tika.parser.txt.TXTParser@6caf16

Only use a detected encoding when it is supported by the Java runtime.

Modified:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=820956&r1=820955&r2=820956&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Fri Oct  2 10:43:17 2009
@@ -23,6 +23,7 @@
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.util.Collections;
 import java.util.Map;
 
@@ -73,15 +74,20 @@
 
         // Detect the content encoding (the stream is reset to the beginning)
         // TODO: Better use of the possible encoding hint in input metadata
-        CharsetMatch match = new CharsetDetector().setText(stream).detect();
-        if (match != null) {
-            metadata.set(Metadata.CONTENT_ENCODING, match.getName());
-
-            // Is the encoding language-specific (KOI8-R, SJIS, etc.)?
-            String language = match.getLanguage();
-            if (language != null) {
-                metadata.set(Metadata.CONTENT_LANGUAGE, match.getLanguage());
-                metadata.set(Metadata.LANGUAGE, match.getLanguage());
+        CharsetDetector detector = new CharsetDetector();
+        detector.setText(stream);
+        for (CharsetMatch match : detector.detectAll()) {
+            if (Charset.isSupported(match.getName())) {
+                metadata.set(Metadata.CONTENT_ENCODING, match.getName());
+
+                // Is the encoding language-specific (KOI8-R, SJIS, etc.)?
+                String language = match.getLanguage();
+                if (language != null) {
+                    metadata.set(Metadata.CONTENT_LANGUAGE, match.getLanguage());
+                    metadata.set(Metadata.LANGUAGE, match.getLanguage());
+                }
+
+                break;
             }
         }
 


Reply via email to