jukka
Tue, 15 Dec 2009 16:59:33 -0800
Author: jukka Date: Wed Dec 16 00:59:09 2009 New Revision: 891091 URL: http://svn.apache.org/viewvc?rev=891091&view=rev Log: TIKA-352: Use MediaType.parse when extracting charset from content-type metadata in parsers
Even if MediaType.parse() can now handle a null argument, it is better style
not to rely on such an undocumented feature.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=891091&r1=891090&r2=891091&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Wed Dec 16 00:59:09 2009
@@ -88,9 +88,10 @@
// hint, or the passed content-type hint.
CharsetDetector detector = new CharsetDetector();
String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
- if (incomingCharset == null) {
+ String incomingType = metadata.get(Metadata.CONTENT_TYPE);
+ if (incomingCharset == null && incomingType != null) {
// TIKA-341: Use charset in content-type
- MediaType mt = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
+ MediaType mt = MediaType.parse(incomingType);
if (mt != null) {
String charset = mt.getParameters().get("charset");
if ((charset != null) && Charset.isSupported(charset)) {