Author: jukka
Date: Wed Dec 16 00:59:09 2009
New Revision: 891091
URL: http://svn.apache.org/viewvc?rev=891091&view=rev
Log:
TIKA-352: Use MediaType.parse when extracting charset from content-type
metadata in parsers
Even if MediaType.parse() can now handle a null argument, it's better style to
avoid relying on such an undocumented feature.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=891091&r1=891090&r2=891091&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Wed Dec 16 00:59:09 2009
@@ -88,9 +88,10 @@
// hint, or the passed content-type hint.
CharsetDetector detector = new CharsetDetector();
String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
- if (incomingCharset == null) {
+ String incomingType = metadata.get(Metadata.CONTENT_TYPE);
+ if (incomingCharset == null && incomingType != null) {
// TIKA-341: Use charset in content-type
- MediaType mt = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
+ MediaType mt = MediaType.parse(incomingType);
if (mt != null) {
String charset = mt.getParameters().get("charset");
if ((charset != null) && Charset.isSupported(charset)) {