Author: kkrugler
Date: Thu Aug 9 21:53:45 2012
New Revision: 1371504
URL: http://svn.apache.org/viewvc?rev=1371504&view=rev
Log:
TIKA-869: IdentityHtmlMapper.mapSafeElement() needs to return lower-cased
incoming name
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java?rev=1371504&r1=1371503&r2=1371504&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java Thu Aug 9 21:53:45 2012
@@ -37,7 +37,7 @@ public class IdentityHtmlMapper implemen
}
public String mapSafeElement(String name) {
- return name;
+ return name.toLowerCase(Locale.ENGLISH);
}
}
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=1371504&r1=1371503&r2=1371504&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Thu Aug 9 21:53:45 2012
@@ -738,4 +738,28 @@ public class HtmlParserTest extends Test
assertNotNull(content);
}
+ /**
+ * Test case for TIKA-869
+ * IdentityHtmlMapper needs to lower-case tag names.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-869">TIKA-869</a>
+ */
+ public void testIdentityMapper() throws Exception {
+ final String html = "<html><head><title>Title</title></head>" +
+ "<body></body></html>";
+ Metadata metadata = new Metadata();
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
+
+ StringWriter sw = new StringWriter();
+
+ new HtmlParser().parse (
+ new ByteArrayInputStream(html.getBytes("UTF-8")),
+ makeHtmlTransformer(sw), metadata, parseContext);
+
+ String result = sw.toString();
+ // Make sure we don't get <body><BODY/></body>
+ assertTrue(Pattern.matches("(?s).*<body/>.*$", result));
+ }
+
}