Author: jerome
Date: Tue Jan 10 15:52:31 2006
New Revision: 367837
URL: http://svn.apache.org/viewcvs?rev=367837&view=rev
Log:
HTMLMetaProcessor now retrieves the name, http-equiv and content attributes
regardless of their case
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java?rev=367837&r1=367836&r2=367837&view=diff
==============================================================================
---
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
(original)
+++
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
Tue Jan 10 15:52:31 2006
@@ -59,10 +59,22 @@
if ("meta".equalsIgnoreCase(node.getNodeName())) {
NamedNodeMap attrs = node.getAttributes();
- Node nameNode = attrs.getNamedItem("name");
- Node equivNode = attrs.getNamedItem("http-equiv");
- Node contentNode = attrs.getNamedItem("content");
-
+ Node nameNode = null;
+ Node equivNode = null;
+ Node contentNode = null;
+ // Retrieves name, http-equiv and content attributes
+ for (int i=0; i<attrs.getLength(); i++) {
+ Node attr = attrs.item(i);
+ String attrName = attr.getNodeName().toLowerCase();
+ if (attrName.equals("name")) {
+ nameNode = attr;
+ } else if (attrName.equals("http-equiv")) {
+ equivNode = attr;
+ } else if (attrName.equals("content")) {
+ contentNode = attr;
+ }
+ }
+
if (nameNode != null) {
if (contentNode != null) {
String name = nameNode.getNodeValue().toLowerCase();