Author: jerome Date: Tue Jan 10 15:52:31 2006 New Revision: 367837 URL: http://svn.apache.org/viewcvs?rev=367837&view=rev Log: HTMLMetaProcessor now retrieves the name, http-equiv and content attributes regardless of their case
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java?rev=367837&r1=367836&r2=367837&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java (original) +++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java Tue Jan 10 15:52:31 2006 @@ -59,10 +59,22 @@ if ("meta".equalsIgnoreCase(node.getNodeName())) { NamedNodeMap attrs = node.getAttributes(); - Node nameNode = attrs.getNamedItem("name"); - Node equivNode = attrs.getNamedItem("http-equiv"); - Node contentNode = attrs.getNamedItem("content"); - + Node nameNode = null; + Node equivNode = null; + Node contentNode = null; + // Retrieves name, http-equiv and content attribues + for (int i=0; i<attrs.getLength(); i++) { + Node attr = attrs.item(i); + String attrName = attr.getNodeName().toLowerCase(); + if (attrName.equals("name")) { + nameNode = attr; + } else if (attrName.equals("http-equiv")) { + equivNode = attr; + } else if (attrName.equals("content")) { + contentNode = attr; + } + } + if (nameNode != null) { if (contentNode != null) { String name = nameNode.getNodeValue().toLowerCase(); ------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Do you grep through log files for problems? Stop! Download the new AJAX search engine that makes searching your log files as easy as surfing the web. DOWNLOAD SPLUNK! 
http://ads.osdn.com/?ad_id=7637&alloc_id=16865&op=click _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs