Author: ab
Date: Mon Jul 4 03:14:58 2005
New Revision: 209054
URL: http://svn.apache.org/viewcvs?rev=209054&view=rev
Log:
Don't add the script "language" attribute or the style "rel"/"type" attribute
values to the output text - they pollute the summaries. Reported by Ilia S. Yatsenko.
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java?rev=209054&r1=209053&r2=209054&view=diff
==============================================================================
---
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
(original)
+++
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
Mon Jul 4 03:14:58 2005
@@ -102,25 +102,9 @@
boolean abortOnNestedAnchors,
int anchorDepth) {
if ("script".equalsIgnoreCase(node.getNodeName())) {
- Node n = node.getAttributes().getNamedItem("language");
- if (n != null) {
- String text = n.getNodeValue();
- sb.append(text);
- }
return false;
}
if ("style".equalsIgnoreCase(node.getNodeName())) {
- Node n = node.getAttributes().getNamedItem("rel");
- if (n != null) {
- String text = n.getNodeValue();
- sb.append(text);
- }
- n = node.getAttributes().getNamedItem("type");
- if (n != null) {
- String text = n.getNodeValue();
- if (sb.length() > 0) sb.append(", ");
- sb.append(text);
- }
return false;
}
if (abortOnNestedAnchors && "a".equalsIgnoreCase(node.getNodeName())) {