Author: ab
Date: Tue Jul 5 01:56:01 2005
New Revision: 209246
URL: http://svn.apache.org/viewcvs?rev=209246&view=rev
Log:
Activate this as a Parser plugin (it was accidentally omitted).
Also accept an empty content type, if the extension is right.
Modified:
lucene/nutch/trunk/src/plugin/parse-js/plugin.xml
lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
Modified: lucene/nutch/trunk/src/plugin/parse-js/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-js/plugin.xml?rev=209246&r1=209245&r2=209246&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-js/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-js/plugin.xml Tue Jul 5 01:56:01 2005
@@ -6,6 +6,10 @@
provider-name="nutch.org">
<extension-point
+ id="org.apache.nutch.parse.Parser"
+ name="Nutch Content Parser"/>
+
+ <extension-point
id="org.apache.nutch.parse.HtmlParseFilter"
name="HTML Parse Filter"/>
@@ -15,6 +19,14 @@
</library>
</runtime>
+ <extension id="org.apache.nutch.parse.js"
+ name="JS Parser"
+ point="org.apache.nutch.parse.Parser">
+ <implementation id="JSParser"
+ class="org.apache.nutch.parse.js.JSParseFilter"
+ contentType="application/x-javascript"
+ pathSuffix="js"/>
+ </extension>
<extension id="org.apache.nutch.parse.js.JSParseFilter"
name="Parse JS Filter"
point="org.apache.nutch.parse.HtmlParseFilter">
Modified:
lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java?rev=209246&r1=209245&r2=209246&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java Tue Jul 5 01:56:01 2005
@@ -120,9 +120,9 @@
public Parse getParse(Content c) {
String type = c.getContentType();
- if (type != null &&
!type.toLowerCase().startsWith("application/x-javascript"))
+ if (type != null && !type.trim().equals("") &&
!type.toLowerCase().startsWith("application/x-javascript"))
return new ParseStatus(ParseStatus.FAILED_INVALID_FORMAT,
- "Content not JavaScript: " + type).getEmptyParse();
+ "Content not JavaScript: '" + type + "'").getEmptyParse();
String script = new String(c.getContent());
Outlink[] outlinks = getJSLinks(script, c.getUrl(), c.getUrl());
if (outlinks == null) outlinks = new Outlink[0];