Author: siren
Date: Tue Mar 10 07:07:22 2009
New Revision: 752000

URL: http://svn.apache.org/viewvc?rev=752000&view=rev
Log:
NUTCH-715 - Subcollection plugin doesn't work with default subcollections.xml file. Contributed by Dmitry Lihachev

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
    lucene/nutch/trunk/src/plugin/build.xml
    lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Mar 10 07:07:22 2009
@@ -378,6 +378,9 @@
 
 142. NUTCH-684 - Dedup support for Solr. (dogacan)
 
+143. NUTCH-715 - Subcollection plugin doesn't work with default
+     subcollections.xml file (Dmitry Lihachev via siren)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java Tue Mar 10 07:07:22 2009
@@ -60,7 +60,11 @@
       input = new InputSource(is);
       input.setEncoding("UTF-8");
       parser.parse(input);
-      element = (Element) parser.getDocument().getChildNodes().item(0);
+      int i = 0;
+      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+        i++;
+      }
+      element = (Element)parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
       e.printStackTrace(LogUtil.getWarnStream(LOG));
     } catch (SAXException e) {

Modified: lucene/nutch/trunk/src/plugin/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/build.xml?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Tue Mar 10 07:07:22 2009
@@ -112,6 +112,7 @@
      <ant dir="parse-swf" target="test"/>
      <ant dir="parse-zip" target="test"/>
      <ant dir="query-url" target="test"/>
+     <ant dir="subcollection" target="test"/>
      <ant dir="urlfilter-automaton" target="test"/>
      <ant dir="urlfilter-domain" target="test" />
      <ant dir="urlfilter-regex" target="test"/>

Modified: lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java (original)
+++ lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java Tue Mar 10 07:07:22 2009
@@ -49,6 +49,7 @@
   public void testInput(){
     StringBuffer xml=new StringBuffer();
     xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+    xml.append("<!-- just a comment -->");
     xml.append("<subcollections>");
     xml.append("<subcollection>");
     xml.append("<name>nutch collection</name>");