Author: lewismc
Date: Tue Jun 25 20:19:58 2013
New Revision: 1496628
URL: http://svn.apache.org/r1496628
Log:
NUTCH-1571 SolrInputSplit doesn't implement Writable and crawl script doesn't
pass crawlId to generate and updatedb tasks
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/bin/crawl
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1496628&r1=1496627&r2=1496628&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Jun 25 20:19:58 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1571 SolrInputSplit doesn't implement Writable and crawl script doesn't pass crawlId to generate and updatedb tasks (yuanyun.cn via lewismc)
+
* NUTCH-1126 JUnit test for urlfilter-prefix (Talat UYARER via markus)
* NUTCH-1585 Ensure duplicate tags do not exist in microformat-reltag tag set
(lewismc)
Modified: nutch/branches/2.x/src/bin/crawl
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/crawl?rev=1496628&r1=1496627&r2=1496628&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/crawl (original)
+++ nutch/branches/2.x/src/bin/crawl Tue Jun 25 20:19:58 2013
@@ -140,8 +140,8 @@ do
fi
# updatedb with this batch
- echo "CrawlDB update"
- $bin/nutch updatedb $commonOptions
+ echo "CrawlDB update for $CRAWL_ID"
+ $bin/nutch updatedb $commonOptions -crawlId $CRAWL_ID
if [ $? -ne 0 ]
then exit $?
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java?rev=1496628&r1=1496627&r2=1496628&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java Tue Jun 25 20:19:58 2013
@@ -139,7 +139,7 @@ implements Tool {
}
}
- public static class SolrInputSplit extends InputSplit {
+ public static class SolrInputSplit extends InputSplit implements Writable {
private int docBegin;
private int numDocs;
@@ -164,6 +164,18 @@ implements Tool {
public String[] getLocations() throws IOException {
return new String[] {} ;
}
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ docBegin = in.readInt();
+ numDocs = in.readInt();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(docBegin);
+ out.writeInt(numDocs);
+ }
}
public static class SolrRecordReader extends RecordReader<Text, SolrRecord> {