Author: snagel
Date: Thu Oct 11 20:20:14 2012
New Revision: 1397281
URL: http://svn.apache.org/viewvc?rev=1397281&view=rev
Log:
NUTCH-1252 SegmentReader -get shows wrong data
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1397281&r1=1397280&r2=1397281&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct 11 20:20:14 2012
@@ -2,7 +2,9 @@ Nutch Change Log
(trunk) Current Development:
-* NUTCH-1344 BasicURLNormalizer to normalize https same as http
+* NUTCH-1252 SegmentReader -get shows wrong data (snagel)
+
+* NUTCH-1344 BasicURLNormalizer to normalize https same as http (snagel)
* NUTCH-706 Url regex normalizer: pattern for session id removal not to match
"newsId" (Meghna Kukreja via snagel)
Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=1397281&r1=1397280&r2=1397281&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Thu Oct 11 20:20:14 2012
@@ -386,8 +386,15 @@ public class SegmentReader extends Confi
Writable value = (Writable)valueClass.newInstance();
// we don't know the partitioning schema
for (int i = 0; i < readers.length; i++) {
- if (readers[i].get(key, value) != null)
+ if (readers[i].get(key, value) != null) {
res.add(value);
+ value = (Writable)valueClass.newInstance();
+ Text aKey = (Text) keyClass.newInstance();
+ while (readers[i].next(aKey, value) && aKey.equals(key)) {
+ res.add(value);
+ value = (Writable)valueClass.newInstance();
+ }
+ }
readers[i].close();
}
return res;
@@ -404,8 +411,10 @@ public class SegmentReader extends Confi
Writable value = (Writable)valueClass.newInstance();
for (int i = 0; i < readers.length; i++) {
while (readers[i].next(aKey, value)) {
- if (aKey.equals(key))
+ if (aKey.equals(key)) {
res.add(value);
+ value = (Writable)valueClass.newInstance();
+ }
}
readers[i].close();
}