Author: ab Date: Wed Apr 26 03:54:53 2006 New Revision: 397169 URL: http://svn.apache.org/viewcvs?rev=397169&view=rev Log: Don't allow CrawlDatum.getMetaData() to return null. Underlying MapWritable is lazily instantiated to minimize the number of created objects.
Refactor CrawlDbReducer to use this assumption. Add missing statements in CrawlDatum.equals() and CrawlDatum.hashCode() that deal with metaData. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=397169&r1=397168&r2=397169&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Apr 26 03:54:53 2006 @@ -121,11 +121,11 @@ public void setMetaData(MapWritable mapWritable) {this.metaData = mapWritable; } /** - * returns a MapWritable if it was set or read @see readFields(DataInput), - * returns null in case CrawlDatum was freshly generated or an empty map - * in case CrawlDatum is a recycled instance. + * returns a MapWritable if it was set or read in @see readFields(DataInput), + * returns empty map in case CrawlDatum was freshly created (lazily instantiated). */ public MapWritable getMetaData() { + if (this.metaData == null) this.metaData = new MapWritable(); return this.metaData; } @@ -291,6 +291,7 @@ buf.append("Retry interval: " + getFetchInterval() + " days\n"); buf.append("Score: " + getScore() + "\n"); buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n"); + buf.append("Metadata: " + (metaData != null ? metaData.toString() : "null") + "\n"); return buf.toString(); } @@ -298,7 +299,7 @@ if (!(o instanceof CrawlDatum)) return false; CrawlDatum other = (CrawlDatum)o; - return + boolean res = (this.status == other.status) && (this.fetchTime == other.fetchTime) && (this.modifiedTime == other.modifiedTime) && @@ -306,6 +307,19 @@ (this.fetchInterval == other.fetchInterval) && (SignatureComparator._compare(this.signature, other.signature) == 0) && (this.score == other.score); + if (!res) return res; + // allow zero-sized metadata to be equal to null metadata + if (this.metaData == null) { + if (other.metaData != null && other.metaData.size() > 0) return false; + else return true; + } else { + if (other.metaData == null) { + if (this.metaData.size() == 0) return true; + else return false; + } else { + return this.metaData.equals(other.metaData); + } + } } public int hashCode() { @@ -316,6 +330,7 @@ signature[i+2] << 8 + signature[i+3]); } } + if (metaData != null) res ^= metaData.hashCode(); return res ^ status ^ ((int)fetchTime) ^ Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=397169&r1=397168&r2=397169&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Apr 26 03:54:53 2006 @@ -67,11 +67,10 @@ result.set(highest); if (old != null) { // copy metadata from old, if exists - if (old.getMetaData() != null) { - if (result.getMetaData() == null) result.setMetaData(new MapWritable()); + if (old.getMetaData().size() > 0) { result.getMetaData().putAll(old.getMetaData()); // overlay with new, if any - if (highest.getMetaData() != null) + if (highest.getMetaData().size() > 0) result.getMetaData().putAll(highest.getMetaData()); } // set the most recent valid value of modifiedTime