Author: dogacan
Date: Tue Sep  8 13:15:03 2009
New Revision: 812497

URL: http://svn.apache.org/viewvc?rev=812497&view=rev
Log:
NUTCH-702 - Lazy Instantiation of Metadata in CrawlDatum. Contributed by Julien Nioche.


Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
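
For context: this change removes the eager "metaData = new MapWritable()" allocation and lets the map stay null until it is actually needed. The accessor that performs the lazy creation is not part of this diff; a minimal sketch of what it presumably looks like (the field name matches the code below, the method body is an assumption) is:

  /** Returns the metadata map, creating it on first access.
   *  Sketch only -- not necessarily the exact implementation in the repository. */
  public org.apache.hadoop.io.MapWritable getMetaData() {
    if (this.metaData == null) {
      this.metaData = new org.apache.hadoop.io.MapWritable();
    }
    return this.metaData;
  }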

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=812497&r1=812496&r2=812497&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Sep  8 13:15:03 2009
@@ -7,6 +7,8 @@
 
  2. NUTCH-721 - Fetcher2 Slow (Julien Nioche via dogacan)
 
+ 3. NUTCH-702 - Lazy Instantiation of Metadata in CrawlDatum (Julien Nioche via dogacan)
+
 Release 1.0 - 2009-03-23
 
  1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=812497&r1=812496&r2=812497&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Tue Sep  8 13:15:03 2009
@@ -131,9 +131,7 @@
     return false;
   }
 
-  public CrawlDatum() {
-    metaData = new org.apache.hadoop.io.MapWritable();
-  }
+  public CrawlDatum() { }
 
   public CrawlDatum(int status, int fetchInterval) {
     this();
@@ -213,7 +211,7 @@
     */
    public void putAllMetaData(CrawlDatum other) {
      for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
-       metaData.put(e.getKey(), e.getValue());
+       getMetaData().put(e.getKey(), e.getValue());
      }
    }
 
@@ -257,11 +255,14 @@
         in.readFully(signature);
       } else signature = null;
     }
-    metaData = new org.apache.hadoop.io.MapWritable();
+    
     if (version > 3) {
+      boolean hasMetadata = false;
       if (version < 7) {
         MapWritable oldMetaData = new MapWritable();
         if (in.readBoolean()) {
+          hasMetadata = true;
+          metaData = new org.apache.hadoop.io.MapWritable();
           oldMetaData.readFields(in);
         }
         for (Writable key : oldMetaData.keySet()) {
@@ -269,9 +270,12 @@
         }
       } else {
         if (in.readBoolean()) {
+          hasMetadata = true;
+          metaData = new org.apache.hadoop.io.MapWritable();
           metaData.readFields(in);
         }
       }
+      if (hasMetadata==false) metaData = null;
     }
     // translate status codes
     if (version < 5) {
@@ -301,7 +305,7 @@
       out.writeByte(signature.length);
       out.write(signature);
     }
-    if (metaData.size() > 0) {
+    if (metaData != null && metaData.size() > 0) {
       out.writeBoolean(true);
       metaData.write(out);
     } else {
@@ -318,7 +322,9 @@
     this.score = that.score;
     this.modifiedTime = that.modifiedTime;
     this.signature = that.signature;
-    this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData); // make a deep copy
+    if (that.metaData != null) {
+      this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData); // make a deep copy
+    }
   }
 
 
@@ -400,16 +406,25 @@
     buf.append("Score: " + getScore() + "\n");
     buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
     buf.append("Metadata: ");
-    for (Entry<Writable, Writable> e : metaData.entrySet()) {
-      buf.append(e.getKey());
-      buf.append(": ");
-      buf.append(e.getValue());
+    if (metaData != null) {
+      for (Entry<Writable, Writable> e : metaData.entrySet()) {
+        buf.append(e.getKey());
+        buf.append(": ");
+        buf.append(e.getValue());
+      }
     }
     buf.append('\n');
     return buf.toString();
   }
   
   private boolean metadataEquals(org.apache.hadoop.io.MapWritable otherMetaData) {
+    if (metaData==null || metaData.size() ==0) {
+      return otherMetaData == null || otherMetaData.size() == 0;
+    }
+    if (otherMetaData == null) {
+      // we already know that the current object is not null or empty
+      return false;
+    }
     HashSet<Entry<Writable, Writable>> set1 =
       new HashSet<Entry<Writable,Writable>>(metaData.entrySet());
     HashSet<Entry<Writable, Writable>> set2 =
@@ -441,7 +456,9 @@
                 signature[i+2] << 8 + signature[i+3]);
       }
     }
-    res ^= metaData.entrySet().hashCode();
+    if (metaData != null) {
+      res ^= metaData.entrySet().hashCode();
+    }
     return
       res ^ status ^
       ((int)fetchTime) ^
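
Since metaData can now be null, callers should go through getMetaData() (as putAllMetaData does above) or null-check the field before reading it. A hypothetical caller-side sketch, with illustrative key/value names that are not taken from this commit:

  CrawlDatum datum = new CrawlDatum();
  // The map is only allocated here, at the first real use of metadata.
  datum.getMetaData().put(new org.apache.hadoop.io.Text("fetchHost"),
                          new org.apache.hadoop.io.Text("example.org"));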

