Author: jimk
Date: Mon Jan 14 00:09:11 2008
New Revision: 611734

URL: http://svn.apache.org/viewvc?rev=611734&view=rev
Log:
HADOOP-2558 org.onelab.filter.BloomFilter class uses 8X the memory it should be 
using

Modified:
    lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
    
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/BloomFilter.java
    
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/CountingBloomFilter.java
    
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/DynamicBloomFilter.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/Filter.java
    
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/RetouchedBloomFilter.java

Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=611734&r1=611733&r2=611734&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Mon Jan 14 00:09:11 2008
@@ -177,6 +177,8 @@
    HADOOP-2548 Make TableMap and TableReduce generic
                (Frederik Hedberg via Stack)
    HADOOP-2557 Shell count function (Edward Yoon via Stack)
+   HADOOP-2558 org.onelab.filter.BloomFilter class uses 8X the memory it should
+               be using
 
 Release 0.15.1
 Branch 0.15

Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/BloomFilter.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/BloomFilter.java?rev=611734&r1=611733&r2=611734&view=diff
==============================================================================
--- 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/BloomFilter.java
 (original)
+++ 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/BloomFilter.java
 Mon Jan 14 00:09:11 2008
@@ -51,6 +51,8 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
+import java.util.BitSet;
+
 /**
  * Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
  * <p>
@@ -72,11 +74,24 @@
  * @see <a 
href="http://portal.acm.org/citation.cfm?id=362692&dl=ACM&coll=portal";>Space/Time
 Trade-Offs in Hash Coding with Allowable Errors</a>
  */
 public class BloomFilter extends Filter {
+  private static final byte[] bitvalues = new byte[] {
+    (byte)0x01,
+    (byte)0x02,
+    (byte)0x04,
+    (byte)0x08,
+    (byte)0x10,
+    (byte)0x20,
+    (byte)0x40,
+    (byte)0x80
+  };
+  
   /** The bit vector. */
-  boolean[] vector;
+  BitSet bits;
 
   /** Default constructor - use with readFields */
-  public BloomFilter() {}
+  public BloomFilter() {
+    super();
+  }
   
   /**
    * Constructor
@@ -86,7 +101,7 @@
   public BloomFilter(int vectorSize, int nbHash){
     super(vectorSize, nbHash);
 
-    vector = new boolean[this.vectorSize];
+    bits = new BitSet(this.vectorSize);
   }//end constructor
 
   /** [EMAIL PROTECTED] */
@@ -100,7 +115,7 @@
     hash.clear();
 
     for(int i = 0; i < nbHash; i++) {
-      vector[h[i]] = true;
+      bits.set(h[i]);
     }
   }//end add()
 
@@ -114,11 +129,7 @@
       throw new IllegalArgumentException("filters cannot be and-ed");
     }
 
-    BloomFilter bf = (BloomFilter)filter;
-
-    for(int i = 0; i < vectorSize; i++) {
-      this.vector[i] &= bf.vector[i];
-    }
+    this.bits.and(((BloomFilter) filter).bits);
   }//end and()
 
   /** [EMAIL PROTECTED] */
@@ -131,7 +142,7 @@
     int[] h = hash.hash(key);
     hash.clear();
     for(int i = 0; i < nbHash; i++) {
-      if(!vector[h[i]]) {
+      if(!bits.get(h[i])) {
         return false;
       }
     }
@@ -141,9 +152,7 @@
   /** [EMAIL PROTECTED] */
   @Override
   public void not(){
-    for(int i = 0; i < vectorSize; i++) {
-      vector[i] = !vector[i];
-    }
+    bits.flip(0, vectorSize - 1);
   }//end not()
 
   /** [EMAIL PROTECTED] */
@@ -155,12 +164,7 @@
         || filter.nbHash != this.nbHash) {
       throw new IllegalArgumentException("filters cannot be or-ed");
     }
-
-    BloomFilter bf = (BloomFilter)filter;
-
-    for(int i = 0; i < vectorSize; i++) {
-      this.vector[i] |= bf.vector[i];
-    }
+    bits.or(((BloomFilter) filter).bits);
   }//end or()
 
   /** [EMAIL PROTECTED] */
@@ -172,24 +176,13 @@
         || filter.nbHash != this.nbHash) {
       throw new IllegalArgumentException("filters cannot be xor-ed");
     }
-
-    BloomFilter bf = (BloomFilter)filter;
-
-    for(int i = 0; i < vectorSize; i++) {
-      this.vector[i] = (this.vector[i] && !bf.vector[i])
-      || (!this.vector[i] && bf.vector[i]);
-    }
+    bits.xor(((BloomFilter) filter).bits);
   }//and xor()
 
   /** [EMAIL PROTECTED] */
   @Override
   public String toString(){
-    StringBuilder res = new StringBuilder();
-
-    for(int i = 0; i < vectorSize; i++) {
-      res.append(vector[i] ? "1" : "0");
-    }
-    return res.toString();
+    return bits.toString();
   }//end toString()
 
   /** [EMAIL PROTECTED] */
@@ -200,56 +193,50 @@
     return bf;
   }//end clone()
 
-  /** [EMAIL PROTECTED] */
-  @Override
-  public boolean equals(Object o) {
-    return this.compareTo(o) == 0;
-  }
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int hashCode() {
-    int result = super.hashCode();
-    for(int i = 0; i < vector.length; i++) {
-      result ^= Boolean.valueOf(vector[i]).hashCode();
-    }
-    return result;
-  }
-
   // Writable
 
   /** [EMAIL PROTECTED] */
   @Override
   public void write(DataOutput out) throws IOException {
     super.write(out);
-    for(int i = 0; i < vector.length; i++) {
-      out.writeBoolean(vector[i]);
+    byte[] bytes = new byte[getNBytes()];
+    for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, 
bitIndex++) {
+      if (bitIndex == 8) {
+        bitIndex = 0;
+        byteIndex++;
+      }
+      if (bitIndex == 0) {
+        bytes[byteIndex] = 0;
+      }
+      if (bits.get(i)) {
+        bytes[byteIndex] |= bitvalues[bitIndex];
+      }
     }
+    out.write(bytes);
   }
 
   /** [EMAIL PROTECTED] */
   @Override
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
-    vector = new boolean[vectorSize];
-    for(int i = 0; i < vector.length; i++) {
-      vector[i] = in.readBoolean();
+    byte[] bytes = new byte[getNBytes()];
+    in.readFully(bytes);
+    for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, 
bitIndex++) {
+      if (bitIndex == 8) {
+        bitIndex = 0;
+        byteIndex++;
+      }
+      if (bitIndex == 0) {
+        bytes[byteIndex] = 0;
+      }
+      if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
+        bits.set(i);
+      }
     }
   }
-
-  // Comparable
   
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int compareTo(Object o) {
-    int result = super.compareTo(o);
-    
-    BloomFilter other = (BloomFilter)o;
-      
-    for(int i = 0; result == 0 && i < vector.length; i++) {
-      result = (vector[i] == other.vector[i] ? 0
-          : (vector[i] ? 1 : -1));
-    }
-    return result;
-  }// end compareTo
+  /* @return number of bytes needed to hold bit vector */
+  private int getNBytes() {
+    return (vectorSize + 7) / 8;
+  }
 }//end class

Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/CountingBloomFilter.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/CountingBloomFilter.java?rev=611734&r1=611733&r2=611734&view=diff
==============================================================================
--- 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/CountingBloomFilter.java
 (original)
+++ 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/CountingBloomFilter.java
 Mon Jan 14 00:09:11 2008
@@ -213,22 +213,6 @@
     return cbf;
   }//end clone()
 
-  /** [EMAIL PROTECTED] */
-  @Override
-  public boolean equals(Object o) {
-    return this.compareTo(o) == 0;
-  }
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int hashCode() {
-    int result = super.hashCode();
-    for(int i = 0; i < vector.length; i++) {
-      result ^= Byte.valueOf(vector[i]).hashCode();
-    }
-    return result;
-  }
-
   // Writable
 
   /** [EMAIL PROTECTED] */
@@ -249,25 +233,4 @@
       vector[i] = in.readByte();
     }
   }
-
-  // Comparable
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int compareTo(Object o) {
-    int result = super.compareTo(o);
-    
-    if(result == 0) {
-      CountingBloomFilter other = (CountingBloomFilter)o;
-      
-      for(int i = 0; i < vector.length; i++) {
-        result = vector[i] - other.vector[i];
-        
-        if(result != 0) {
-          break;
-        }
-      }
-    }
-    return result;
-  }// end compareTo
 }//end class

Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/DynamicBloomFilter.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/DynamicBloomFilter.java?rev=611734&r1=611733&r2=611734&view=diff
==============================================================================
--- 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/DynamicBloomFilter.java
 (original)
+++ 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/DynamicBloomFilter.java
 Mon Jan 14 00:09:11 2008
@@ -247,22 +247,6 @@
     return dbf;
   }//end clone()
 
-  /** [EMAIL PROTECTED] */
-  @Override
-  public boolean equals(Object o) {
-    return this.compareTo(o) == 0;
-  }
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int hashCode() {
-    int result = super.hashCode();
-    for(int i = 0; i < matrix.length; i++) {
-      result ^= matrix[i].hashCode();
-    }
-    return result;
-  }
-
   // Writable
 
   /** [EMAIL PROTECTED] */
@@ -283,35 +267,6 @@
       matrix[i].readFields(in);
     }
   }
-
-  // Comparable
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int compareTo(Object o) {
-    int result = super.compareTo(o);
-    
-    if(result == 0) {
-      DynamicBloomFilter other = (DynamicBloomFilter)o;
-      
-      result = this.nr - other.nr;
-      
-      if(result == 0) {
-        result = this.currentNbRecord - other.currentNbRecord;
-        
-        if(result == 0) {
-          for(int i = 0; i < matrix.length; i++) {
-            result = matrix[i].compareTo(other.matrix[i]) ;
-            
-            if(result != 0) {
-              break;
-            }
-          }
-        }
-      }
-    }
-    return result;
-  }// end compareTo
 
   /**
    * Adds a new row to <i>this</i> dynamic Bloom filter.

Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/Filter.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/Filter.java?rev=611734&r1=611733&r2=611734&view=diff
==============================================================================
--- 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/Filter.java 
(original)
+++ 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/Filter.java 
Mon Jan 14 00:09:11 2008
@@ -54,7 +54,7 @@
 import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
 
 /**
  * Defines the general behavior of a filter.
@@ -74,7 +74,7 @@
  * @see org.onelab.filter.Key The general behavior of a key
  * @see org.onelab.filter.HashFunction A hash function
  */
-public abstract class Filter implements WritableComparable {
+public abstract class Filter implements Writable {
   /** The vector size of <i>this</i> filter. */
   int vectorSize;
 
@@ -182,14 +182,6 @@
     }
   }//end add()
   
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int hashCode() {
-    int result = Integer.valueOf(this.nbHash).hashCode();
-    result ^= Integer.valueOf(this.vectorSize);
-    return result;
-  }
-
   // Writable interface
   
   /** [EMAIL PROTECTED] */
@@ -204,19 +196,4 @@
     this.vectorSize = in.readInt();
     this.hash = new HashFunction(this.vectorSize, this.nbHash);
   }
-  
-  // Comparable interface
-  
-  /** [EMAIL PROTECTED] */
-  public int compareTo(Object o) {
-    Filter other = (Filter)o;
-    int result = this.vectorSize - other.vectorSize;
-    if(result == 0) {
-      result = this.nbHash - other.nbHash;
-    }
-    
-    return result;
-  }
-
-  
 }//end class

Modified: 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/RetouchedBloomFilter.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/RetouchedBloomFilter.java?rev=611734&r1=611733&r2=611734&view=diff
==============================================================================
--- 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/RetouchedBloomFilter.java
 (original)
+++ 
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/onelab/filter/RetouchedBloomFilter.java
 Mon Jan 14 00:09:11 2008
@@ -118,7 +118,7 @@
     hash.clear();
 
     for(int i = 0; i < nbHash; i++) {
-      vector[h[i]] = true;
+      bits.set(h[i]);
       keyVector[h[i]].add(key);
     }//end for - i
   }//end add()
@@ -333,7 +333,7 @@
     ratio[index] = 0.0;
 
     //update bit vector
-    vector[index] = false;
+    bits.clear(index);
   }//end clearBit()
 
   /**
@@ -395,28 +395,6 @@
     }//end for -i
   }//end createVector()
   
-  /** [EMAIL PROTECTED] */
-  @Override
-  public boolean equals(Object o) {
-    return this.compareTo(o) == 0;
-  }
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int hashCode() {
-    int result = super.hashCode();
-    for(int i = 0; i < fpVector.length; i++) {
-      result ^= fpVector[i].hashCode();
-    }
-    for(int i = 0; i < keyVector.length; i++) {
-      result ^= keyVector[i].hashCode();
-    }
-    for(int i = 0; i < ratio.length; i++) {
-      result ^= Double.valueOf(ratio[i]).hashCode();
-    }
-    return result;
-  }
-
   // Writable
 
   /** [EMAIL PROTECTED] */
@@ -469,38 +447,4 @@
       ratio[i] = in.readDouble();
     }
   }
-
-  // Comparable
-  
-  /** [EMAIL PROTECTED] */
-  @Override
-  public int compareTo(Object o) {
-    int result = super.compareTo(o);
-    
-    RetouchedBloomFilter other = (RetouchedBloomFilter)o;
-      
-    for(int i = 0; result == 0 && i < fpVector.length; i++) {
-      List<Key> mylist = fpVector[i];
-      List<Key> otherlist = other.fpVector[i];
-        
-      for(int j = 0; result == 0 && j < mylist.size(); j++) {
-        result = mylist.get(j).compareTo(otherlist.get(j));
-      }
-    }
-
-    for(int i = 0; result == 0 && i < keyVector.length; i++) {
-      List<Key> mylist = keyVector[i];
-      List<Key> otherlist = other.keyVector[i];
-        
-      for(int j = 0; result == 0 && j < mylist.size(); j++) {
-        result = mylist.get(j).compareTo(otherlist.get(j));
-      }
-    }
-
-    for(int i = 0; result == 0 && i < ratio.length; i++) {
-      result = Double.valueOf(this.ratio[i] - other.ratio[i]).intValue();
-    }
-    
-    return result;
-  }// end compareTo
 }//end class


Reply via email to