This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch ThetaGetEstimate
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit 806a514ac100da15057338259233df965f933647
Author: Lee Rhodes <lee...@users.noreply.github.com>
AuthorDate: Thu Feb 13 14:45:50 2020 -0800

    Improved Theta getEstimate() speed performance.
---
 .../org/apache/datasketches/theta/DirectCompactSketch.java  | 12 ++++++++++--
 .../apache/datasketches/theta/DirectQuickSelectSketchR.java |  9 +++++++++
 .../org/apache/datasketches/theta/EmptyCompactSketch.java   |  9 ++++++---
 .../java/org/apache/datasketches/theta/HeapAlphaSketch.java |  9 +++------
 .../org/apache/datasketches/theta/HeapCompactSketch.java    |  9 +++++++--
 .../apache/datasketches/theta/HeapQuickSelectSketch.java    | 13 +++++++++----
 src/main/java/org/apache/datasketches/theta/Sketch.java     | 12 +++---------
 src/main/java/org/apache/datasketches/theta/Sketches.java   |  2 +-
 .../java/org/apache/datasketches/theta/HeapAnotBTest.java   |  5 ++---
 9 files changed, 50 insertions(+), 30 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java 
b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java
index 8a4b6bf..03543b5 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java
@@ -49,8 +49,11 @@ abstract class DirectCompactSketch extends CompactSketch {
   }
 
   @Override
-  public HashIterator iterator() {
-    return new MemoryHashIterator(mem_, getRetainedEntries(), getThetaLong());
+  public double getEstimate() {
+    final int curCount = extractCurCount(mem_);
+    final int preLongs = extractPreLongs(mem_);
+    final long thetaLong = (preLongs > 2) ? extractThetaLong(mem_) : 
Long.MAX_VALUE;
+    return Sketch.estimate(thetaLong, curCount);
   }
 
   //overridden by EmptyCompactSketch and SingleItemSketch
@@ -87,6 +90,11 @@ abstract class DirectCompactSketch extends CompactSketch {
   }
 
   @Override
+  public HashIterator iterator() {
+    return new MemoryHashIterator(mem_, getRetainedEntries(), getThetaLong());
+  }
+
+  @Override
   public byte[] toByteArray() {
     return
         compactMemoryToByteArray(mem_, getCurrentPreambleLongs(true), 
getRetainedEntries(true));
diff --git 
a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java 
b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
index c50e677..38421ff 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
@@ -28,9 +28,11 @@ import static 
org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE;
 import static org.apache.datasketches.theta.PreambleUtil.P_FLOAT;
 import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT;
 import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG;
+import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
 import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
 import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
 import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
 
 import org.apache.datasketches.Family;
 import org.apache.datasketches.ResizeFactor;
@@ -119,6 +121,13 @@ class DirectQuickSelectSketchR extends UpdateSketch {
   }
 
   @Override
+  public double getEstimate() {
+    final int curCount = extractCurCount(mem_);
+    final long thetaLong = extractThetaLong(mem_);
+    return Sketch.estimate(thetaLong, curCount);
+  }
+
+  @Override
   public Family getFamily() {
     final int familyID = mem_.getByte(FAMILY_BYTE) & 0XFF;
     return Family.idToFamily(familyID);
diff --git 
a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java 
b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java
index 7f3e6e0..81d80f1 100644
--- a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java
@@ -66,9 +66,7 @@ final class EmptyCompactSketch extends CompactSketch {
   }
 
   @Override
-  public HashIterator iterator() {
-    return new HeapHashIterator(new long[0], 0, Long.MAX_VALUE);
-  }
+  public double getEstimate() { return 0; }
 
   @Override
   public int getRetainedEntries(final boolean valid) {
@@ -100,6 +98,11 @@ final class EmptyCompactSketch extends CompactSketch {
     return true;
   }
 
+  @Override
+  public HashIterator iterator() {
+    return new HeapHashIterator(new long[0], 0, Long.MAX_VALUE);
+  }
+
   /**
    * Returns 8 bytes representing a CompactSketch that has the following flags set:
    * ordered, compact, empty, readOnly. The SerVer is 3, the Family is 
COMPACT(3),
diff --git a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java 
b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
index 77e800e..21057d2 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
@@ -166,12 +166,9 @@ final class HeapAlphaSketch extends HeapUpdateSketch {
 
   @Override
   public double getEstimate() {
-    if (isEstimationMode()) {
-      final int curCount = getRetainedEntries(true);
-      final double theta = getTheta();
-      return (thetaLong_ > split1_) ? curCount / theta : (1 << lgNomLongs_) / 
theta;
-    }
-    return curCount_;
+    return (thetaLong_ > split1_)
+        ? Sketch.estimate(thetaLong_, curCount_)
+        : (1 << lgNomLongs_) * (MAX_THETA_LONG_AS_DOUBLE / thetaLong_);
   }
 
   @Override
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java 
b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
index eb4831d..be41215 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
@@ -68,8 +68,8 @@ abstract class HeapCompactSketch extends CompactSketch {
   }
 
   @Override
-  public HashIterator iterator() {
-    return new HeapHashIterator(cache_, cache_.length, thetaLong_);
+  public double getEstimate() {
+    return Sketch.estimate(thetaLong_, curCount_);
   }
 
   @Override
@@ -97,6 +97,11 @@ abstract class HeapCompactSketch extends CompactSketch {
     return empty_;
   }
 
+  @Override
+  public HashIterator iterator() {
+    return new HeapHashIterator(cache_, cache_.length, thetaLong_);
+  }
+
   //restricted methods
 
   @Override
diff --git 
a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java 
b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
index bff7d0b..cb40feb 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
@@ -142,13 +142,13 @@ class HeapQuickSelectSketch extends HeapUpdateSketch {
   //Sketch
 
   @Override
-  public Family getFamily() {
-    return MY_FAMILY;
+  public double getEstimate() {
+    return Sketch.estimate(thetaLong_, curCount_);
   }
 
   @Override
-  public HashIterator iterator() {
-    return new HeapHashIterator(cache_, 1 << lgArrLongs_, thetaLong_);
+  public Family getFamily() {
+    return MY_FAMILY;
   }
 
   @Override
@@ -167,6 +167,11 @@ class HeapQuickSelectSketch extends HeapUpdateSketch {
   }
 
   @Override
+  public HashIterator iterator() {
+    return new HeapHashIterator(cache_, 1 << lgArrLongs_, thetaLong_);
+  }
+
+  @Override
   public byte[] toByteArray() {
     return toByteArray(preambleLongs_, (byte) MY_FAMILY.getID());
   }
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java 
b/src/main/java/org/apache/datasketches/theta/Sketch.java
index a1188ca..50a1544 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -229,9 +229,7 @@ public abstract class Sketch {
    * Gets the unique count estimate.
    * @return the sketch's best estimate of the cardinality of the input stream.
    */
-  public double getEstimate() {
-    return estimate(getThetaLong(), getRetainedEntries(true), isEmpty());
-  }
+  public abstract double getEstimate();
 
   /**
    * Returns the Family that this sketch belongs to
@@ -618,12 +616,8 @@ public abstract class Sketch {
     return ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
   }
 
-  static final double estimate(final long thetaLong, final int curCount, final 
boolean empty) {
-    if (estMode(thetaLong, empty)) {
-      final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
-      return curCount / theta;
-    }
-    return curCount;
+  static final double estimate(final long thetaLong, final int curCount) {
+    return curCount * (MAX_THETA_LONG_AS_DOUBLE / thetaLong);
   }
 
   static final double lowerBound(final int curCount, final long thetaLong, 
final int numStdDev,
diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java 
b/src/main/java/org/apache/datasketches/theta/Sketches.java
index 4f5ebe5..749cfbf 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketches.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketches.java
@@ -293,7 +293,7 @@ public final class Sketches {
    */
   public static double getEstimate(final Memory srcMem) {
     checkIfValidThetaSketch(srcMem);
-    return Sketch.estimate(getThetaLong(srcMem), getRetainedEntries(srcMem), 
getEmpty(srcMem));
+    return Sketch.estimate(getThetaLong(srcMem), getRetainedEntries(srcMem));
   }
 
   /**
diff --git a/src/test/java/org/apache/datasketches/theta/HeapAnotBTest.java 
b/src/test/java/org/apache/datasketches/theta/HeapAnotBTest.java
index f8ccdb6..2f1ae23 100644
--- a/src/test/java/org/apache/datasketches/theta/HeapAnotBTest.java
+++ b/src/test/java/org/apache/datasketches/theta/HeapAnotBTest.java
@@ -25,11 +25,10 @@ import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
 
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.WritableMemory;
 import org.apache.datasketches.Family;
 import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
 
 /**
  * @author Lee Rhodes


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datasketches.apache.org
For additional commands, e-mail: commits-h...@datasketches.apache.org

Reply via email to